diff --git a/pirs/.gitignore b/pirs/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..415bac95211b38be61156dbc495c8774f9f1f0f4 --- /dev/null +++ b/pirs/.gitignore @@ -0,0 +1,18 @@ +# Generated by Cargo +# will have compiled files and executables +/target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# IDE configurations +.idea +.vscode + +# Mac +.DS_Store +.VSCodeCounter \ No newline at end of file diff --git a/pirs/Cargo.toml b/pirs/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..96ed7e111eb2133960dc4e10be1a4c8e3c3c5d7c --- /dev/null +++ b/pirs/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "gust" +version = "0.1.0" +edition = "2018" + +[dependencies] +rust-crypto = "0.2.36" +hex = "0.4.3" +deflate = "1.0.0" +flate2 = "1.0.22" +bstr = "1.0.1" +anyhow = "1.0.66" +thiserror = "1.0.37" +spdlog-rs = "0.2.4" +byteorder = "1.4.3" +sha-1 = "0.10.1" +imara-diff = "0.1.5" +min-max = "0.1" +colored="1.7.1" +crc = "3.0.0" +diffs ="0.4.1" \ No newline at end of file diff --git a/pirs/LICENSE-APACHE b/pirs/LICENSE-APACHE new file mode 100644 index 0000000000000000000000000000000000000000..f47e8d28cdc414866cbd59a211246bc4b008b90a --- /dev/null +++ b/pirs/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/LICENSE-2.0 + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2022 GitMega + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/pirs/LICENSE-MIT b/pirs/LICENSE-MIT new file mode 100644 index 0000000000000000000000000000000000000000..478c9dcfd878bbf54f720ffea9dc537c3f0a05dc --- /dev/null +++ b/pirs/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 GitMega + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/pirs/LICENSE-THIRD-PARTY b/pirs/LICENSE-THIRD-PARTY new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pirs/README.md b/pirs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..37aaa2b0febf4b4e1bb83389fb64ffc576cac32c --- /dev/null +++ b/pirs/README.md @@ -0,0 +1,59 @@ +# Gust +## Monorepo Platform for DevOps and Open Source Supply Chain + +Git is a content-addressable filesystem and a distributed collaboration system. All files of a single repository persisted on the disk of the machine. It brings a lot of benefits to performance and maintenance. But it also has challenges for monorepo. It is hard to manage a vast code repository like a repo has 20 TB, which is typical in a middle size enterprise. + +Google has a monolithic repository platform, Piper, with more than 100 TB of data. It's building on top of Google's infrastructure. Gust's purpose is to imitate Piper's architecture to implement a developing platform which compatible Git and trunk-based development flow for collaboration, open source compliance and supply chain management and DevSecOps. + +### 1. Theory of Git + +In Git, the content of the file or commit message to store in a file with a specification format, and we call the file an Object. There are four object types: Blob, Tree, Commit and Tag. + +### 2. 
Gust's features + +#### 2.1 Monorepo for Trunk-based Development + +#### 2.2 Management for Open Source Compliance and Open Source Supply Chain + +#### 2.3 Decentralized Communication for Collaboration + +#### 2.4 Synchronized Mechanism between Open Source and Inner Source + +### 3. Architecture + +### 4. Getting Started + +### 5. Contributing + +This project enforce the [DCO](https://developercertificate.org). + +Contributors sign-off that they adhere to these requirements by adding a Signed-off-by line to commit messages. + +```bash +This is my commit message + +Signed-off-by: Random J Developer +``` + +Git even has a -s command line option to append this automatically to your commit message: + +```bash +$ git commit -s -m 'This is my commit message' +``` + +### 6. License + +Gust is licensed under this Licensed: + +* MIT LICENSE ( [LICENSE-MIT](LICENSE-MIT) or https://opensource.org/licenses/MIT) +* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or https://www.apache.org/licenses/LICENSE-2.0) + +### 7. References + +[1] [What is monorepo? 
(and should you use it?)](https://semaphoreci.com/blog/what-is-monorepo) + +[2] [Monorepo: A single repository for all your code](https://medium.com/@mattklein123/monorepo-a-single-repository-for-all-your-code-86a852bff054) + +[3] [Why Google Stores Billions of Lines of Code in a Single Repository](https://cacm.acm.org/magazines/2016/7/204032-why-google-stores-billions-of-lines-of-code-in-a-single-repository) + +[4] [Trunk Based Development](https://trunkbaseddevelopment.com) \ No newline at end of file diff --git a/pirs/resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md b/pirs/resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md new file mode 100644 index 0000000000000000000000000000000000000000..a75fb41b51d81d77826be60a77cf87f081165325 Binary files /dev/null and b/pirs/resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md differ diff --git a/pirs/resources/data/test/blob-a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c-gust.md b/pirs/resources/data/test/blob-a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c-gust.md new file mode 100644 index 0000000000000000000000000000000000000000..d8fd713a12d9925e1b0a6e582a1a3e2cc3e86293 Binary files /dev/null and b/pirs/resources/data/test/blob-a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c-gust.md differ diff --git a/pirs/resources/data/test/blob-fc1a505ac94f98cc5f29100a2d9aef97027a32fb-gitmega.md b/pirs/resources/data/test/blob-fc1a505ac94f98cc5f29100a2d9aef97027a32fb-gitmega.md new file mode 100644 index 0000000000000000000000000000000000000000..814383f77f89db5ffa87f5fb1da64cc555dea2fd Binary files /dev/null and b/pirs/resources/data/test/blob-fc1a505ac94f98cc5f29100a2d9aef97027a32fb-gitmega.md differ diff --git a/pirs/resources/data/test/commit-1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a b/pirs/resources/data/test/commit-1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a new file mode 100644 index 0000000000000000000000000000000000000000..283fa84b0f414c1b253388fca25e53c14cda8893 Binary 
files /dev/null and b/pirs/resources/data/test/commit-1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a differ diff --git a/pirs/resources/data/test/commit-3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb b/pirs/resources/data/test/commit-3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb new file mode 100644 index 0000000000000000000000000000000000000000..350a663cd8baa6d9045d32c314ca2cb332b24628 Binary files /dev/null and b/pirs/resources/data/test/commit-3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb differ diff --git a/pirs/resources/data/test/gitmega.md b/pirs/resources/data/test/gitmega.md new file mode 100644 index 0000000000000000000000000000000000000000..82352c3a6a7a8bd32011751699c7a3648d1b5d3c --- /dev/null +++ b/pirs/resources/data/test/gitmega.md @@ -0,0 +1 @@ +# Hello Gitmega diff --git a/pirs/resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack b/pirs/resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack new file mode 100644 index 0000000000000000000000000000000000000000..aa47faf21659e64970587c9f1196db9535f054fb Binary files /dev/null and b/pirs/resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack differ diff --git a/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx b/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx new file mode 100644 index 0000000000000000000000000000000000000000..1b58be1c4aa728c1723dc6d07c6f1a0eecbfa80e Binary files /dev/null and b/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx differ diff --git a/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack b/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack new file mode 100644 index 0000000000000000000000000000000000000000..06a45cc20ab4e1a7d58a7ba9b5d8c20eeb99edb6 Binary files /dev/null and b/pirs/resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack differ diff --git 
a/pirs/resources/data/test/tag-e5c324b03b72b26f11557c4955c6d17c68dc8595 b/pirs/resources/data/test/tag-e5c324b03b72b26f11557c4955c6d17c68dc8595 new file mode 100644 index 0000000000000000000000000000000000000000..e5928360650b4587ca4c0089c746cbd5a427cedc Binary files /dev/null and b/pirs/resources/data/test/tag-e5c324b03b72b26f11557c4955c6d17c68dc8595 differ diff --git a/pirs/resources/data/test/tree-1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3 b/pirs/resources/data/test/tree-1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3 new file mode 100644 index 0000000000000000000000000000000000000000..25a95f2ac4d16927197676d1c6e0505b5db7adae Binary files /dev/null and b/pirs/resources/data/test/tree-1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3 differ diff --git a/pirs/resources/data/test/tree-9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20 b/pirs/resources/data/test/tree-9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20 new file mode 100644 index 0000000000000000000000000000000000000000..eb36344987edac9763a975b5d84d4be4101e2842 Binary files /dev/null and b/pirs/resources/data/test/tree-9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20 differ diff --git a/pirs/resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e b/pirs/resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e new file mode 100644 index 0000000000000000000000000000000000000000..cd8de0caa4bfcbd309f649a1223308532e6f23ec --- /dev/null +++ b/pirs/resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e @@ -0,0 +1,2 @@ +xE10Cs +8H ̿i"ڟ'p{10>x8mni"fy$(c{h+s JϨb ǩҿe1"d=ܥZ/VD}͌?u5t2a5gީcBXh;۝y)!KJ \ No newline at end of file diff --git a/pirs/resources/diff/a.txt b/pirs/resources/diff/a.txt new file mode 100644 index 0000000000000000000000000000000000000000..032488aaaa2bcc221aff39c20bc0f2797fdd5978 --- /dev/null +++ b/pirs/resources/diff/a.txt @@ -0,0 +1,7 @@ +The delta data starts with the size of the base object and the size of the object to be reconstructed. +These sizes are encoded using the size encoding from above. 
+The remainder of the delta data is a sequence of instructions to reconstruct the object from the base object. +If the base object is deltified, it must be converted to canonical form first. +Each instruction appends more and more data to the target object until it’s complete. +There are two supported instructions so far: +one for copy a byte range from the source object and one for inserting new data embedded in the instruction itself. \ No newline at end of file diff --git a/pirs/resources/diff/b.txt b/pirs/resources/diff/b.txt new file mode 100644 index 0000000000000000000000000000000000000000..8dec52e4b55426f42e5804c62313687a90e8945d --- /dev/null +++ b/pirs/resources/diff/b.txt @@ -0,0 +1,9 @@ +The delta data starts with the size of the base object and the size of the object to be reconstructed. +These sizes are encoded using the size encoding from above. +This is what b insert +The remainder of the delta data is a sequence of instructions to reconstruct the object from the base object. +If the base object is deltified, it must be converted to canonical form first. +Each instruction appends more and more data to the target object until it’s complete. +There are two supported instructions so far: +one for copy a byte range from the source object and one for inserting new data embedded in the instruction itself. 
+this is b new infomations \ No newline at end of file diff --git a/pirs/resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3 b/pirs/resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3 new file mode 100644 index 0000000000000000000000000000000000000000..e4e2eda7f3d56b223ad9462e5bbd4ad8765bfe69 --- /dev/null +++ b/pirs/resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3 @@ -0,0 +1 @@ +xUj0D{W '@H Zb$wM)%avvcp托瑈b(o)qD#YВ]m6N[P--!&-s-6pbaNȆN 1qux|cag:F>X6ΦӇ~\ \ No newline at end of file diff --git a/pirs/resources/loose/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 b/pirs/resources/loose/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 new file mode 100644 index 0000000000000000000000000000000000000000..fc5349ad2a10f092403e7a53a4fed69377b80e03 --- /dev/null +++ b/pirs/resources/loose/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 @@ -0,0 +1,2 @@ +xJ1=)̘d+"I3+32Yѷwy[S|UE5y(%nZ%QrB4G#sNA +>򁭡`FRV#:Kp.-QRvƅ$hޓR sK2x0qj[X{,csV wsvuߴSrwGXÂ#NL(w^?w`ʲ`ZZ}WNV a͗y_:J^%Q ^|`, \ No newline at end of file diff --git a/pirs/resources/loose/79/dc1608dba888e0378ff21591dc646c8afe4e0a b/pirs/resources/loose/79/dc1608dba888e0378ff21591dc646c8afe4e0a new file mode 100644 index 0000000000000000000000000000000000000000..484723f348982cda2905bb5855c4b7cfb6dad92a Binary files /dev/null and b/pirs/resources/loose/79/dc1608dba888e0378ff21591dc646c8afe4e0a differ diff --git a/pirs/resources/loose/8b/b783eb532d4936248f9084821af2bb309f29e7 b/pirs/resources/loose/8b/b783eb532d4936248f9084821af2bb309f29e7 new file mode 100644 index 0000000000000000000000000000000000000000..9be9f5507a6ec1d886b045f712f4fd168f84cc74 Binary files /dev/null and b/pirs/resources/loose/8b/b783eb532d4936248f9084821af2bb309f29e7 differ diff --git a/pirs/resources/loose/ce/70a618efa88992a4c4bdf22ebd832b24acf374 b/pirs/resources/loose/ce/70a618efa88992a4c4bdf22ebd832b24acf374 new file mode 100644 index 0000000000000000000000000000000000000000..79c541d0a67da26cb30c2c321312f040826a4717 Binary files /dev/null and 
b/pirs/resources/loose/ce/70a618efa88992a4c4bdf22ebd832b24acf374 differ diff --git a/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx b/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx new file mode 100644 index 0000000000000000000000000000000000000000..88d8f38967fa53a1698a4c9adafdcc7539dc964d Binary files /dev/null and b/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx differ diff --git a/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack b/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack new file mode 100644 index 0000000000000000000000000000000000000000..23979d8c420f16b7f65b0b8e26d5ec57e951b868 Binary files /dev/null and b/pirs/resources/loose/pack/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack differ diff --git a/pirs/resources/pack_g/pack-29ceccde6139ca5b0bf844a4a495d32da777c746.pack b/pirs/resources/pack_g/pack-29ceccde6139ca5b0bf844a4a495d32da777c746.pack new file mode 100644 index 0000000000000000000000000000000000000000..712f9f0d4f3b1d16adc93545dd5e96a44bc37964 Binary files /dev/null and b/pirs/resources/pack_g/pack-29ceccde6139ca5b0bf844a4a495d32da777c746.pack differ diff --git a/pirs/resources/pack_g/pack-4091c90f83c0888208d5c4004f141880f9aeecb6.pack b/pirs/resources/pack_g/pack-4091c90f83c0888208d5c4004f141880f9aeecb6.pack new file mode 100644 index 0000000000000000000000000000000000000000..07df8cda91f6ba78b13fffb820137cb5b78a8723 Binary files /dev/null and b/pirs/resources/pack_g/pack-4091c90f83c0888208d5c4004f141880f9aeecb6.pack differ diff --git a/pirs/resources/pack_g/pack-5339933a074bba3f8fe49b70e6178834f215f35a.pack b/pirs/resources/pack_g/pack-5339933a074bba3f8fe49b70e6178834f215f35a.pack new file mode 100644 index 0000000000000000000000000000000000000000..05a8ebf7d22d09ffb9bfb43c8ce45952e852d794 Binary files /dev/null and b/pirs/resources/pack_g/pack-5339933a074bba3f8fe49b70e6178834f215f35a.pack differ diff 
--git a/pirs/resources/pack_g/pack-6ce5b12bd34d8d344af5ee51ea1579756202a005.pack b/pirs/resources/pack_g/pack-6ce5b12bd34d8d344af5ee51ea1579756202a005.pack new file mode 100644 index 0000000000000000000000000000000000000000..e29aee0944a39de8bfc072b1bec9cbb6721f589e Binary files /dev/null and b/pirs/resources/pack_g/pack-6ce5b12bd34d8d344af5ee51ea1579756202a005.pack differ diff --git a/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx b/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx new file mode 100644 index 0000000000000000000000000000000000000000..36d5b3bfc21ae5607c21bf78b274b910ca09f217 Binary files /dev/null and b/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx differ diff --git a/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack b/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack new file mode 100644 index 0000000000000000000000000000000000000000..98159ff14c9f91818d48199d11eedf932c36eec6 Binary files /dev/null and b/pirs/resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack differ diff --git a/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx b/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx new file mode 100644 index 0000000000000000000000000000000000000000..88d8f38967fa53a1698a4c9adafdcc7539dc964d Binary files /dev/null and b/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.idx differ diff --git a/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack b/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack new file mode 100644 index 0000000000000000000000000000000000000000..23979d8c420f16b7f65b0b8e26d5ec57e951b868 Binary files /dev/null and b/pirs/resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack differ diff --git a/pirs/resources/total/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 
b/pirs/resources/total/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 new file mode 100644 index 0000000000000000000000000000000000000000..fc5349ad2a10f092403e7a53a4fed69377b80e03 --- /dev/null +++ b/pirs/resources/total/5f/413c76a2893bb1ff83d7c2b507a9cab30bd585 @@ -0,0 +1,2 @@ +xJ1=)̘d+"I3+32Yѷwy[S|UE5y(%nZ%QrB4G#sNA +>򁭡`FRV#:Kp.-QRvƅ$hޓR sK2x0qj[X{,csV wsvuߴSrwGXÂ#NL(w^?w`ʲ`ZZ}WNV a͗y_:J^%Q ^|`, \ No newline at end of file diff --git a/pirs/resources/total/79/dc1608dba888e0378ff21591dc646c8afe4e0a b/pirs/resources/total/79/dc1608dba888e0378ff21591dc646c8afe4e0a new file mode 100644 index 0000000000000000000000000000000000000000..484723f348982cda2905bb5855c4b7cfb6dad92a Binary files /dev/null and b/pirs/resources/total/79/dc1608dba888e0378ff21591dc646c8afe4e0a differ diff --git a/pirs/resources/total/8b/b783eb532d4936248f9084821af2bb309f29e7 b/pirs/resources/total/8b/b783eb532d4936248f9084821af2bb309f29e7 new file mode 100644 index 0000000000000000000000000000000000000000..9be9f5507a6ec1d886b045f712f4fd168f84cc74 Binary files /dev/null and b/pirs/resources/total/8b/b783eb532d4936248f9084821af2bb309f29e7 differ diff --git a/pirs/resources/total/ce/70a618efa88992a4c4bdf22ebd832b24acf374 b/pirs/resources/total/ce/70a618efa88992a4c4bdf22ebd832b24acf374 new file mode 100644 index 0000000000000000000000000000000000000000..79c541d0a67da26cb30c2c321312f040826a4717 Binary files /dev/null and b/pirs/resources/total/ce/70a618efa88992a4c4bdf22ebd832b24acf374 differ diff --git a/pirs/resources/total/output/pack-7ea8ad41c9d438654ef28297ecc874842c7d10de.pack b/pirs/resources/total/output/pack-7ea8ad41c9d438654ef28297ecc874842c7d10de.pack new file mode 100644 index 0000000000000000000000000000000000000000..0d56619c21cc3413e5c130fc95f99a9dae045f69 Binary files /dev/null and b/pirs/resources/total/output/pack-7ea8ad41c9d438654ef28297ecc874842c7d10de.pack differ diff --git a/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx 
b/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx new file mode 100644 index 0000000000000000000000000000000000000000..36d5b3bfc21ae5607c21bf78b274b910ca09f217 Binary files /dev/null and b/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.idx differ diff --git a/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack b/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack new file mode 100644 index 0000000000000000000000000000000000000000..98159ff14c9f91818d48199d11eedf932c36eec6 Binary files /dev/null and b/pirs/resources/total/pack/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack differ diff --git a/pirs/src/database/mod.rs b/pirs/src/database/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..c76f2ace9af8e355d1c96cda8ae2f25de6f88657 --- /dev/null +++ b/pirs/src/database/mod.rs @@ -0,0 +1,3 @@ +//! +//! +//! \ No newline at end of file diff --git a/pirs/src/errors.rs b/pirs/src/errors.rs new file mode 100644 index 0000000000000000000000000000000000000000..622fe2e99ce6b421af318a5d04d73421a3a9d213 --- /dev/null +++ b/pirs/src/errors.rs @@ -0,0 +1,41 @@ +use thiserror::Error; +use crate::git::hash::Hash; + +#[derive(Error, Debug)] +pub enum GitError { + #[error("The `{0}` is not a valid git object type.")] + InvalidObjectType(String), + + #[error("The `{0}` is not a valid idx file.")] + InvalidIdxFile(String), + + #[error("The `{0}` is not a valid pack file.")] + InvalidPackFile(String), + + #[error("The `{0}` is not a valid pack header.")] + InvalidPackHeader(String), + + #[error("The `{0}` is not a valid git tree type.")] + InvalidTreeItem(String), + + #[error("The {0} is not a valid Hash value ")] + InvalidHashValue(String), + + #[error("Delta Object Error Info:{0}")] + DeltaObjError(String), + + #[error("The object to be packed is incomplete ,{0}")] + UnCompletedPackObject(String), + + #[error("Error decode in the Object ,info:{0}")] + 
InvalidObjectInfo(String), + + #[error("Can't found Hash value :{0} from current file")] + NotFountHashValue(Hash), + + #[error(transparent)] + IOError(#[from] std::io::Error), + + + +} \ No newline at end of file diff --git a/pirs/src/gateway/mod.rs b/pirs/src/gateway/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..c76f2ace9af8e355d1c96cda8ae2f25de6f88657 --- /dev/null +++ b/pirs/src/gateway/mod.rs @@ -0,0 +1,3 @@ +//! +//! +//! \ No newline at end of file diff --git a/pirs/src/git/hash.rs b/pirs/src/git/hash.rs new file mode 100644 index 0000000000000000000000000000000000000000..a4757c8c167f7a1036ece6b1466a348b717c1d9b --- /dev/null +++ b/pirs/src/git/hash.rs @@ -0,0 +1,222 @@ +//!Hash值结构体 20位u8数组 +//! > Attention to the Display function + +use crate::errors::GitError; +use sha1::{Digest, Sha1}; +use std::convert::TryFrom; +use std::fmt::Display; +use std::str::FromStr; + +///Hash值的位数 - sha1 +pub const HASH_BYTES: usize = 20; +const COMMIT_OBJECT_TYPE: &[u8] = b"commit"; +const TREE_OBJECT_TYPE: &[u8] = b"tree"; +const BLOB_OBJECT_TYPE: &[u8] = b"blob"; +const TAG_OBJECT_TYPE: &[u8] = b"tag"; + +/// Git Object hash type. only support SHA1 for now. +#[allow(unused)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum HashType { + Sha1, +} +/// Hash struct ,only contain the u8 array :`[u8;20]` +#[allow(unused)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] +pub struct Hash(pub [u8; HASH_BYTES]); + +use super::object::{types::ObjectType, Metadata}; +/// Display trait for Hash type +use colored::Colorize; +impl Display for Hash { + /// # !Attention + /// cause of the color chars for ,if you want to use the string with out color , + /// please call the func:`to_plain_str()` rather than the func:`to_string()` + /// ### For example : + /// the hash value `18fd2deaaf152c7f1222c52fb2673f6192b375f0`
+ /// will be the `1;31m8d2deaaf152c7f1222c52fb2673f6192b375f00m` + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + // + write!(f, "{}", self.to_plain_str().red().bold()) + } +} + +impl Hash { + /// Create Hash by the long information , the all data . + pub fn new(data: &Vec) -> Hash { + let mut new_hash = Sha1::new(); + new_hash.update(data); + let hash_re = new_hash.finalize(); + let result = <[u8; 20]>::from(hash_re); + Hash(result) + } + + /// Create Hash from the Object + pub fn from_meta(obj: &Metadata) -> Hash { + match obj.h { + // Determine what hash type is. Currently, only sha1 is supported now + HashType::Sha1 => { + let mut h = Sha1::new(); + h.update(match obj.t { + ObjectType::Commit => COMMIT_OBJECT_TYPE, + ObjectType::Tree => TREE_OBJECT_TYPE, + ObjectType::Blob => BLOB_OBJECT_TYPE, + ObjectType::Tag => TAG_OBJECT_TYPE, + _ => panic!("can put compute the delta hash value"), + }); + h.update(b" "); + h.update(obj.data.len().to_string()); + h.update(b"\0"); + h.update(&obj.data); + let hash_re = h.finalize(); + let result = <[u8; HASH_BYTES]>::from(hash_re); + Hash(result) + } + } + } + + ///Parse the hexadecimal digit 0-f + fn hex_char_value(hex_char: u8) -> Option { + match hex_char { + b'0'..=b'9' => Some(hex_char - b'0'), + b'a'..=b'f' => Some(hex_char - b'a' + 10), + //Add The Support for the Big Char + b'A'..=b'F' => Some(hex_char - b'A' + 10), + _ => None, + } + } + + ///Change the u8 array to the Hash ,which should be the 40 length, + /// every bit is a char value of the string + pub fn from_bytes(hex_hash: &[u8]) -> Option { + const BITS_PER_CHAR: usize = 4; + const CHARS_PER_BYTE: usize = 8 / BITS_PER_CHAR; + // 将切片以chunks_size的切片 + let byte_chunks = hex_hash.chunks_exact(CHARS_PER_BYTE); + if !byte_chunks.remainder().is_empty() { + return None; + } + let bytes = byte_chunks + .map(|hex_digits| { + hex_digits.iter().try_fold(0, |value, &byte| { + let char_value = Hash::hex_char_value(byte)?; + Some(value << BITS_PER_CHAR 
| char_value) + }) + }) + .collect::>>()?; + let bytes = <[u8; HASH_BYTES]>::try_from(bytes).ok()?; + Some(Hash(bytes)) + } + + //Create a Hash value by the row value + // It's shout be a `&[u8;20]` + pub fn from_row(hex_hash: &[u8]) -> Hash { + Hash(<[u8; HASH_BYTES]>::try_from(hex_hash).unwrap()) + } + // Get tht first u8 (0x00~0xff) from the Hash + pub fn get_first(&self) -> u8 { + return self.0[0]; + } + /// Create plain String without the color chars + pub fn to_plain_str(&self) -> String { + hex::encode(self.0) + } + + #[allow(unused)] + pub(crate) fn to_folder(&self) -> String { + let str = self.to_plain_str(); + let str = str[0..2].to_string().clone(); + str + } + + #[allow(unused)] + pub(crate) fn to_filename(&self) -> String { + let str = self.to_plain_str(); + let str = str[2..].to_string().clone(); + str + } +} + +impl FromStr for Hash { + type Err = GitError; + fn from_str(hex_hash: &str) -> Result { + Hash::from_bytes(hex_hash.as_bytes()) + .ok_or_else(|| GitError::InvalidHashValue(hex_hash.to_string())) + } +} + +mod tests { + + /// The Right Hash decode + #[test] + fn test_hash() { + use super::Hash; + use std::str::FromStr; + let test_hash = Hash::from_str("18fd2deaaf152c7f1222c52fb2673f6192b375f0").unwrap(); + let result_hash: [u8; 20] = [ + 24, 253, 45, 234, 175, 21, 44, 127, 18, 34, 197, 47, 178, 103, 63, 97, 146, 179, 117, + 240, + ]; + assert_eq!(test_hash.0, result_hash); + + println!("{}", test_hash.to_string()); + println!("{}", test_hash.to_folder()); + assert_eq!(String::from("18"), test_hash.to_folder()); + assert_eq!( + String::from("fd2deaaf152c7f1222c52fb2673f6192b375f0"), + test_hash.to_filename() + ); + } + + /// The Right Hash decode + #[test] + fn test_hash_with_zero() { + use super::Hash; + use std::str::FromStr; + let test_hash = Hash::from_str("08fd2deaaf152c7f1222c52fb2673f6192b37500").unwrap(); + let result_hash: [u8; 20] = [ + 8, 253, 45, 234, 175, 21, 44, 127, 18, 34, 197, 47, 178, 103, 63, 97, 146, 179, 117, 0, + ]; + 
assert_eq!(test_hash.0, result_hash); + println!("{}", test_hash); + } + /// The Wrong Hash decode + #[test] + fn test_error_hash() { + use super::Hash; + use std::str::FromStr; + let test_str = "18fd2deaaf152c7f1222c52fb2673f6192z375f0"; + let test_hash = Hash::from_str(test_str).unwrap_err(); + print!("{:?}", test_hash); + assert_eq!( + format!("The {} is not a valid Hash value ", test_str), + test_hash.to_string() + ); + } + #[test] + fn test_btree_map() { + use super::Hash; + use std::collections::BTreeMap; + use std::str::FromStr; + let mut map = BTreeMap::new(); + map.insert( + Hash::from_str("cd64b12b3949483d42d34979a3f89589aad804c2").unwrap(), + 1, + ); + map.insert( + Hash::from_str("1c6ec4271e3e75b585e8d150f9758e4ee4890dd5").unwrap(), + 2, + ); + map.insert( + Hash::from_str("f4010b9167a3c7d81bc81bfbffbeac0c9e95052f").unwrap(), + 3, + ); + map.insert( + Hash::from_str("aa36c1e0d709f96d7b356967e16766bafdf63a75").unwrap(), + 4, + ); + for (key, value) in map.iter() { + println!("key: {} \t value :{}", key, value); + } + } +} diff --git a/pirs/src/git/id.rs b/pirs/src/git/id.rs new file mode 100644 index 0000000000000000000000000000000000000000..33ffae52f410af533686967786e89b4ce46907c4 --- /dev/null +++ b/pirs/src/git/id.rs @@ -0,0 +1,129 @@ +//! # Deprecated +//! It was replaced by the Hash struct in ./hash.rs . +//! ID . + +use std::fmt::Display; + +use crypto::digest::Digest; +use crypto::sha1::Sha1; + +use crate::git::{NL, SPACE}; + +use super::object::types::ObjectType; + +/// Git Object ID: a SHA-1 hash for now, and we will support multiple hash algorithms later. +/// The SHA-1 Hax ID is a 40-byte hexadecimal string. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct ID { + pub bytes: Vec, + pub hash: String, +} + +/// +impl ID { + /// Return the first and second alphanumeric characters of the ID. + /// In the git object store format, the first two characters is the folder for save the object. 
+ #[allow(unused)] + pub(crate) fn to_folder(&self) -> String { + //to_folder + self.hash.as_str()[0..2].to_string() + } + + /// Return the last 18 characters of the ID for the object name. + #[allow(unused)] + pub(crate) fn to_filename(&self) -> String { + self.hash.as_str()[2..].to_string() + } + + /// Return the ID in the git object store format form a hex string. + #[allow(unused)] + pub(crate) fn from_string(s: &str) -> Self { + //from_str + ID { + bytes: hex::decode(s).unwrap(), + hash: s.to_string(), + } + } + + /// Return the ID in the git object store format from a byte array. + #[allow(unused)] + pub(crate) fn from_bytes(bytes: &[u8]) -> Self { + //hex_to_hash + ID { + bytes: bytes.to_vec(), + hash: hex::encode(bytes), + } + } + + #[allow(unused)] + pub(crate) fn from_vec(t: ObjectType, data: &mut [u8]) -> Self { + //new + let mut hash = Sha1::new(); + + let object: &[u8] = &[ + t.to_string().as_bytes(), + SPACE, + data.len().to_string().as_bytes(), + NL, + (data), + ] + .concat(); + + hash.input(object); + let mut id = [0u8; 20]; + hash.result(&mut id); + + ID::from_bytes(id.as_ref()) + } +} + +/// Display ObjectID hash data to hex string +impl Display for ID { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", &self.hash) + } +} + +/// +#[cfg(test)] +mod tests { + use std::env; + use std::fs::File; + use std::io::BufReader; + use std::io::Read; + use std::path::PathBuf; + + use bstr::ByteSlice; + + use super::ID; + + /// There is a bug need to be resolve: + /// The `\r\n` is a Windows Style, but the `\n` is a POSIX Style. + /// The file will be different both length and content between Windows and Mac. + /// So there is different SHA-1 value. + /// + /// Temporarily, just replace the `\r\n` to `\n` in the test. 
+ /// + /// Same as the another test case: [test_blob_write_to_file] + /// + /// References: + /// [1] https://docs.github.com/cn/get-started/getting-started-with-git/configuring-git-to-handle-line-endings + /// + #[test] + fn test_object_id_new() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/gitmega.md"); + + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + if env::consts::OS == "windows" { + buffer = buffer.replace(b"\r\n", b"\n"); + } + + let id = ID::from_vec(super::ObjectType::Blob, &mut buffer); + assert_eq!("82352c3a6a7a8bd32011751699c7a3648d1b5d3c", id.to_string()); + } +} diff --git a/pirs/src/git/idx/mod.rs b/pirs/src/git/idx/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..ab5b4863a35254a0df90fbfd39bb9820e51d68ab --- /dev/null +++ b/pirs/src/git/idx/mod.rs @@ -0,0 +1,302 @@ +//!Idx file , which is in the dir:`.git/object/pack/*.idx` +//! +//!This file provides the offset of different objects, +//!which is used to quickly find the target object in the pack file(*.pack). +//! 
+ +use std::collections::HashMap; +use std::fmt::Display; +use std::io::Cursor; + +use crate::errors::GitError; +use crate::git::hash::Hash; +use byteorder::{BigEndian, ReadBytesExt}; + +use crate::utils; + +use super::pack::Pack; + +/// +#[allow(unused)] +#[derive(Debug)] +pub struct IdxItem { + pub id: Hash, + pub crc32: String, + pub offset: usize, +} + +/// +impl Display for IdxItem { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} {} ({})", self.offset, self.id, self.crc32) + } +} + +/// +#[allow(unused)] +#[derive(Default,Debug)] +pub struct Idx { + pub version: u32, + pub number_of_objects: usize, + pub map_of_prefix: HashMap, + pub item_hash:HashMap, + pub idx_items: Vec, + pub pack_signature: Hash, + pub idx_signature: Hash, + _file_data: Vec, +} + +/// +impl Idx { + /// + #[allow(unused)] + fn sha1_prefix(&self, n: usize) -> String { + let pre = format!("{:x}", n); + + if pre.len() == 1 { + format!("0{}", pre) + } else { + pre + } + } + + /// + #[allow(unused)] + pub fn decode(&mut self, data: Vec) -> Result<(), GitError> { + let mut offset: usize = 0; + + let mut id_of_objects: Vec = Vec::new(); + let mut crc32_of_objects: Vec = Vec::new(); + + // 4-byte Header: //FF 74 4F 63 + if data[offset..4].to_vec() != vec![255, 116, 79, 99] { + return Err(GitError::InvalidIdxFile(format!( + "Invalid idx header: {:?}", + data[0..4].to_vec() + ))); + } + offset += 4; + + // 4-byte version number (network byte order): + let mut v = Cursor::new(data[offset..8].to_vec()); + self.version = v.read_u32::().unwrap(); + offset += 4; + + // Layer 1: + // Number of objects in the pack (network byte order) + // The prefix of the SHA-1 hash of the object has how many objects it is in the pack. 
+ let mut n: usize = 0; + for i in (offset..offset + 256 * 4).filter(|x| ((x - offset) % 4 == 0)) { + let mut v = Cursor::new(data[i..i + 4].to_vec()); + let m = v.read_u32::().unwrap() as usize; + + if m != n { + self.map_of_prefix + .insert(self.sha1_prefix((i - 8) / 4), m - n); + self.number_of_objects = m; + n = m; + } + } + offset += 256 * 4; // 1040 + + // Layer 2: + // The all the SHA-1 hashes of the objects in the pack. + for i in (offset..offset + (20 * n) as usize).filter(|x| ((x - offset) % 20 == 0)) { + let id = Hash::from_row(&data[(i as usize)..(i as usize) + 20].to_vec()); + id_of_objects.push(id); + } + offset += 20 * n as usize; + + // Layer 3: + // The CRC32 of the object data. + for i in (offset..offset + (4 * n) as usize).filter(|x| ((x - offset) % 4 == 0)) { + crc32_of_objects.push(hex::encode(&data[i..i + 4])); + } + offset += 4 * n as usize; + + // Layer 4: + // the object offset in the pack file. + let mut index = 0; + for (index, i) in (offset..offset + (4 * n) as usize) + .filter(|x| ((x - offset) % 4 == 0)) + .enumerate() + { + let mut v = Cursor::new(data[i..i + 4].to_vec()); + let m = v.read_u32::().unwrap() as usize; + + self.idx_items.push(IdxItem { + id: id_of_objects[index].clone(), + crc32: crc32_of_objects[index].clone(), + offset: m, + }); + } + offset += 4 * n as usize; + + // Layer 5 + + // Layer 6: + // The SHA-1 hash of the pack file itself. + // The SHA-1 hash of the index file itself. + self.pack_signature = Hash::from_row(&data[offset..offset + 20].to_vec()); + offset += 20; + self.idx_signature = Hash::from_row(&data[offset..].to_vec()); + + + + /// fill the item_hash map. 
+ for (index,item) in self.idx_items.iter().enumerate() { + self.item_hash.insert(item.id, index); + } + Ok(()) + } + + #[allow(unused)] + pub fn encode(pack: Pack) -> Self { + let mut idx = Self::default(); + let mut result: Vec = vec![255, 116, 79, 99]; //header + let mut version: Vec = vec![0, 0, 0, 2]; + result.append(&mut version); + idx.version = 2; + + // Layer 1: + // Number of objects in the pack (network byte order) + // The prefix of the SHA-1 hash of the object has how many objects it is in the pack. + idx.number_of_objects = pack.get_object_number(); + let mut fan_out: [u32; 256] = [0; 256]; + let cache = pack.get_cache(); + for (key, value) in cache.by_hash.iter() { + fan_out[key.get_first() as usize] += 1; + } + let mut _sum = 0; + for i in 0..256 { + _sum += fan_out[i]; + fan_out[i] = _sum; + result.append(&mut utils::u32_vec(fan_out[i])); + } + + // Layer 2: + // The all the SHA-1 hashes of the objects in the pack. + for key in cache.by_hash.keys() { + result.append(&mut key.0.to_vec()) + } + + // Layer 3: + // The CRC32 of the object data. + //BUG: Cause the calculation data content of the crc32 algorithm is different, + //it is different from the crc32 value of the idx generated by git + use crc::{Algorithm, Crc, CRC_32_ISO_HDLC}; + for values in cache.by_hash.values() { + let meta = values; + let _data = meta.convert_to_vec().unwrap(); + let castagnoli: Crc = Crc::::new(&CRC_32_ISO_HDLC); + result.append(&mut utils::u32_vec(castagnoli.checksum(&_data))); + println!("Type:{}", values.t); + } + // Layer 4: + // the object offset in the pack file. + for _hash in cache.by_hash.keys() { + let offset = cache.by_offset.get(_hash).unwrap(); + result.append(&mut utils::u32_vec(*offset as u32)); + } + + // Layer 5 only for the big offset > 4G , temporary skip + + // Layer 6: + // The SHA-1 hash of the pack file itself. + let pack_hash = pack.get_hash(); + result.append(&mut pack_hash.0.to_vec()); + // The SHA-1 hash of the index file itself. 
+ let idx_hash = Hash::new(&result); + result.append(&mut idx_hash.0.to_vec()); + idx._file_data = result; + idx + } +} + +/// +#[cfg(test)] +mod tests { + use super::Idx; + use crate::utils; + use bstr::ByteSlice; + use std::env; + use std::fs::File; + use std::io::{BufReader, Read, Write}; + use std::path::{Path, PathBuf}; + ///测试读取idx + #[test] + fn test_idx_read_from_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx"); + + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + let mut idx = Idx::default(); + idx.decode(buffer).unwrap(); + + assert_eq!(2, idx.version); + assert_eq!(614, idx.number_of_objects); + assert_eq!(2, idx.map_of_prefix["7c"]); + assert_eq!(idx.number_of_objects, idx.idx_items.len()); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + idx.pack_signature.to_plain_str() + ); + assert_eq!( + "92d07408a070a5fbea3c1f2d00e696293b78e7c6", + idx.idx_signature.to_plain_str() + ); + println!("{:?}",idx); + } + + ///测试写入idx文件 + #[test] + fn test_idx_write_to_file() { + // "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack" + use super::super::pack; + let packs = pack::Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + ); + let idx = Idx::encode(packs); + + let mut file = std::fs::File::create("./test.idx").expect("create failed"); + file.write_all(idx._file_data.as_bytes()) + .expect("write failed"); + + println!("data written to file"); + let idx_file = File::open(&Path::new("./test.idx")).unwrap(); + + let mut reader = BufReader::new(idx_file); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + let mut idx = Idx::default(); + + idx.decode(buffer).unwrap(); + } + + /// fan out table create test + #[test] + fn unsafe_fan_out() { + let mut result: Vec = 
vec![]; + let mut fan_out: [u32; 256] = [0; 256]; + let mut _sum = 0; + for i in 0..255 { + _sum += fan_out[i] + 5; + fan_out[i] = _sum; + result.append(&mut utils::u32_vec(fan_out[i])); + } + assert_eq!(result[0..4], [0, 0, 0, 5]); + assert_eq!(result[4..8], [0, 0, 0, 10]); + } + + // crc32 create test + #[test] + fn test_crc32() { + use crc::{Crc, CRC_32_ISCSI}; + pub const CASTAGNOLI: Crc = Crc::::new(&CRC_32_ISCSI); + assert_eq!(CASTAGNOLI.checksum(b"123456789"), 0xe3069283); + } +} diff --git a/pirs/src/git/midx/mod.rs b/pirs/src/git/midx/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..bfbaf48995cf6d03f88b43e90c020e1689fd8bbe --- /dev/null +++ b/pirs/src/git/midx/mod.rs @@ -0,0 +1,21 @@ +//! TODO: Decode for midx +//! [multi-pack-index](https://git-scm.com/docs/git-multi-pack-index) +//! [midx format](https://git-scm.com/docs/pack-format) +#[cfg(test)] +pub mod tests { + + fn test_asci(c: &[u8]) { + for i in c { + print!("{:x} ", i); + } + println!(); + } + + #[test] + fn test_all_asci() { + test_asci(b"PAND"); + test_asci(b"OIDF"); + test_asci(b"OIDL"); + test_asci(b"OOFF"); + } +} diff --git a/pirs/src/git/mod.rs b/pirs/src/git/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..658be47ff6f1f832461d3e5ee252cacf87d30984 --- /dev/null +++ b/pirs/src/git/mod.rs @@ -0,0 +1,31 @@ +//! +//! +//! +//! +//! +//! +//! +//! + +pub mod hash; +mod id; +mod idx; +mod midx; +mod object; +mod pack; + +pub const SPACE: &[u8] = &[0x20]; + +/// In the git object store format, between the size and trunk data has a special character +/// in Hex means 0x00. +pub const NL: &[u8] = &[0x00]; + +/// In the git object store format, 0x0a is the line feed character in the commit object. 
+// pub const LF: &[u8] = &[0x0A]; + +/// +#[cfg(test)] +mod tests { + + +} diff --git a/pirs/src/git/object/base/blob.rs b/pirs/src/git/object/base/blob.rs new file mode 100644 index 0000000000000000000000000000000000000000..4130e4c3b0abef5c889749b69fe147540b08093e --- /dev/null +++ b/pirs/src/git/object/base/blob.rs @@ -0,0 +1,156 @@ +//! +//!Blob Struct +//! +use super::tree::*; +use super::Metadata; +use crate::errors::GitError; +use std::cmp::Ordering; +use std::fmt::Display; +use std::sync::Arc; + +/// Git Object: blob +#[derive(Eq, Debug, Hash, Clone)] +pub struct Blob { + pub filename: String, + pub meta: Arc, +} +impl Ord for Blob { + fn cmp(&self, other: &Self) -> Ordering { + let o = other.filename.cmp(&self.filename); + match o { + Ordering::Equal => other.meta.size.cmp(&self.meta.size), + _ => o, + } + } +} + +impl PartialOrd for Blob { + fn partial_cmp(&self, other: &Self) -> Option { + let o = other.filename.cmp(&self.filename); + match o { + Ordering::Equal => Some(other.meta.size.cmp(&self.meta.size)), + _ => Some(o), + } + } +} + +impl PartialEq for Blob { + fn eq(&self, other: &Self) -> bool { + if self.filename.eq(&other.filename) { + return true; + } + false + } +} +/// +impl Blob { + #[allow(unused)] + pub fn new(metadata: Metadata) -> Self { + Self { + meta: Arc::new(metadata), + filename: String::new(), + } + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } + + /// + #[allow(unused)] + pub(crate) fn to_tree_item(&self, filename: String) -> Result { + Ok(TreeItem { + mode: TreeItemType::Blob.to_bytes().to_vec(), + item_type: TreeItemType::Blob, + id: self.meta.id.clone(), + filename, + }) + } +} +use bstr::BString; +impl Display for Blob { + ///为了节省输出空间 暂时只输出第一行内容 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut print_data: Vec = vec![]; + for value in self.meta.data.iter() { + if *value != b'\n' { + 
print_data.push(value.clone()); + } else { + break; + } + } + + writeln!(f, "size:{}", self.meta.data.len()).unwrap(); + writeln!(f, "meta data size:{}", self.meta.size).unwrap(); + writeln!(f, "File Name: {}", self.filename).unwrap(); + writeln!(f, "Type: Blob\n{}", BString::new(print_data)).unwrap(); + writeln!(f, "Only Show the first line of the File...") + } +} +/// +#[cfg(test)] +mod tests { + use std::env; + use std::fs::File; + use std::io::BufReader; + use std::io::Read; + use std::path::{Path, PathBuf}; + use std::sync::Arc; + + use crate::git::object::types::ObjectType; + use crate::git::object::Metadata; + + use super::Blob; + /// + #[test] + fn test_blob_write_to_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/gitmega.md"); + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + // if env::consts::OS == "windows" { + // buffer = buffer.replace(b"\r\n", b"\n"); + // } + + let data = buffer; + + let meta = Metadata::new(ObjectType::Blob, &data); + + meta.write_to_file("/tmp".to_string()) + .expect("Write error!"); + assert!(Path::new("/tmp/82/352c3a6a7a8bd32011751699c7a3648d1b5d3c").exists()); + } + + /// + #[test] + fn test_blob_read_from_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md"); + + let meta = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(meta.t, crate::git::object::types::ObjectType::Blob); + + let blob = Blob { + meta: Arc::new(meta), + filename: String::new(), + }; + + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + blob.meta.id.to_plain_str() + ); + + assert_eq!(16, blob.meta.size); + // assert_eq!( + // "# Hello Gitmega\n", + // String::from_utf8(blob.meta.data).unwrap().as_str() + // ); + } +} diff --git 
a/pirs/src/git/object/base/commit.rs b/pirs/src/git/object/base/commit.rs new file mode 100644 index 0000000000000000000000000000000000000000..8bda12b7321155e4df373acf2631034303fc2824 --- /dev/null +++ b/pirs/src/git/object/base/commit.rs @@ -0,0 +1,280 @@ +//! +//!Commit Struct +//! + +use super::super::Hash; +use super::sign::AuthorSign; +use super::Metadata; +use crate::errors::GitError; +use crate::git::object::types::ObjectType; +use bstr::ByteSlice; +use std::cmp::Ordering; +use std::fmt::Display; + +/// Git Object: commit +#[allow(unused)] +#[derive(Eq, Debug, Hash, Clone)] +pub struct Commit { + pub meta: Metadata, + pub tree_id: Hash, + pub parent_tree_ids: Vec, + pub author: AuthorSign, + pub committer: AuthorSign, + pub message: String, +} +impl Ord for Commit { + fn cmp(&self, other: &Self) -> Ordering { + other.meta.size.cmp(&self.meta.size) + } +} + +impl PartialOrd for Commit { + fn partial_cmp(&self, other: &Self) -> Option { + Some(other.meta.size.cmp(&self.meta.size)) + } +} + +impl PartialEq for Commit { + fn eq(&self, other: &Self) -> bool { + self.meta.size == other.meta.size + } +} +/// +impl Commit { + /// + pub fn new(metadata: Metadata) -> Self { + let mut a = Self { + meta: metadata, + tree_id: Hash::default(), + parent_tree_ids: vec![], + author: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + committer: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + message: "".to_string(), + }; + a.decode_meta().unwrap(); + a + } + /// + + /// Decode the Metadata.data and convert to `Commit` Class + // If there a + pub(crate) fn decode_meta(&mut self) -> Result<(), GitError> { + let mut data = self.meta.data.clone(); + + // Find the tree id and remove it from the data + let tree_begin = data.find_byte(0x20).unwrap(); + let tree_end = data.find_byte(0x0a).unwrap(); + self.tree_id = 
Hash::from_bytes(&data[tree_begin + 1..tree_end].to_vec()).unwrap(); + data = data[tree_end + 1..].to_vec(); + + // Find the parent tree ids and remove them from the data + let author_begin = data.find("author").unwrap(); + if data.find_iter("parent").count() > 0 { + let mut parents: Vec = Vec::new(); + let mut index = 0; + + while index < author_begin { + let parent_begin = data.find_byte(0x20).unwrap(); + let parent_end = data.find_byte(0x0a).unwrap(); + parents + .push(Hash::from_bytes(&data[parent_begin + 1..parent_end].to_vec()).unwrap()); + index = index + parent_end + 1; + } + + self.parent_tree_ids = parents; + } + data = data[author_begin..].to_vec(); + + // Find the author and remove it from the data + let author_data = data[..data.find_byte(0x0a).unwrap()].to_vec(); + self.author.decode_from_data(author_data)?; + data = data[data.find_byte(0x0a).unwrap() + 1..].to_vec(); + + // Find the committer and remove it from the data + let committer_data = data[..data.find_byte(0x0a).unwrap()].to_vec(); + self.committer.decode_from_data(committer_data)?; + self.message = data[data.find_byte(0x0a).unwrap() + 1..] 
+ .to_vec() + .to_str() + .unwrap() + .to_string(); + + Ok(()) + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } + + /// + #[allow(unused)] + pub(crate) fn encode_metadata(&self) -> Result { + let mut data = Vec::new(); + + data.extend_from_slice("tree".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.tree_id.to_plain_str().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + + for parent_tree_id in &self.parent_tree_ids { + data.extend_from_slice("parent".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(parent_tree_id.to_plain_str().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + } + + data.extend_from_slice(self.author.encode_to_data().unwrap().as_ref()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + data.extend_from_slice(self.committer.encode_to_data().unwrap().as_ref()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + data.extend_from_slice(self.message.as_bytes()); + + Ok(Metadata::new(ObjectType::Commit, &data)) + } +} + +impl Display for Commit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Tree: {}", self.tree_id)?; + + for parent in self.parent_tree_ids.iter() { + writeln!(f, "parent: {}", parent)?; + } + + writeln!(f, "author {}", self.author)?; + writeln!(f, "committer {}", self.committer)?; + writeln!(f, "Message: {}", self.message) + } +} + +#[cfg(test)] +mod tests { + use super::AuthorSign; + use super::Metadata; + use crate::git::hash::Hash; + use crate::git::object::types::ObjectType; + use std::env; + use std::path::Path; + use std::path::PathBuf; + use std::str::FromStr; + + use super::Commit; + + fn get_empty_commit(path: PathBuf) -> super::Commit { + let meta = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + 
Commit { + meta, + tree_id: Hash::default(), + parent_tree_ids: vec![], + author: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + committer: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + message: "".to_string(), + } + } + + /// + #[test] + fn test_commit_read_from_file_without_parent() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/commit-1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a"); + + let mut commit = get_empty_commit(path); + + commit.decode_meta().unwrap(); + + assert_eq!( + String::from("1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3"), + commit.tree_id.to_plain_str() + ); + } + + /// + #[test] + fn test_commit_read_from_file_with_parent() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/commit-3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb"); + + let mut commit = get_empty_commit(path); + + commit.decode_meta().unwrap(); + + assert_eq!( + "9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20", + commit.tree_id.to_plain_str() + ); + } + + /// + #[test] + fn test_commit_write_to_file() { + let meta = Metadata::new(ObjectType::Commit, &vec![]); + + let author = AuthorSign { + t: "author".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1649521615, + timezone: "+0800".to_string(), + }; + + let committer = AuthorSign { + t: "committer".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1649521615, + timezone: "+0800".to_string(), + }; + + let mut commit = super::Commit { + meta, + tree_id:Hash::from_str("9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20").unwrap(), + parent_tree_ids: vec![ + Hash::from_str("1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a").unwrap(), + ], + author, + committer, + message:"gpgsig -----BEGIN PGP SIGNATURE-----\n \n 
iQIzBAABCAAdFiEEanuf5/5ADLU2lvsCZL9E4tsHuXIFAmJRs88ACgkQZL9E4tsH\n uXJAmBAAtubFjLjNzIgal1/Gwy/zlpw7aQvVO2xcX3Xhbeb0UJyKvrSm/Ht19kiz\n 6Bc8ZV75mpKKip93XAljUgWgAO6Q4DUFnVA5bwF1vvhKHbgXLr+I8q+5GqmLW61U\n oBrB/3aJJ/uAxElQz5nOhgB7ztCfeKQ5egbhBXn9QGqPg/RkfQmDPYsU7evk1J0Z\n CyKinbSNe0c92qE95nURzozFb1zf0rO9NtnpYohFCEO5qyuoV4nz7npnJD4Miqy9\n IUQapeJeZC7eDvU8AWbxARrkXQkyfLSebDVcqbz7WfQz+4dhoK7jADaB48oKpR/K\n bKZDJU9a2t2nPC1ojzjQJgXZ6x4linQofBR8wE1ns3W5RoRgcBSj8dQMNH8wXa/T\n oQD6hlCJpjvbiYHuc3tSgCESI4ZU7zGpL9BAQK+C91T8CUamycF1H7TAHXdzNClR\n bWO4EeRzvwZZyIL029DYFxD2IFN7OQb5jc7JvcroIW8jUN0sMPS6jY+d0bg5pgIs\n yJjmI6qPYy7R35OElfTlw8aVSOAnVbQh7MZt6n3JUyezwK9MwbiKdAYKOLYaVaC0\n ++SY+NV4Dwe6W72KhFhxwOJQRGMfES1mRxy4n85BgqfCGy7STGSBOmon3VZEl89z\n rmvdX0JXy93hGH0oUQINsN9bzpsdaQUWVND8wAnb0+sU4LvJz90=\n =9qni\n -----END PGP SIGNATURE-----\n\nAdd gust.md and modify gitmega.md\n\nSigned-off-by: Quanyi Ma \n".to_string(), + }; + + commit.meta = commit.encode_metadata().unwrap(); + + assert_eq!( + "3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb", + commit.meta.id.to_plain_str() + ); + + commit + .write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/3b/8bc1e152af7ed6b69f2acfa8be709d1733e1bb").exists()); + } +} diff --git a/pirs/src/git/object/base/mod.rs b/pirs/src/git/object/base/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..82675e34e23eb2dfed07a91702fe32331059cf53 --- /dev/null +++ b/pirs/src/git/object/base/mod.rs @@ -0,0 +1,28 @@ +pub mod blob; +pub mod commit; +pub mod sign; +pub mod tag; +pub mod tree; +use std::fmt::Display; + +pub use super::Metadata; + +/// **The Object Class Enum**
+/// Merge the four basic classes into an enumeration structure for easy saving +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum ObjClass { + BLOB(blob::Blob), + COMMIT(commit::Commit), + TREE(tree::Tree), + TAG(tag::Tag), +} +impl Display for ObjClass { + fn fmt(&self, f: &mut ::std::fmt::Formatter) -> std::fmt::Result { + match self { + ObjClass::BLOB(_) => write!(f,"BLOB"), + ObjClass::COMMIT(_) =>write!(f,"COMMIT"), + ObjClass::TREE(_) =>write!(f,"TREE"), + ObjClass::TAG(_) => write!(f,"TAG"), + } + } +} diff --git a/pirs/src/git/object/base/sign.rs b/pirs/src/git/object/base/sign.rs new file mode 100644 index 0000000000000000000000000000000000000000..1804c87b0c5ad688c6d572c516699c8a867d6883 --- /dev/null +++ b/pirs/src/git/object/base/sign.rs @@ -0,0 +1,136 @@ +//! Sign Struct +//! + +use std::fmt::Display; + +use bstr::ByteSlice; + +use crate::errors::GitError; + +/// +#[allow(unused)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct AuthorSign { + pub t: String, + pub name: String, + pub email: String, + pub timestamp: usize, + pub timezone: String, +} + +/// +impl Display for AuthorSign { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{} \n Email:<{}> \n timestamp:{}\n timezone:{}", + self.name, self.email, self.timestamp, self.timezone + ) + } +} + +/// +impl AuthorSign { + /// + #[allow(unused)] + pub(crate) fn decode_from_data(&mut self, data: Vec) -> Result<(), GitError> { + let mut data = data; + + let name_start = data.find_byte(0x20).unwrap(); + + self.t = String::from_utf8(data[..name_start].to_vec()).unwrap(); + + let email_start = data.find_byte(0x3C).unwrap(); + let email_end = data.find_byte(0x3E).unwrap(); + + self.name = data[name_start + 1..email_start - 1] + .to_str() + .unwrap() + .to_string(); + self.email = data[email_start + 1..email_end] + .to_str() + .unwrap() + .to_string(); + data = data[email_end + 2..].to_vec(); + + let timestamp_split = 
data.find_byte(0x20).unwrap(); + self.timestamp = data[0..timestamp_split] + .to_str() + .unwrap() + .parse::() + .unwrap(); + self.timezone = data[timestamp_split + 1..].to_str().unwrap().to_string(); + + Ok(()) + } + + /// + #[allow(unused)] + pub(crate) fn encode_to_data(&self) -> Result, GitError> { + let mut data = Vec::new(); + + data.extend_from_slice(self.t.as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.name.as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(0x3Cu8.to_be_bytes().as_ref()); + data.extend_from_slice(self.email.as_bytes()); + data.extend_from_slice(0x3Eu8.to_be_bytes().as_ref()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.timestamp.to_string().as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.timezone.as_bytes()); + + Ok(data) + } +} + +mod tests { + #[test] + fn test_author_sign_encode() { + let author = super::AuthorSign { + t: "author".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1649521615, + timezone: "+0800".to_string(), + }; + + let data = author.encode_to_data().unwrap(); + + let author_data = [ + 97, 117, 116, 104, 111, 114, 32, 81, 117, 97, 110, 121, 105, 32, 77, 97, 32, 60, 101, + 108, 105, 64, 112, 97, 116, 99, 104, 46, 115, 104, 62, 32, 49, 54, 52, 57, 53, 50, 49, + 54, 49, 53, 32, 43, 48, 56, 48, 48, + ] + .to_vec(); + + assert_eq!(data, author_data); + } + + #[test] + fn test_author_sign_decode() { + let author_data = [ + 97, 117, 116, 104, 111, 114, 32, 81, 117, 97, 110, 121, 105, 32, 77, 97, 32, 60, 101, + 108, 105, 64, 112, 97, 116, 99, 104, 46, 115, 104, 62, 32, 49, 54, 52, 57, 53, 50, 49, + 54, 49, 53, 32, 43, 48, 56, 48, 48, + ] + .to_vec(); + + let mut author = super::AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: 
"".to_string(), + }; + + author.decode_from_data(author_data).unwrap(); + + assert_eq!(author.t, "author"); + assert_eq!(author.name, "Quanyi Ma"); + assert_eq!(author.email, "eli@patch.sh"); + assert_eq!(author.timestamp, 1649521615); + assert_eq!(author.timezone, "+0800"); + } +} diff --git a/pirs/src/git/object/base/tag.rs b/pirs/src/git/object/base/tag.rs new file mode 100644 index 0000000000000000000000000000000000000000..f3bf6f06fa4438c8c7db298bd0230974242b9a3f --- /dev/null +++ b/pirs/src/git/object/base/tag.rs @@ -0,0 +1,264 @@ +//! Tag Struct +//! +//! +//! + +use bstr::ByteSlice; + +use super::sign::AuthorSign; +use super::Metadata; +use crate::errors::GitError; +use crate::git::hash::Hash; +use crate::git::object::types::ObjectType; + +use std::fmt::Display; + +/// Git Object: tag +use std::cmp::Ordering; +#[allow(unused)] +#[derive(Eq, Debug, Hash, Clone)] +pub struct Tag { + pub meta: Metadata, + pub object: Hash, + pub t: ObjectType, + pub tag: String, + pub tagger: AuthorSign, + pub message: String, +} +impl Ord for Tag { + fn cmp(&self, other: &Self) -> Ordering { + other.meta.size.cmp(&self.meta.size) + } +} + +impl PartialOrd for Tag { + fn partial_cmp(&self, other: &Self) -> Option { + Some(other.meta.size.cmp(&self.meta.size)) + } +} + +impl PartialEq for Tag { + fn eq(&self, other: &Self) -> bool { + self.meta.size == other.meta.size + } +} +/// +impl Tag { + /// Tag 的构造函数 接收一个@param meta::Metadata + /// 同时执行tag解码 -> `fn decode_metadata` + pub fn new(meta: Metadata) -> Self { + let mut a = Self { + meta: meta.clone(), + object: meta.id.clone(), + t: ObjectType::Commit, + tag: "".to_string(), + tagger: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + message: "".to_string(), + }; + a.decode_metadata().unwrap(); + a + } + + /// + #[allow(unused)] + fn decode_metadata(&mut self) -> Result<(), GitError> { + let mut data = self.meta.data.clone(); + + let 
object_begin = data.find_byte(0x20).unwrap(); + let object_end = data.find_byte(0x0a).unwrap(); + self.object = Hash::from_bytes(&data[object_begin + 1..object_end].to_vec()).unwrap(); + data = data[object_end + 1..].to_vec(); + + let type_begin = data.find_byte(0x20).unwrap(); + let type_end = data.find_byte(0x0a).unwrap(); + self.t = ObjectType::from_string(data[type_begin + 1..type_end].to_str().unwrap()).unwrap(); + data = data[type_end + 1..].to_vec(); + + let tag_begin = data.find_byte(0x20).unwrap(); + let tag_end = data.find_byte(0x0a).unwrap(); + self.tag = data[tag_begin + 1..tag_end] + .to_str() + .unwrap() + .parse() + .unwrap(); + data = data[tag_end + 1..].to_vec(); //Fixed Bug: bug type_end to tag_end + + let tagger_begin = data.find("tagger").unwrap(); + let tagger_end = data.find_byte(0x0a).unwrap(); + let tagger_data = data[tagger_begin..tagger_end].to_vec(); + self.tagger.decode_from_data(tagger_data)?; + data = data[data.find_byte(0x0a).unwrap() + 1..].to_vec(); + + self.message = data[data.find_byte(0x0a).unwrap()..] 
+ .to_vec() + .to_str() + .unwrap() + .to_string(); + + Ok(()) + } + + /// + #[allow(unused)] + fn encode_metadata(&self) -> Result { + let mut data = Vec::new(); + + data.extend_from_slice("object".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.object.to_plain_str().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + + data.extend_from_slice("type".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.t.to_string().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + + data.extend_from_slice("tag".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.tag.as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + + data.extend_from_slice(self.tagger.encode_to_data().unwrap().as_ref()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + data.extend_from_slice(self.message.as_bytes()); + + Ok(Metadata::new(ObjectType::Tag, &data)) + } + + /// + #[allow(unused)] + fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } +} +impl Display for Tag { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Type: Tag").unwrap(); + writeln!(f, "Tag : {}", self.tag).unwrap(); + self.tagger.fmt(f).unwrap(); + writeln!(f, "{}", self.message) + } +} +/// +#[cfg(test)] +mod tests { + use crate::git::hash::Hash; + use crate::git::hash::HashType; + use crate::git::object::types::ObjectType; + use std::env; + use std::path::Path; + use std::path::PathBuf; + use std::str::FromStr; + use std::vec; + + use super::AuthorSign; + use super::Metadata; + + use super::Tag; + + /// + #[test] + fn test_tag_read_from_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/tag-e5c324b03b72b26f11557c4955c6d17c68dc8595"); + + let meta = 
Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(ObjectType::Tag, meta.t); + assert_eq!(976, meta.size); + assert_eq!( + "e5c324b03b72b26f11557c4955c6d17c68dc8595", + meta.id.to_plain_str() + ); + + let mut tag = Tag { + meta, + object: Hash::default(), + t: ObjectType::Commit, + tag: "".to_string(), + tagger: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "+0000".to_string(), + }, + message: "".to_string(), + }; + + tag.decode_metadata().unwrap(); + + assert_eq!( + "6414e45babf0bdd043ba40d31123053cfebef26c", + tag.object.to_plain_str() + ); + assert_eq!("commit", tag.t.to_string()); + assert_eq!("v1.1.0", tag.tag); + assert_eq!(1653037847, tag.tagger.timestamp); + println!("{}", tag); + } + + #[test] + fn test_output_meat() { + let meta = Metadata { + t: ObjectType::Tag, + h: HashType::Sha1, + id: Hash::from_str("df1087c478c8d337cb587b897e86f2455e2687ed").unwrap(), + size: 155, + data: vec![ + 111, 98, 106, 101, 99, 116, 32, 51, 55, 50, 49, 51, 101, 55, 98, 98, 51, 99, 51, + 51, 52, 97, 48, 102, 55, 55, 48, 56, 99, 55, 97, 102, 99, 97, 98, 53, 98, 97, 98, + 98, 51, 102, 57, 53, 52, 51, 52, 10, 116, 121, 112, 101, 32, 99, 111, 109, 109, + 105, 116, 10, 116, 97, 103, 32, 48, 46, 49, 10, 116, 97, 103, 103, 101, 114, 32, + 97, 100, 105, 116, 121, 97, 32, 60, 100, 101, 118, 64, 99, 104, 105, 109, 101, 114, + 97, 99, 111, 100, 101, 114, 46, 110, 101, 116, 62, 32, 49, 52, 50, 56, 54, 49, 50, + 48, 48, 55, 32, 45, 48, 52, 48, 48, 10, 10, 70, 105, 114, 115, 116, 32, 105, 109, + 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111, 110, 32, 111, 102, 32, 116, + 104, 101, 32, 99, 108, 105, 10, + ], + delta_header: vec![], + }; + + let tag = Tag::new(meta); + + println!("{}", tag); + } + /// + #[test] + fn test_tag_write_to_file() { + let meta = Metadata::new(ObjectType::Tag, &vec![]); + + let tagger = AuthorSign { + t: "tagger".to_string(), + name: 
"Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1653037847, + timezone: "+0800".to_string(), + }; + + let mut tag = Tag { + meta, + object: Hash::from_str("6414e45babf0bdd043ba40d31123053cfebef26c").unwrap(), + t: ObjectType::Commit, + tag: "v1.1.0".to_string(), + tagger, + message: "\nIt's a lastest object\n-----BEGIN PGP SIGNATURE-----\n\niQIzBAABCAAdFiEEanuf5/5ADLU2lvsCZL9E4tsHuXIFAmKHWxcACgkQZL9E4tsH\nuXIeFhAAtX+foSvc7/1lb98+QfRjHcpO+LX+LroTaq/QGOTX/2gE+tHD2TJAga1I\nVqDEz8fh8AE366FC7UCjCb5nvsCCox2htzbIxAjsc9L/JckWtxl6WOa/5OZssrDQ\nFtX39BNNl+4TfNn/z1XV+28c9yB1N5HSoP2gzdLoASw3y9n6E0FyzLdoXPILgmJI\nL4DAG/OFkixK+I+TsK+6995497h9BCi3x30dOjfxZS9ptiKhqWulbkflvvM9Cnie\n7obXYmnoe0jBjSfO5GgJlOYcLzE9MMYYzIx47/4lcrCbQXnojkW3KV03PEXGfRCL\nw/y8oBHVvNVRF0Jn+o7F+mzIrbF6Ufku63MfRf7WmbbS3B63CILEjNyuOFoe8mDb\nrmAUffzQSrgnvBk+g01slb6Q+q7Urw6wqHtBPn3ums/inHE9ymTqS7ffmRifUfR8\nD8LvhwpSUI7BdiN6HznRFPxMXzohYIqAJbUltjr4Q7qw/kJI+305Xcs1U5AUIaOp\n77p2UFHRVoMM5mpPOCSwsVJ6cSuOjWXf9afcNMrhgclKefM0aXXnd2p5zTUEe99T\nlAtXHuprRwxtSQUzHxJCdGlUGRGRR2aS9W984SNDVmcegnOIrZD2pVm/tjDwVex5\nMuAuKHr8et1EKyvKCnta6USq7WC2l6RdsCaAYzSTQ7ljEi9A+6Q=\n=/9g0\n-----END PGP SIGNATURE-----\n".to_string(), + }; + + tag.meta = tag.encode_metadata().unwrap(); + assert_eq!( + "e5c324b03b72b26f11557c4955c6d17c68dc8595", + tag.meta.id.to_plain_str() + ); + + tag.write_to_file("/tmp".to_string()).expect("Write error!"); + assert!(Path::new("/tmp/e5/c324b03b72b26f11557c4955c6d17c68dc8595").exists()); + } +} diff --git a/pirs/src/git/object/base/tree.rs b/pirs/src/git/object/base/tree.rs new file mode 100644 index 0000000000000000000000000000000000000000..a77a4bb38bcc3d6882c8601a50298449d28541d6 --- /dev/null +++ b/pirs/src/git/object/base/tree.rs @@ -0,0 +1,410 @@ +//! Tree Struct +//! 
+ + +use std::fmt::Display; + +use super::super::Hash; +use super::Metadata; +use crate::errors::GitError; +use crate::git::object::types::ObjectType; +use bstr::ByteSlice; + +/// +#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Debug, Clone, Copy)] +pub enum TreeItemType { + Blob, + BlobExecutable, + Tree, + Commit, + Link, +} + +use colored::Colorize; + +impl Display for TreeItemType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let _print = match *self { + TreeItemType::Blob => "blob", + TreeItemType::BlobExecutable => "blob executable", + TreeItemType::Tree => "tree", + TreeItemType::Commit => "commit", + TreeItemType::Link => "link", + }; + write!(f, "{}", String::from(_print).blue()) + } +} + +/// +impl TreeItemType { + /// + #[allow(unused)] + pub(crate) fn to_bytes(self) -> &'static [u8] { + match self { + TreeItemType::Blob => b"100644", + TreeItemType::BlobExecutable => b"100755", + TreeItemType::Tree => b"40000", + TreeItemType::Link => b"120000", + TreeItemType::Commit => b"160000", + } + } + + /// + #[allow(unused)] + pub(crate) fn tree_item_type_from(mode: &[u8]) -> Result { + Ok(match mode { + b"40000" => TreeItemType::Tree, + b"100644" => TreeItemType::Blob, + b"100755" => TreeItemType::BlobExecutable, + b"120000" => TreeItemType::Link, + b"160000" => TreeItemType::Commit, + b"100664" => TreeItemType::Blob, + b"100640" => TreeItemType::Blob, + _ => { + return Err(GitError::InvalidTreeItem( + String::from_utf8(mode.to_vec()).unwrap(), + )) + } + }) + } +} + +/// Git Object: tree item +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TreeItem { + pub mode: Vec, + pub item_type: TreeItemType, + pub id: Hash, + pub filename: String, +} + +/// Git Object: tree +use std::cmp::Ordering; +#[derive(Eq, Debug, Hash, Clone)] +pub struct Tree { + pub meta: Metadata, + pub tree_items: Vec, + pub tree_name: String, +} +impl Ord for Tree { + fn cmp(&self, other: &Self) -> Ordering { + let o = 
other.tree_name.cmp(&self.tree_name); + match o { + Ordering::Equal => other.meta.size.cmp(&self.meta.size), + _ => o, + } + } +} + +impl PartialOrd for Tree { + fn partial_cmp(&self, other: &Self) -> Option { + let o = other.tree_name.cmp(&self.tree_name); + match o { + Ordering::Equal => Some(other.meta.size.cmp(&self.meta.size)), + _ => Some(o), + } + } +} + +impl PartialEq for Tree { + fn eq(&self, other: &Self) -> bool { + if self.tree_name.eq(&other.tree_name) { + return true; + } + false + } +} + +impl Display for Tree { + #[allow(unused)] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Type: Tree"); + for item in &self.tree_items { + writeln!( + f, + "{:6} {} {} {}", + String::from_utf8(item.mode.to_vec()).unwrap(), + item.item_type, + item.id, + item.filename + ); + } + writeln!(f, "Tree Name: {}", self.tree_name); + Ok(()) + } +} + +/// +impl Tree { + pub fn new(metadata: Metadata) -> Self { + let mut a = Self { + meta: metadata, + tree_items: vec![], + tree_name: String::new(), + }; + a.decode_metadata().unwrap(); + a + } + + pub(crate) fn decode_metadata(&mut self) -> Result<(), GitError> { + let mut index = 0; + while index < self.meta.data.len() { + let mode_index = &self.meta.data[index..].find_byte(0x20).unwrap(); + let mode = &self.meta.data[index..index + *mode_index]; + let item_type = TreeItemType::tree_item_type_from(mode).unwrap(); + + let filename_index = &self.meta.data[index..].find_byte(0x00).unwrap(); + let filename = String::from_utf8( + self.meta.data[index + mode_index + 1..index + *filename_index].to_vec(), + ) + .unwrap(); + + let id = Hash::from_row( + &self.meta.data[index + filename_index + 1..index + filename_index + 21].to_vec(), + ); + + self.tree_items.push(TreeItem { + mode: mode.to_vec(), + item_type, + id, + filename, + }); + + index = index + filename_index + 21; + } + + Ok(()) + } + + /// + #[allow(unused)] + pub(crate) fn encode_metadata(&self) -> Result { + let mut data = Vec::new(); + 
for item in &self.tree_items { + data.extend_from_slice(&item.mode); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(item.filename.as_bytes()); + data.extend_from_slice(0x00u8.to_be_bytes().as_ref()); + data.extend_from_slice(&item.id.0.to_vec()); + } + + Ok(Metadata::new(ObjectType::Tree, &data)) + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } +} + +/// +#[cfg(test)] +mod tests { + use std::env; + use std::path::Path; + use std::path::PathBuf; + use std::sync::Arc; + use std::vec; + + use super::super::blob::Blob; + use super::Metadata; + use super::ObjectType; + use crate::git::hash::Hash; + use crate::git::hash::HashType; + + use super::Tree; + use super::TreeItemType; + + /// + #[test] + fn test_tree_write_to_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md"); + + let meta = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(meta.t, ObjectType::Blob); + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + meta.id.to_plain_str() + ); + assert_eq!(16, meta.size); + + let blob = Blob { + meta: Arc::new(meta), + filename: String::new(), + }; + + assert_eq!( + "# Hello Gitmega\n", + String::from_utf8(blob.meta.data.clone()).unwrap().as_str() + ); + + let item = blob.to_tree_item(String::from("gitmega.md")).unwrap(); + + let mut tree = Tree { + tree_name: String::new(), + meta: Metadata { + t: ObjectType::Tree, + h: HashType::Sha1, + id: Hash::default(), + size: 0, + data: vec![], + delta_header: vec![], + }, + tree_items: vec![item], + }; + + tree.meta = tree.encode_metadata().unwrap(); + tree.write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/1b/dbc1e723aa199e83e33ecf1bb19f874a56ebc3").exists()); + } + + /// + #[test] + fn 
test_tree_write_to_file_2_blob() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-fc1a505ac94f98cc5f29100a2d9aef97027a32fb-gitmega.md"); + + let meta_gitmega = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + let blob_gitmega = Blob { + meta:Arc::new(meta_gitmega) , + filename: String::new(), + }; + + let item_gitmega = blob_gitmega + .to_tree_item(String::from("gitmega.md")) + .unwrap(); + + path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c-gust.md"); + + let meta_gust = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + let blob_gust = Blob { + meta:Arc::new(meta_gust) , + filename: String::new(), + }; + + let item_gust = blob_gust.to_tree_item(String::from("gust.md")).unwrap(); + + let mut tree = Tree { + tree_name: String::new(), + meta: Metadata { + t: ObjectType::Tree, + h: HashType::Sha1, + id: Hash::default(), + size: 0, + data: vec![], + delta_header: vec![], + }, + tree_items: vec![item_gitmega, item_gust], + }; + + tree.meta = tree.encode_metadata().unwrap(); + tree.write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/9b/be4087bedef91e50dc0c1a930c1d3e86fd5f20").exists()); + } + + /// + #[test] + fn test_tree_read_from_file() { + // 100644 blob 82352c3a6a7a8bd32011751699c7a3648d1b5d3c gitmega.md + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/tree-1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3"); + + let meta = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(ObjectType::Tree, meta.t); + assert_eq!(38, meta.size); + + let mut tree = Tree { + meta, + tree_items: Vec::new(), + tree_name: String::new(), + }; + + tree.decode_metadata().unwrap(); + + assert_eq!(1, tree.tree_items.len()); + 
assert_eq!("gitmega.md", tree.tree_items[0].filename.as_str()); + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + tree.tree_items[0].id.to_plain_str() + ); + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[0].mode.to_vec()) + .unwrap() + .as_str() + ); + assert_eq!(TreeItemType::Blob, tree.tree_items[0].item_type); + } + + /// + #[test] + fn test_tree_read_from_file_2_items() { + // 100644 blob fc1a505ac94f98cc5f29100a2d9aef97027a32fb gitmega.md + // 100644 blob a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c gust.md + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/tree-9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20"); + + let meta = Metadata::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(ObjectType::Tree, meta.t); + assert_eq!(73, meta.size); + + let mut tree = Tree { + meta, + tree_items: Vec::new(), + tree_name: String::new(), + }; + + tree.decode_metadata().unwrap(); + + assert_eq!(2, tree.tree_items.len()); + + assert_eq!("gitmega.md", tree.tree_items[0].filename.as_str()); + + assert_eq!( + "fc1a505ac94f98cc5f29100a2d9aef97027a32fb", + tree.tree_items[0].id.to_plain_str() + ); + + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[0].mode.to_vec()) + .unwrap() + .as_str() + ); + + assert_eq!(TreeItemType::Blob, tree.tree_items[0].item_type); + + assert_eq!("gust.md", tree.tree_items[1].filename.as_str()); + + assert_eq!( + "a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c", + tree.tree_items[1].id.to_plain_str() + ); + + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[1].mode.to_vec()) + .unwrap() + .as_str() + ); + + assert_eq!(TreeItemType::Blob, tree.tree_items[1].item_type); + } +} diff --git a/pirs/src/git/object/delta.rs b/pirs/src/git/object/delta.rs new file mode 100644 index 0000000000000000000000000000000000000000..d7721a907394d151da11c8612ee6f1b3e14c7a8f --- /dev/null +++ b/pirs/src/git/object/delta.rs @@ -0,0 +1,178 @@ +/// 
Processing of delta object +use super::{Hash, Metadata}; +use crate::errors::GitError; +use crate::utils; +use flate2::read::ZlibDecoder; +use std::fs::File; +use std::io::{ErrorKind, Read}; +use std::path::Path; +use std::str::FromStr; + +const COPY_INSTRUCTION_FLAG: u8 = 1 << 7; +const COPY_OFFSET_BYTES: u8 = 4; +const COPY_SIZE_BYTES: u8 = 3; +const COPY_ZERO_SIZE: usize = 0x10000; + +///使用delta指令 +pub fn apply_delta(pack_file: &mut File, base: &Metadata) -> Result { + utils::read_zlib_stream_exact(pack_file, |delta| { + let base_size = utils::read_size_encoding(delta).unwrap(); + if base.size != base_size { + return Err(GitError::DeltaObjError( + String::from_str("Incorrect base object length").unwrap(), + )); + } + + let result_size = utils::read_size_encoding(delta)?; + let mut result = Vec::with_capacity(result_size); + while apply_delta_instruction(delta, &base.data, &mut result)? {} + if result.len() != result_size { + return Err(GitError::DeltaObjError( + String::from_str("Incorrect object length").unwrap(), + )); + } + + // The object type is the same as the base object + Ok(Metadata::new(base.t, &result)) + }) +} + +///执行单个delta指令 +fn apply_delta_instruction( + stream: &mut R, + base: &[u8], + result: &mut Vec, +) -> Result { + // Check if the stream has ended, meaning the new object is done + let instruction = match utils::read_bytes(stream) { + Ok([instruction]) => instruction, + Err(err) if err.kind() == ErrorKind::UnexpectedEof => return Ok(false), + Err(err) => { + return Err(GitError::DeltaObjError(format!( + "Wrong instruction in delta :{}", + err.to_string() + ))) + } + }; + if instruction & COPY_INSTRUCTION_FLAG == 0 { + // Data instruction; the instruction byte specifies the number of data bytes + if instruction == 0 { + // Appending 0 bytes doesn't make sense, so git disallows it + return Err(GitError::DeltaObjError( + String::from_str("Invalid data instruction").unwrap(), + )); + } + + // Append the provided bytes + let mut data = vec![0; 
instruction as usize]; + stream.read_exact(&mut data)?; + result.extend_from_slice(&data); + } else { + // Copy instruction + let mut nonzero_bytes = instruction; + let offset = utils::read_partial_int(stream, COPY_OFFSET_BYTES, &mut nonzero_bytes)?; + let mut size = utils::read_partial_int(stream, COPY_SIZE_BYTES, &mut nonzero_bytes)?; + if size == 0 { + // Copying 0 bytes doesn't make sense, so git assumes a different size + size = COPY_ZERO_SIZE; + } + // Copy bytes from the base object + let base_data = base + .get(offset..(offset + size)) + .ok_or_else(|| GitError::DeltaObjError(format!("Invalid copy instruction"))); + + match base_data { + Ok(data) => result.extend_from_slice(data), + Err(e) => return Err(e), + } + } + + Ok(true) +} + +// 这里默认的是若是pack里面没有,则只能从loose里面找了 +#[allow(unused)] +pub fn read_object(hash: Hash) -> Result { + let object = match read_unpacked_object(hash) { + // Found in objects directory + Ok(object) => object, + // Not found in objects directory; look in packfiles + Err(_err) => panic!("not found object"), + }; + + let object_hash = object.hash(); + if object_hash != hash { + return Err(GitError::DeltaObjError(format!( + "Object {} has wrong hash {}", + hash, object_hash + ))); + } + + Ok(object) +} + +const OBJECTS_DIRECTORY: &str = ".git/objects"; + +///读出unpack 的Object +#[allow(unused)] +fn read_unpacked_object(hash: Hash) -> Result { + use super::ObjectType::*; + + let hex_hash = hash.to_string(); + let (directory_name, file_name) = hex_hash.split_at(2); + let object_file = Path::new(OBJECTS_DIRECTORY) + .join(directory_name) + .join(file_name); + let object_file = File::open(object_file)?; + let mut object_stream = ZlibDecoder::new(object_file); + let object_type = utils::read_until_delimiter(&mut object_stream, b' ')?; + let object_type = match &object_type[..] 
/// Parse an ASCII base-10 number from a byte slice.
///
/// Returns `None` if any byte is not a digit or the value overflows `usize`;
/// an empty slice yields `Some(0)` (same contract as the original loop).
fn parse_decimal(decimal_str: &[u8]) -> Option<usize> {
    decimal_str.iter().try_fold(0usize, |acc, &byte| {
        let digit = decimal_char_value(byte)? as usize;
        acc.checked_mul(10)?.checked_add(digit)
    })
}

/// Map `b'0'..=b'9'` to `0..=9`; any other byte yields `None`.
fn decimal_char_value(decimal_char: u8) -> Option<u8> {
    if decimal_char.is_ascii_digit() {
        Some(decimal_char - b'0')
    } else {
        None
    }
}
+ pub fn new(old: Metadata, new: Metadata) -> Self { + assert_eq!(old.t, new.t); + let mut _new = DeltaDiff { + ops: vec![], + old_data: old.clone(), + new_data: new.clone(), + + ssam: 0, + ssam_r: 0.00, + }; + + myers::diff( + &mut _new, + &old.data, + 0, + old.data.len(), + &new.data, + 0, + new.data.len(), + ) + .unwrap(); + _new + } + + pub fn get_delta_metadata(&self) -> Vec { + let mut result: Vec = vec![]; + + // 解码后长度编码 + //BUG : 更改这里的读取 + result.append(&mut utils::write_size_encoding(self.old_data.size)); + result.append(&mut utils::write_size_encoding(self.new_data.size)); + + // 编码格式 + for op in &self.ops { + result.append(&mut self.decode_op(op)); + } + result + } + + fn decode_op(&self, op: &DeltaOp) -> Vec { + let mut op_data = vec![]; + match op.ins { + Optype::DATA => { + assert!(op.len < 0x7f); + let instruct = (op.len & 0x7f) as u8; + op_data.push(instruct); + op_data.append(&mut self.new_data.data[op.begin..op.begin + op.len].to_vec()); + } + Optype::COPY => { + //TODO 暂时不考虑超出范围的情况 + let mut instruct: u8 = 0x80; + let mut offset = op.begin; + let mut size = op.len; + let mut copy_data = vec![]; + assert!(op.len < 0x1000000); + for i in 0..4 { + let _bit = (offset & 0xff) as u8; + if _bit != 0 { + instruct |= (1 << i) as u8; + copy_data.push(_bit) + } + offset >>= 8; + } + for i in 4..7 { + let _bit = (size & 0xff) as u8; + if _bit != 0 { + instruct |= (1 << i) as u8; + copy_data.push(_bit) + } + size >>= 8; + } + op_data.push(instruct); + op_data.append(&mut copy_data); + } + } + op_data + } + + pub fn get_ssam_rate(&self) -> f64 { + self.ssam_r + } +} +impl Diff for DeltaDiff { + type Error = (); + /// offset < 2^32 + /// len < 2^24 + fn equal(&mut self, _old: usize, _new: usize, _len: usize) -> Result<(), Self::Error> { + // 暂时未支持长度过大时的拆分情况 + assert!(_old < (1 << 33)); + assert!(_len < (1 << 25)); + self.ssam += _len; + if let Some(tail) = self.ops.last_mut() { + if tail.begin + tail.len == _old && tail.ins == Optype::COPY { + tail.len += _len; 
+ } else { + self.ops.push(DeltaOp { + ins: Optype::COPY, + begin: _old, + len: _len, + }); + } + } else { + self.ops.push(DeltaOp { + ins: Optype::COPY, + begin: _old, + len: _len, + }); + } + + Ok(()) + } + + /// insert _len < 2 ^ 7 + fn insert(&mut self, _old: usize, _new: usize, _len: usize) -> Result<(), ()> { + // 暂时未支持长度过大时的拆分情况 + + // // | 0xxxxxxx | |data| | + let mut len = _len; + let mut new = _new; + if _len > DATA_INS_LEN { + while len > DATA_INS_LEN { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: DATA_INS_LEN, + }); + len -= DATA_INS_LEN; + new += DATA_INS_LEN; + } + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: len, + }); + } else { + if let Some(tail) = self.ops.last_mut() { + if tail.begin + tail.len == _new + && tail.ins == Optype::DATA + && tail.len + _len < DATA_INS_LEN + { + tail.len += _len; + } else { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: len, + }); + } + } else { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: len, + }); + } + } + + Ok(()) + } + + fn finish(&mut self) -> Result<(), Self::Error> { + // compute the ssam rate when finish the diff process. + self.ssam_r = self.ssam as f64 / self.new_data.data.len() as f64; + Ok(()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum Optype { + DATA, // 插入的数据 + COPY, // 数据复制 +} + +#[derive(Debug, Clone, Copy)] +struct DeltaOp { + /// instruction type + ins: Optype, + /// data begin position + begin: usize, + /// data long + len: usize, +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use super::DeltaDiff; + use crate::{ + git::{ + object::{types::ObjectType, Metadata}, + pack::Pack, + }, + utils, + }; + use bstr::ByteSlice; + + /// 通过两个metadata 来进行对后者No.2的压缩 + /// 首先,需要两个是相同的类型(ObjectType) + /// 先确定要进行什么类型的压缩, + /// 1. ofs-object 将以No.1为base压缩为ofs-object,offset 来标识负距离上的object开头 + /// 2. 
ref-object 将以No.1为base, 以hash值作为标识 + /// 两种delta的共性:都需要未压缩的header编码。ofs 是sized编码的开头。ref是hash的20位u8 + /// 1, + /// + #[test] + fn test_metadata_diff_ofs_delta() { + let m1 = Metadata::read_object_from_file( + "./resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e".to_string(), + ) + .unwrap(); + let mut m2 = Metadata::read_object_from_file( + "./resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3".to_string(), + ) + .unwrap(); + let diff = DeltaDiff::new(m1.clone(), m2.clone()); + println!("{:?}", diff.ops); + let meta_vec1 = m1.convert_to_vec().unwrap(); + + // 对于offset的 + // 不需要压缩的size + let offset_head = utils::write_offset_encoding(meta_vec1.len() as u64); + + // 需要压缩的指令data + let zlib_data = diff.get_delta_metadata(); + m2.change_to_delta(ObjectType::OffsetDelta, zlib_data, offset_head); + + // 排好序后直接把metadata按顺序放入Vec就行了 + let meta_vec = vec![m1, m2]; + let mut _pack = Pack::default(); + let pack_file_data = _pack.encode(Some(meta_vec)); + //_pack + let mut file = std::fs::File::create("delta_ofs.pack").expect("create failed"); + file.write_all(pack_file_data.as_bytes()) + .expect("write failed"); + Pack::decode_file("delta_ofs.pack"); + } + + #[test] + fn test_metadata_diff_ref_delta() { + let m1 = Metadata::read_object_from_file( + "./resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e".to_string(), + ) + .unwrap(); + let mut m2 = Metadata::read_object_from_file( + "./resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3".to_string(), + ) + .unwrap(); + let diff = DeltaDiff::new(m1.clone(), m2.clone()); + println!("{:?}", diff); + + //不需要压缩 + let offset_head = m1.id.0.to_vec(); + assert!(offset_head.len() == 20); + + //需要压缩 + let zlib_data = diff.get_delta_metadata(); + m2.change_to_delta(ObjectType::HashDelta, zlib_data, offset_head); + + let meta_vec = vec![m1, m2]; + let mut _pack = Pack::default(); + let pack_file_data = _pack.encode(Some(meta_vec)); + //_pack + let mut file = std::fs::File::create("delta_ref.pack").expect("create failed"); + 
file.write_all(pack_file_data.as_bytes()) + .expect("write failed"); + Pack::decode_file("delta_ref.pack"); + } +} diff --git a/pirs/src/git/object/metadata.rs b/pirs/src/git/object/metadata.rs new file mode 100644 index 0000000000000000000000000000000000000000..1d5f674e2f1ffc78f3813381ce16d1cc481242e0 --- /dev/null +++ b/pirs/src/git/object/metadata.rs @@ -0,0 +1,157 @@ +//! the metadata of the all types of objects ,which save the total data of the object +use anyhow::Context; +use bstr::ByteSlice; +use deflate::{write::ZlibEncoder, Compression}; +use flate2::read::ZlibDecoder; +use std::fs::{create_dir_all, File}; +use std::io::{BufReader, Read, Write}; +use std::path::PathBuf; + +use super::Hash; +use super::ObjectType; +use crate::errors::GitError; +use crate::git::hash::HashType; +/// The metadata of git object. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Metadata { + pub t: ObjectType, + pub h: HashType, + pub id: Hash, + pub size: usize, + pub data: Vec, + pub delta_header: Vec, +} + +/// Implement function for Metadata +impl Metadata { + pub fn hash(&self) -> Hash { + Hash::from_meta(&self) + } + pub fn new(obj_type: ObjectType, data: &Vec) -> Metadata { + let mut _metadata = Metadata { + t: obj_type, + h: HashType::Sha1, + id: Hash::default(), + size: data.len(), + data: data.to_vec(), + delta_header: vec![], + }; + // compute hash value + _metadata.id = _metadata.hash(); + _metadata + } + + /// Write the object to the file system with folder and file. 
+ /// This function can create a “loose” object format, + /// which can convert into the `.pack` format by the Command: + /// ```bash + /// git gc + /// ``` + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); + encoder.write_all(&self.t.to_bytes()); + encoder.write(&[b' ']); + encoder.write(self.data.len().to_string().as_bytes()); + encoder.write(&[b'\0']); + encoder.write_all(&self.data).expect("Write error!"); + let compressed_data = encoder.finish().expect("Failed to finish compression!"); + + let mut path = PathBuf::from(root_path); + path.push(&self.id.to_folder()); + create_dir_all(&path) + .with_context(|| format!("Failed to create directory: {}", path.display())) + .unwrap(); + + path.push(&self.id.to_filename()); + + let mut file = File::create(&path) + .with_context(|| format!("Failed to create file: {}", path.display())) + .unwrap(); + file.write_all(&compressed_data) + .with_context(|| format!("Failed to write to file: {}", path.display())) + .unwrap(); + + Ok(path.to_str().unwrap().to_string()) + } + + ///Convert Metadata to the Vec ,so that it can write to File + pub fn convert_to_vec(&self) -> Result, GitError> { + let mut compressed_data = + vec![(0x80 | (self.t.type2_number() << 4)) + (self.size & 0x0f) as u8]; + let mut _size = self.size >> 4; + if _size > 0 { + while _size > 0 { + if _size >> 7 > 0 { + compressed_data.push((0x80 | _size) as u8); + _size >>= 7; + } else { + compressed_data.push((_size) as u8); + break; + } + } + } else { + compressed_data.push(0); + } + match self.t { + ObjectType::OffsetDelta => { + compressed_data.append(&mut self.delta_header.clone()); + } + ObjectType::HashDelta => { + compressed_data.append(&mut self.delta_header.clone()); + } + _ => {} + } + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); + encoder.write_all(&self.data).expect("Write error!"); + compressed_data.append(&mut 
encoder.finish().expect("Failed to finish compression!")); + Ok(compressed_data) + } + + /// Read the object from the file system and parse to a metadata object.
+ /// This file is the “loose” object format. + #[allow(unused)] + pub(crate) fn read_object_from_file(path: String) -> Result { + let file = File::open(path)?; + let mut reader = BufReader::new(file); + let mut data = Vec::new(); + reader.read_to_end(&mut data)?; + + let mut decoder = ZlibDecoder::new(&data[..]); + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded)?; + + let type_index = decoded.find_byte(0x20).unwrap(); + let t = &decoded[0..type_index]; + + let size_index = decoded.find_byte(0x00).unwrap(); + let size = decoded[type_index + 1..size_index] + .iter() + .copied() + .map(|x| x as char) + .collect::() + .parse::() + .unwrap(); + + let mut data = decoded[size_index + 1..].to_vec(); + + match String::from_utf8(t.to_vec()).unwrap().as_str() { + "blob" => Ok(Metadata::new(ObjectType::Blob, &data)), + "tree" => Ok(Metadata::new(ObjectType::Tree, &data)), + "commit" => Ok(Metadata::new(ObjectType::Commit, &data)), + "tag" => Ok(Metadata::new(ObjectType::Tag, &data)), + _ => Err(GitError::InvalidObjectType( + String::from_utf8(t.to_vec()).unwrap(), + )), + } + } + + /// Change the base object to the delta object , + /// including : ref-object ofs-object + pub fn change_to_delta(&mut self, types: ObjectType, changed: Vec, header: Vec) { + self.t = types; + self.data = changed; + self.size = self.data.len(); + self.delta_header = header; + } +} diff --git a/pirs/src/git/object/mod.rs b/pirs/src/git/object/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..0fcedd55960f3309975503bb59c2bcca81430d2b --- /dev/null +++ b/pirs/src/git/object/mod.rs @@ -0,0 +1,45 @@ +//!Object struct , contain the raw info cut from the pack file or other file +//! 
+ +use super::hash::Hash; +use types::ObjectType; + +pub mod base; +pub mod delta; +pub mod diff; +mod metadata; +pub mod types; +pub use metadata::Metadata; + +//Object内存存储类型 +///# Deprecate +#[derive(Clone, Debug)] +pub struct Object { + pub object_type: ObjectType, + pub contents: Vec, +} +#[allow(dead_code)] +impl Object { + /// object 的 hash转化函数 + pub fn hash(&self) -> Hash { + Hash::from_meta(&self.to_metadata()) + } + // pub fn GetObjectFromPack() + pub fn to_metadata(&self) -> Metadata { + Metadata::new(self.object_type, &self.contents) + } +} + +#[cfg(test)] +mod tests { + use super::Object; + + #[test] + fn test_obj_hash() { + let _obj = Object { + object_type: super::types::ObjectType::Blob, + contents: String::from("hello ,sss").into_bytes(), + }; + print!("{}", _obj.hash()); //602091219933865cace5ab8cd78b424735c82e6c + } +} diff --git a/pirs/src/git/object/types.rs b/pirs/src/git/object/types.rs new file mode 100644 index 0000000000000000000000000000000000000000..541e82f4342a834473a8352c7b62965389c975ea --- /dev/null +++ b/pirs/src/git/object/types.rs @@ -0,0 +1,87 @@ +//! ### Types enums for object types +//! There are ObjectType +//! PackObjectType +//! +//! 
+use crate::errors::GitError; +use std::{fmt::Display, vec}; + +/// Four abstract Object Types: +/// - Blob +/// - Tree +/// - Commit +/// - Tag +/// - OffsetDelta(6) +/// - HashDelta(7) +#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Debug, Clone, Copy)] +pub enum ObjectType { + Commit, + Tree, + Blob, + Tag, + OffsetDelta, + HashDelta, +} + +/// Display trait for Git objects type +impl Display for ObjectType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + ObjectType::Blob => write!(f, "blob"), + ObjectType::Tree => write!(f, "tree"), + ObjectType::Commit => write!(f, "commit"), + ObjectType::Tag => write!(f, "tag"), + ObjectType::OffsetDelta => write!(f, "OffsetDelta"), + ObjectType::HashDelta => write!(f, "HashDelta"), + } + } +} + +/// +impl ObjectType { + /// + #[allow(unused)] + pub fn to_bytes(self) -> Vec { + match self { + ObjectType::Blob => vec![0x62, 0x6c, 0x6f, 0x62], + ObjectType::Tree => vec![0x74, 0x72, 0x65, 0x65], + ObjectType::Commit => vec![0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74], + ObjectType::Tag => vec![0x74, 0x61, 0x67], + _ => vec![], + } + } + + /// + #[allow(unused)] + pub fn from_string(s: &str) -> Result { + match s { + "blob" => Ok(ObjectType::Blob), + "tree" => Ok(ObjectType::Tree), + "commit" => Ok(ObjectType::Commit), + "tag" => Ok(ObjectType::Tag), + _ => Err(GitError::InvalidObjectType(s.to_string())), + } + } + pub fn type2_number(&self) -> u8 { + match self { + ObjectType::Commit => 1, + ObjectType::Tree => 2, + ObjectType::Blob => 3, + ObjectType::Tag => 4, + ObjectType::OffsetDelta => 6, + ObjectType::HashDelta => 7, + } + } + + pub fn number_type(num: u8) -> Self { + match num { + 1 => ObjectType::Commit, + 2 => ObjectType::Tree, + 3 => ObjectType::Blob, + 4 => ObjectType::Tag, + 6 => ObjectType::OffsetDelta, + 7 => ObjectType::HashDelta, + _ => panic!("InValid git types"), + } + } +} diff --git a/pirs/src/git/pack/cache.rs b/pirs/src/git/pack/cache.rs new file mode 100644 index 
0000000000000000000000000000000000000000..6d6bddf746f17ead2e6ac76957a245ad1fc287f9 --- /dev/null +++ b/pirs/src/git/pack/cache.rs @@ -0,0 +1,43 @@ +//! Build Cache Info for the decode packed object +use crate::git::object::Metadata; +use std::collections::{BTreeMap, HashMap}; + +use super::super::hash::Hash; + +use std::sync::Arc; + +/// #### Build Cache Info for the decode packed object +/// There are two hashmap for object ,
+/// the keys is `hash value` of The object +#[derive(Default, Clone)] +pub struct PackObjectCache { + pub by_hash: BTreeMap>, + pub by_offset: HashMap, + pub offset_hash: BTreeMap, +} +// +impl PackObjectCache { + /// update cache by input object:`Rc` and the offset:`u64` + pub fn update(&mut self, object: Arc, offset: u64) { + let _hash = object.id; + self.by_hash.insert(_hash, object.clone()); + self.by_offset.insert(_hash, offset); + self.offset_hash.insert(offset, _hash); + } + #[allow(unused)] + pub fn clean(&mut self) { + self.by_hash.clear(); + self.by_offset.clear(); + self.offset_hash.clear(); + } + + pub fn offset_object(&mut self, offset: u64) -> Option<&mut Arc> { + let _hash = self.offset_hash.get(&offset)?; + + self.by_hash.get_mut(_hash) + } + + pub fn hash_object(&mut self, hash: Hash) -> Option<&mut Arc> { + self.by_hash.get_mut(&hash) + } +} diff --git a/pirs/src/git/pack/decode.rs b/pirs/src/git/pack/decode.rs new file mode 100644 index 0000000000000000000000000000000000000000..8909314dd79e047779651b232b32016d3bdf80c5 --- /dev/null +++ b/pirs/src/git/pack/decode.rs @@ -0,0 +1,185 @@ +//! 
Decode pack file by the `ObjDecodedMap` +use super::super::object as obj; +use super::cache::PackObjectCache; +use crate::errors::GitError; +use crate::git::hash::Hash; +use crate::git::object::types::ObjectType; +use colored::Colorize; +use obj::base::ObjClass; +use obj::base::{blob, commit, tag, tree}; +use obj::Metadata; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::sync::Arc; +///!对取出的object字段进行进一步解码与包装 +/// 用于存储解析出的object抽象对象的hashmap +#[derive(Default, Clone)] +pub struct ObjDecodedMap { + pub _map_hash: HashMap>, + blobs: Vec, + trees: Vec, + tags: Vec, + commits: Vec, + name_map: HashMap, +} +//The further decoding process after parsing the object +impl ObjDecodedMap { + /// Further analyze different structures through cache + #[allow(unused)] + pub fn update_from_cache(&mut self, cache: &PackObjectCache) { + for (key, value) in cache.by_hash.iter() { + let metadata = Metadata::new(value.t, &value.data); + let _obj: ObjClass = match value.t { + // Give data to their **new** functions and decode it through metadata + ObjectType::Blob => { + let a = blob::Blob::new(metadata); + self.blobs.push(a.clone()); + ObjClass::BLOB(a) + } + ObjectType::Commit => { + let a = commit::Commit::new(metadata); + self.commits.push(a.clone()); + ObjClass::COMMIT(a) + } + ObjectType::Tag => { + let a = tag::Tag::new(metadata); + self.tags.push(a.clone()); + ObjClass::TAG(a) + } + ObjectType::Tree => { + let a = tree::Tree::new(metadata); + self.trees.push(a.clone()); + ObjClass::TREE(a) + } + _ => panic!("src/git/pack/decode.rs: 33 invalid type in encoded metadata"), + }; + self._map_hash.insert(key.clone(), Arc::new(_obj)); + } + } + + /// Although it seems to be an encoding thing here, it is actually a deep parsing of objects, so it is put here. 
+ /// this func should be called after the `fn update_from_cache` + /// This function verifies the existence of tree hash objects, + /// and then Sort four objects by "Magic" Sort + #[allow(unused)] + pub fn check_completeness(&mut self) -> Result<(), GitError> { + //验证对象树 tree object的完整性 确保tree item下的hash值有对应的object + for _tree in self.trees.iter() { + for item in &_tree.tree_items { + // 保存对象名与hash值的对应 + self.name_map.insert(item.id.clone(), item.filename.clone()); + // 检查是否存在对应hash + if self._map_hash.get(&item.id) == None { + return Err(GitError::UnCompletedPackObject(format!( + "can't find hash value:{}", + &_tree.meta.id + ))); + } + } + } + + // For tree & blob object , Get their name + for _tree in self.trees.iter_mut() { + let name = self.name_map.get(&_tree.meta.id); + match name { + Some(_name) => _tree.tree_name = _name.clone(), + None => {} + } + } + + for _blob in self.blobs.iter_mut() { + let name = self.name_map.get(&_blob.meta.id); + match name { + Some(_name) => _blob.filename = _name.clone(), + None => {} + } + } + // sort the four base object + //TODO: This is called the "Magic" Sort + self.trees.sort(); + self.blobs.sort(); + self.tags.sort(); + self.commits.sort(); + Ok(()) + } + + /// usually called after the `check_completeness` function + #[allow(unused)] + pub fn vec_sliding_window(&self) -> Vec { + let mut list = vec![]; + for c in self.commits.iter() { + list.push(c.meta.clone()); + } + for t in self.tags.iter() { + list.push(t.meta.clone()); + } + for tree in self.trees.iter() { + list.push(tree.meta.clone()); + } + for blob in self.blobs.iter() { + list.push(blob.meta.as_ref().clone()); + } + + list + } + + /// print the sorted metadata + #[allow(unused)] + pub fn print_vec(&self) { + for c in self.commits.iter() { + println!("{}", c); + } + for t in self.tags.iter() { + println!("{}", t); + } + for tree in self.trees.iter() { + println!("{}", tree); + } + for blob in self.blobs.iter() { + println!("{}", blob); + } + } + + + + + +} + 
+impl Display for ObjDecodedMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (key, value) in self._map_hash.iter() { + writeln!(f, "*********************").unwrap(); + writeln!(f, "Hash: {}", key).unwrap(); + writeln!(f, "Type: {}", value).unwrap(); + } + writeln!( + f, + "{}", + String::from("Finish Printf for ObjDecodedMap").blue() + ) + } +} +#[cfg(test)] +mod tests { + use super::super::Pack; + use super::ObjDecodedMap; + #[test] + pub fn test_map_new() { + let mut _map = ObjDecodedMap::default(); + let decoded_pack = Pack::decode_file( + "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack", + ); + assert_eq!( + "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + decoded_pack.signature.to_plain_str() + ); + let mut result = ObjDecodedMap::default(); + result.update_from_cache(&decoded_pack.result); + result.check_completeness().unwrap(); + result.print_vec(); + } + + + +} diff --git a/pirs/src/git/pack/encode.rs b/pirs/src/git/pack/encode.rs new file mode 100644 index 0000000000000000000000000000000000000000..70cf96ace1abf69746d87195af0079286e744562 --- /dev/null +++ b/pirs/src/git/pack/encode.rs @@ -0,0 +1,450 @@ +//! 
encode pack file ,and create file +use bstr::ByteSlice; +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::str::FromStr; + +use super::super::hash::Hash; +use super::super::object::Metadata; +use super::decode::ObjDecodedMap; +use super::Pack; +use crate::errors::GitError; +use crate::git::object::diff::DeltaDiff; +use crate::git::object::types::ObjectType; +use crate::utils; +/// the width of the sliding window +const SLIDING_WINDOW: i32 = 10; + +impl Pack { + /// The encode function of the `Pack` Struct generates the corresponding + /// file from the parsed pack or the pack generated by other methods + /// : + /// ```plaintext + /// -> |'P' 'A' 'C' 'K' |4b + /// version -> | 0 0 0 2 |4b + /// size -> | size[ 31 --- 0 ]|4b + /// ``` + /// `Pack` Struct should first carry valid `self number_ of_ Objects` field + fn encode_header(&mut self) -> Vec { + self.head = *b"PACK"; + self.version = 2; + let mut result: Vec = vec![ + b'P', b'A', b'C', b'K', // The logotype of the Pack File + 0, 0, 0, 2, + ]; // THe Version of the Pack File + let all_num = self.get_object_number(); + assert!(all_num != 0); // guarantee self.number_of_objects!=0 + assert!(all_num < (1 << 32)); //TODO: GitError:numbers of objects should < 4G , + //Encode the number of object into file + result.append(&mut utils::u32_vec(all_num as u32)); + result + } + /// Calculate the hash value of the pack file, assign the id field, and convert the hash to Vec output + fn append_hash_signature(&mut self, data: &Vec) -> Vec { + let checksum = Hash::new(&data); + self.signature = checksum.clone(); + checksum.0.to_vec() + } + + #[allow(unused)] + /// Pack 's `encode` function , only for the single .pack file + /// > If the input `meta_vec`==None requires that the pack structure is complete and valid, or at least the PackObjectCache is not empty + /// > If the input `meta_vec`!=None ,just encode that + /// # Examples + /// ``` + /// let result:Vec = decoded_pack.encode(None); + /// //or + 
/// let metadata_vec :Vec = ...;// Get a list of metadata + /// let result:Vec = Pack::default().encode(metadata_vec); + /// ``` + /// + pub fn encode(&mut self, meta_vec: Option>) -> Vec { + use sha1::{Digest, Sha1}; + let mut result: Vec; + let mut offset = 12; + match meta_vec { + // 有metadata的情况下 + Some(a) => { + self.number_of_objects = a.len(); + result = self.encode_header(); + for metadata in a { + result.append(&mut metadata.convert_to_vec().unwrap()); + //self.result.update(Arc::new(metadata), offset); + println!("Decode offset:{}", offset); + offset = result.len() as u64; + } + } + None => { + result = self.encode_header(); + for (key, value) in self.result.by_hash.iter() { + result.append(&mut value.convert_to_vec().unwrap()); + } + } + } + // compute pack hash signature and append to the result + result.append(&mut self.append_hash_signature(&result)); + result + } + + /// only support `offset delta` now.
+ /// Called after the func `vec_sliding_window` + #[allow(unused)] + pub fn encode_delta(meta_vec: Vec) -> (Self, Vec) { + let mut _pack = Pack::default(); + _pack.number_of_objects = meta_vec.len(); + let mut result = _pack.encode_header(); + let mut code_meta = vec![]; + assert_eq!(result.len(), 12); + + let mut offset: Vec = vec![]; //save the encoded offset + + for i in 0.._pack.number_of_objects as i32 { + let mut new_meta = meta_vec[i as usize].clone(); + let mut best_j: i32 = 11; + let mut best_ssam_rate: f64 = 0.0; + for j in 1..SLIDING_WINDOW { + if i - j < 0 { + break; + } + let _base = meta_vec[(i - j) as usize].clone(); + // If two object types are different, do not delta + if new_meta.t != _base.t { + break; + } + let diff = DeltaDiff::new(_base.clone(), new_meta.clone()); + let _rate = diff.get_ssam_rate(); + if (_rate > best_ssam_rate) && _rate > 0.5 { + best_ssam_rate = _rate; + best_j = j; + } + } + + let mut final_meta = new_meta.clone(); + if best_j != 11 { + let _base = meta_vec[(i - best_j) as usize].clone(); + let diff = DeltaDiff::new(_base.clone(), new_meta.clone()); + let zlib_data = diff.get_delta_metadata(); + let offset_head = utils::write_offset_encoding( + result.len() as u64 - offset[(i - best_j) as usize], + ); + final_meta.change_to_delta(ObjectType::OffsetDelta, zlib_data, offset_head); + } + code_meta.push(final_meta.clone()); + // TODO:update the offset and write + offset.push(result.len() as u64); + result.append(&mut final_meta.convert_to_vec().unwrap()); + println!(); + println!("Hash :{}", final_meta.id); + println!("type: {}", final_meta.t); + println!("Offset: {}", offset.last().unwrap()); + } + let mut _hash = _pack.append_hash_signature(&result); + result.append(&mut _hash); + (_pack, result) + } + /// Pack the loose object from the Given dir . 
+ /// `obj_path`: the vector of the Hash value of the loose object + /// `loose_root_path` : loose objects' root path + /// `target_path` : the pack file store path + pub fn pack_loose(obj_path: Vec, loose_root_path: &str) -> (Self, Vec) { + let mut meta_vec = vec![]; + for path in &obj_path { + let hash_value = Hash::from_str(path).unwrap(); + let loose_path = format!( + "{}/{}/{}", + loose_root_path, + hash_value.to_folder(), + hash_value.to_filename() + ); + let _meta = Metadata::read_object_from_file(loose_path); + match _meta { + Ok(meta) => meta_vec.push(meta), + Err(e) => eprintln!("{}", e), + } + } + + // if meta_vec.len() != obj_path.len(){ + // return false; + // } + let mut pack = Pack::default(); + + let pack_file_data = pack.encode(Some(meta_vec)); + (pack, pack_file_data) + } + /// Pack the loose object from the Given string . + /// `obj_path`: the vector of the Hash value of the loose object + /// `loose_root_path` : loose objects' root path + /// `target_path` : the pack file store path + /// + pub fn pack_loose_files( + obj_path: Vec, + loose_root_path: &str, + target_path: &str, + ) -> Self { + let (mut _pack, pack_file_data) = Self::pack_loose(obj_path, loose_root_path); + let pack_file_name = format!( + "{}/pack-{}.pack", + target_path, + _pack.signature.to_plain_str() + ); + print!("to——file: {}", pack_file_name); + let mut file = std::fs::File::create(pack_file_name).expect("create failed"); + file.write_all(pack_file_data.as_bytes()) + .expect("write failed"); + _pack + } + /// Pack the loose object in a dir ,such as the `.git/object/pack`
+ /// It can auto find the loose object follow the position like below: + /// ```plaintext + /// ./in:loose_root/aa/bbbbbbbbbbbbbbbbbb + /// ``` + /// ,The object Hash is `aabbbbbbbbbbbbbbbbbb` + /// - in:loose_root : loose object root dir + /// - in: target_path : The pack file dir to store + /// + fn find_all_loose(loose_root_path: &str) -> Vec { + let loose_root = std::path::PathBuf::from(loose_root_path); + let mut loose_vec = Vec::new(); + // 打开loose 根目录 + let paths = std::fs::read_dir(&loose_root).unwrap(); + // 暂时保存根目录作为 Path buff + let mut loose_file = loose_root.clone(); + // loose_file= ./root + // 遍历目录下的hash前两位(1b)的子文件夹 + for path in paths { + if let Ok(hash_2) = path { + //the first 1 b + let file_name1 = String::from(hash_2.file_name().to_str().unwrap()); + + // 判断只有两位且是文件夹 + let is_dir = hash_2.file_type().unwrap().is_dir(); + if is_dir && (file_name1.len() == 2) { + loose_file.push(file_name1.clone()); + //loose_file = ./root/xx + let loose_s = std::fs::read_dir(&loose_file).unwrap(); + + //再打开子文件夹 此目录下即为保存的loose object文件 + for loose_path in loose_s { + if let Ok(loose_path) = loose_path { + let file_name2 = String::from(loose_path.file_name().to_str().unwrap()); + loose_file.push(file_name2.clone()); + //loose_file = ./root/xx/xxxxxxxxxxxxxxxxxxxx + //将object提取hash值并放入vec + loose_vec.push( + Hash::from_str(&(file_name1.clone() + &file_name2)) + .unwrap() + .to_plain_str(), + ); + loose_file.pop(); // pop path buf + } + } + loose_file.pop(); + } else { + continue; + } + } + } + + loose_vec + } + /// Compress all loose files from the folder + #[allow(unused)] + pub fn pack_loose_from_dir(loose_root_path: &str, target_path: &str) -> Self { + let loose_vec = Self::find_all_loose(loose_root_path); + Pack::pack_loose_files(loose_vec, loose_root_path, target_path) + } + + /// find the `.pack` file form the given Dir + /// ### Attention + /// this func only find the first .pack file it looked. 
+ /// If you want to find all pack file ,see the `utils::find_all_pack_file` func + fn find_pack_file(object_dir: &str) -> File { + let mut object_root = std::path::PathBuf::from(object_dir); + let mut pack_file_name = String::new(); + object_root.push("pack"); + let paths = std::fs::read_dir(&object_root).unwrap(); + for path in paths { + if let Ok(pack_file) = path { + let _file_name = pack_file.file_name(); + let _file_name = _file_name.to_str().unwrap(); + if &_file_name[_file_name.len() - 4..] == "pack" { + pack_file_name.push_str(_file_name); + break; + } + } + } + object_root.push(pack_file_name); + + let pack_file = File::open(object_root).unwrap(); + pack_file + } + #[allow(dead_code)] + pub fn pack_object_dir(object_dir: &str, target_dir: &str) -> Self { + // unpack the pack file which should be unchanged + let mut pack_file = Self::find_pack_file(object_dir); + let (raw_pack, mut raw_data) = Pack::decode_raw_data(&mut pack_file); + + let loose_vec = Self::find_all_loose(object_dir); + let ( loose_pack, loose_data) = Pack::pack_loose(loose_vec, object_dir); + + // create new object struct + let mut new_pack = Self::default(); + new_pack.head = *b"PACK"; + new_pack.version = 2; + new_pack.number_of_objects = raw_pack.get_object_number() + loose_pack.get_object_number(); + let mut result = new_pack.encode_header(); + + result.append(&mut raw_data); + let mut loose_data = utils::get_pack_raw_data(loose_data); + result.append(&mut loose_data); + new_pack.signature = Hash::new(&result); + result.append(&mut new_pack.signature.0.to_vec()); + + // begin to write + let mut file = std::fs::File::create(format!( + "{}/pack-{}.pack", + target_dir, + new_pack.signature.to_plain_str() + )) + .expect("create failed"); + file.write_all(result.as_bytes()).expect("write failed"); + + new_pack + } + #[allow(unused)] + pub fn write(map: &mut ObjDecodedMap, target_dir: &str) -> Result<(), GitError> { + map.check_completeness()?; + let meta_vec = map.vec_sliding_window(); + 
let (_pack, data_write) = Pack::encode_delta(meta_vec); + let mut to_path = PathBuf::from(target_dir); + let file_name = format!("pack-{}.pack", _pack.signature.to_plain_str()); + to_path.push(file_name); + let mut file = std::fs::File::create(to_path).expect("create failed"); + file.write_all(data_write.as_bytes()).expect("write failed"); + Ok(()) + } +} +#[cfg(test)] +mod tests { + + const TEST_DIR: &str = "./test_dir"; + use crate::git::pack::{decode::ObjDecodedMap, Pack}; + use bstr::ByteSlice; + use std::io::Write; + + #[test] + fn test_object_dir_encode() { + Pack::pack_object_dir("./resources/total", "./resources/total/output"); + let decoded_pack = Pack::decode_file( + "./resources/total/output/pack-7ea8ad41c9d438654ef28297ecc874842c7d10de.pack", + ); + println!("{}", decoded_pack.get_object_number()); + assert_eq!( + "7ea8ad41c9d438654ef28297ecc874842c7d10de", + decoded_pack.signature.to_plain_str() + ); + } + + // + #[test] + fn test_a_real_pack_de_en() { + let decoded_pack = Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + ); + let mut map = ObjDecodedMap::default(); + map.update_from_cache(&decoded_pack.get_cache()); + Pack::write(&mut map, TEST_DIR).unwrap(); + + Pack::decode_file("./test_dir/pack-83df56e42ca705892f7fd64f96ecb9870b5c5ed8.pack"); + } + #[test] + fn test_multi_pack_encode() { + let pack_1 = Pack::decode_file( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + ); + let pack_2 = Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + ); + + let mut map = ObjDecodedMap::default(); + map.update_from_cache(&pack_1.get_cache()); + map.update_from_cache(&pack_2.get_cache()); + + Pack::write(&mut map, TEST_DIR).unwrap(); + + Pack::decode_file("./test_dir/pack-8e8b79ea20effb78d701fa8ad5a7e386b7d833fa.pack"); + } + + #[test] + fn dex_number() { + let all_num: usize = 0x100f1109; + println!("{:x}", (all_num >> 24) as u8); + println!("{:x}", 
(all_num >> 16) as u8); + println!("{:x}", (all_num >> 8) as u8); + println!("{:x}", (all_num) as u8); + } + + /// Package some loose objects into pack files
+ /// Just give the root directory of the loose file, the target root directory, and the hash string of the loose file + #[test] + fn test_loose_pack() { + let mut loose_vec = Vec::new(); + loose_vec.push(format!("5f413c76a2893bb1ff83d7c2b507a9cab30bd585")); + loose_vec.push(format!("8bb783eb532d4936248f9084821af2bb309f29e7")); + loose_vec.push(format!("79dc1608dba888e0378ff21591dc646c8afe4e0a")); + loose_vec.push(format!("ce70a618efa88992a4c4bdf22ebd832b24acf374")); + let loose_root = "./resources/loose"; + let target_path = "./resources/pack_g"; + let pack = Pack::pack_loose_files(loose_vec, loose_root, target_path); + Pack::decode_file(&format!( + "{}/pack-{}.pack", + target_path, + pack.signature.to_plain_str() + )); + } + + /// Only the root directory of the given loose will automatically read all loose files and package them to the specified folder + #[test] + fn test_loose_pack_from_dir() { + let loose_root = "./resources/loose"; + let target_path = "./resources/pack_g"; + // 解析过程 + let pack = Pack::pack_loose_from_dir(loose_root, target_path); + Pack::decode_file(&format!( + "{}/pack-{}.pack", + target_path, + pack.signature.to_plain_str() + )); + } + + #[test] + fn test_delta_pack_ok() { + let mut _map = ObjDecodedMap::default(); + let decoded_pack = Pack::decode_file( + "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack", + ); + assert_eq!( + "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + decoded_pack.signature.to_plain_str() + ); + let mut result = ObjDecodedMap::default(); + result.update_from_cache(&decoded_pack.result); + result.check_completeness().unwrap(); + let meta_vec = result.vec_sliding_window(); + let (_pack, data_write) = Pack::encode_delta(meta_vec); + + let file_name = format!("pack-{}.pack", _pack.signature.to_plain_str()); + let mut file = std::fs::File::create(file_name).expect("create failed"); + file.write_all(data_write.as_bytes()).expect("write failed"); + + let decoded_pack = + 
Pack::decode_file(&format!("pack-{}.pack", _pack.signature.to_plain_str())); + assert_eq!( + "aa2ab2eb4e6b37daf6dcadf1b6f0d8520c14dc89", + decoded_pack.signature.to_plain_str() + ); + } + + +} diff --git a/pirs/src/git/pack/mod.rs b/pirs/src/git/pack/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..00618d571cb5986bc17674a9c5d443322ee5b3bf --- /dev/null +++ b/pirs/src/git/pack/mod.rs @@ -0,0 +1,347 @@ +//!Encode and Decode The Pack File ,which is in the dir:`.git/object/pack/*.pack` +//! +use std::convert::TryFrom; +use std::convert::TryInto; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use self::cache::PackObjectCache; +use super::hash::Hash; +use super::idx::Idx; +use super::object::delta::*; +use super::object::Metadata; +use crate::errors::GitError; +use crate::utils; + +mod cache; +pub mod decode; +pub mod encode; +pub mod multidecode; +/// ### Pack文件结构
+/// `head`: always the 4-byte magic b"PACK"
+/// `version`: pack version number (only version 2 is accepted)
+/// `number_of_objects`: total amount of objects stored in the pack
+/// `signature`: the pack's trailing SHA-1 checksum (`Hash`)
+/// `result`: decoded cache, +#[allow(unused)] +#[derive(Default)] +pub struct Pack { + head: [u8; 4], + version: u32, + number_of_objects: usize, + pub signature: Hash, + pub result: Arc, + pack_file :PathBuf, +} + +impl Pack { + /// Git [Pack Format](https://github.com/git/git/blob/master/Documentation/technical/pack-format.txt) + /// Git Pack-Format [Introduce](https://git-scm.com/docs/pack-format) + /// ## Decode the Pack File without the `.idx` File + /// - in: pack_file: &mut File + /// - out: The `Pack` Struct + #[allow(unused)] + pub fn decode(pack_file: &mut File) -> Result { + // Check the Header of Pack File + let mut _pack = Self::check_header(pack_file)?; + + // Init the cache for follow object parse + let mut cache = PackObjectCache::default(); + + for _ in 0.._pack.number_of_objects { + //update offset of the Object + let offset = utils::get_offset(pack_file).unwrap(); + //Get the next Object by the Pack::next_object() func + let object = Pack::next_object(pack_file, offset, &mut cache)?; + // Larger offsets would require a version-2 pack index + let offset = u32::try_from(offset) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap(); + } + _pack.result = Arc::new(cache); + // CheckSum sha-1 + let _id: [u8; 20] = utils::read_bytes(pack_file).unwrap(); + _pack.signature = Hash::from_row(&_id[..]); + + Ok(_pack) + } + + /// Check the Header of the Pack File ,
+ /// include the **"PACK" head** , **Version Number** and **Number of the Objects** + fn check_header(pack_file: &mut File) -> Result { + //init a Pack Struct ,which is all empty + let mut _pack = Self { + head: [0, 0, 0, 0], + version: 0, + number_of_objects: 0, + signature: Hash::default(), + result: Arc::new(PackObjectCache::default()), + pack_file:PathBuf::new(), + }; + + // Get the Pack Head 4 b ,which should be the "PACK" + let magic = utils::read_bytes(pack_file).unwrap(); + if magic != *b"PACK" { + return Err(GitError::InvalidPackHeader(format!( + "{},{},{},{}", + magic[0], magic[1], magic[2], magic[3] + ))); + } + _pack.head = magic; + + //Get the Version Number + let version = utils::read_u32(pack_file).unwrap(); + if version != 2 { + return Err(GitError::InvalidPackFile(format!("Current File"))); + } + _pack.version = version; + + let object_num = utils::read_u32(pack_file).unwrap(); + _pack.number_of_objects = object_num as usize; + + Ok(_pack) + } + + + /// Decode the pack file helped by the according decoded idx file + #[allow(unused)] + pub fn decode_by_idx(idx: &mut Idx, pack_file: &mut File) -> Result { + let mut _pack = Self::check_header(pack_file)?; + let object_num = idx.number_of_objects; + _pack.number_of_objects = u32::try_from(object_num) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap() as usize; + let mut cache = PackObjectCache::default(); + + for idx_item in idx.idx_items.iter() { + Pack::next_object(pack_file, idx_item.offset.try_into().unwrap(), &mut cache).unwrap(); + } + let mut result = decode::ObjDecodedMap::default(); + result.update_from_cache(&mut cache); + _pack.signature = idx.pack_signature.clone(); + Ok(_pack) + } + + /// Decode the object info from the pack file,
+ /// but we don't decode the object further info ,
+ /// Instead, it stores **all un decoded object information** to a `Vec`.
+ /// This function also return A Pack Struct,which only the Attr cache named `result` is invalid + pub fn decode_raw_data(pack_file: &mut File) -> (Self, Vec) { + let mut raw_pack = Self::check_header(pack_file).unwrap(); + let mut _raw: Vec = Vec::new(); + pack_file.read_to_end(&mut _raw).unwrap(); + let raw_info = _raw[.._raw.len() - 20].to_vec(); + let _hash = Hash::from_row(&_raw[_raw.len() - 20..]); + raw_pack.signature = _hash; + (raw_pack, raw_info) + } + /// Get the Object from File by the Give Offset
+ /// By the way , the cache can hold the fount object + fn next_object( + pack_file: &mut File, + offset: u64, + cache: &mut PackObjectCache, + ) -> Result, GitError> { + use super::object::types::ObjectType; + utils::seek(pack_file, offset)?; + let (type_num, size) = utils::read_type_and_size(pack_file)?; + //Get the Object according to the Types Enum + let object = match type_num { + // Undelta representation + 1..=4 => utils::read_zlib_stream_exact(pack_file, |decompressed| { + let mut contents = Vec::with_capacity(size); + decompressed.read_to_end(&mut contents)?; + if contents.len() != size { + return Err(GitError::InvalidObjectInfo(format!( + "Incorrect object size" + ))); + } + Ok(Metadata::new(ObjectType::number_type(type_num), &contents)) + }), + // Delta; base object is at an offset in the same packfile + 6 => { + let delta_offset = utils::read_offset_encoding(pack_file)?; + let base_offset = offset.checked_sub(delta_offset).ok_or_else(|| { + GitError::InvalidObjectInfo(format!("Invalid OffsetDelta offset")) + })?; + let offset = utils::get_offset(pack_file)?; + + let base_object = if let Some(object) = cache.offset_object(base_offset) { + Arc::clone(object) + } else { + //递归调用 找出base object + Pack::next_object(pack_file, base_offset, cache)? 
+ }; + utils::seek(pack_file, offset)?; + let base_obj = base_object.as_ref(); + let objs = apply_delta(pack_file, base_obj)?; + Ok(objs) + } + // Delta; base object is given by a hash outside the packfile + //TODO : This Type need to be completed ,对应多文件的todo + 7 => { + let hash = utils::read_hash(pack_file)?; + //let object; + let base_object = if let Some(object) = cache.hash_object(hash) { + object + } else { + // object = read_object(hash)?; + // &object + return Err(GitError::NotFountHashValue(hash) ); + + }; + apply_delta(pack_file, &base_object) + } + _ => { + return Err(GitError::InvalidObjectType( + ObjectType::number_type(type_num).to_string(), + )) + } + }?; + + // //Debug Code: Print the hash & type of the parsed object + // match PackObjectType::type_number2_type(object_type) { + // Some(a) => println!("Hash:{} \t Types: {:?}",object.hash(), a), + // None =>{}, + // } + + let obj = Arc::new(object); + cache.update(Arc::clone(&obj), offset); + Ok(obj) + } + + pub fn get_object_number(&self) -> usize { + return self.number_of_objects as usize; + } + pub fn get_cache(&self) -> PackObjectCache { + return self.result.as_ref().clone(); + } + pub fn get_hash(&self) -> Hash { + return self.signature.clone(); + } + + /// Decode a pack file according to the given pack file path + /// # Examples + /// ``` + /// let decoded_pack = Pack::decode_file("./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack"); + /// assert_eq!( + /// "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + /// decoded_pack.signature.to_plain_str() + /// ); + /// ``` + /// + #[allow(unused)] + pub fn decode_file(file: &str) -> Pack { + let mut pack_file = File::open(&Path::new(file)).unwrap(); + let decoded_pack = match Pack::decode(&mut pack_file) { + Ok(f) => f, + Err(e) => match e { + GitError::NotFountHashValue(a) => panic!("{}",a), + _ => panic!("unknown error"), + }, + }; + assert_eq!(*b"PACK", decoded_pack.head); + assert_eq!(2, decoded_pack.version); + decoded_pack + } 
+} + +/// +#[cfg(test)] +mod tests { + + use crate::git::idx::Idx; + + use super::Pack; + use std::fs::File; + use std::io::BufReader; + use std::io::Read; + use std::path::Path; + + /// Test the pack File decode standalone + #[test] + fn test_decode_pack_file1() { + let decoded_pack = Pack::decode_file( + "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack", + ); + assert_eq!( + "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + decoded_pack.signature.to_plain_str() + ); + } + #[test] + fn test_decode_pack_file_with_print() { + let decoded_pack = Pack::decode_file( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack", + ); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + decoded_pack.signature.to_plain_str() + ); + } + #[test] + fn test_parse_simple_pack() { + let decoded_pack = Pack::decode_file( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + ); + assert_eq!( + "1d0e6c14760c956c173ede71cb28f33d921e232f", + decoded_pack.signature.to_plain_str() + ); + print!("{}", decoded_pack.get_object_number()); + } + + #[test] + fn test_parse_simple_pack2() { + let decoded_pack = Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + ); + assert_eq!( + "8c81e90db37ef77494efe4f31daddad8b494e099", + decoded_pack.signature.to_plain_str() + ); + print!("{}", decoded_pack.get_object_number()); + } + + #[test] + fn test_read_raw_pack() { + let mut pack_file = File::open(&Path::new( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + )) + .unwrap(); + let (raw_pack, _raw_data) = Pack::decode_raw_data(&mut pack_file); + assert_eq!( + "1d0e6c14760c956c173ede71cb28f33d921e232f", + raw_pack.signature.to_plain_str() + ); + } + ///Test the pack decode by the Idx File + #[test] + fn test_pack_idx_decode() { + let mut pack_file = File::open(&Path::new( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack", + )) + .unwrap(); 
+ let idx_file = File::open(&Path::new( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx", + )) + .unwrap(); + let mut reader = BufReader::new(idx_file); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + let mut idx = Idx::default(); + idx.decode(buffer).unwrap(); + let decoded_pack = Pack::decode_by_idx(&mut idx, &mut pack_file).unwrap(); + assert_eq!(*b"PACK", decoded_pack.head); + assert_eq!(2, decoded_pack.version); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + decoded_pack.signature.to_plain_str() + ); + } + +} diff --git a/pirs/src/git/pack/multidecode.rs b/pirs/src/git/pack/multidecode.rs new file mode 100644 index 0000000000000000000000000000000000000000..95b7e7f16cb1eb84a500f70e804add32d79fd99a --- /dev/null +++ b/pirs/src/git/pack/multidecode.rs @@ -0,0 +1,97 @@ +use std::{fs::File, sync::Arc}; +use crate::{utils, errors::GitError, git::hash::Hash}; +use super::{Pack, cache::PackObjectCache}; +use std::convert::TryFrom; +use std::cmp::Ordering; +impl Eq for Pack{} +impl Ord for Pack { + fn cmp(&self, other: &Self) -> Ordering { + let a = self.pack_file.metadata().unwrap().created().unwrap() ; + let b = other.pack_file.metadata().unwrap().created().unwrap(); + if a==b{ + return Ordering::Equal; + } else if a>b{ + return Ordering::Greater; + } else { + return Ordering::Less; + } + } +} + +impl PartialOrd for Pack { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for Pack { + fn eq(&self, other: &Self) -> bool { + let a = self.pack_file.metadata().unwrap().created().unwrap() ; + let b = other.pack_file.metadata().unwrap().created().unwrap(); + a==b + } +} + +impl Pack{ + /// Decode the `.pack` file with the object cache + #[allow(unused)] + pub fn decode_with_cache(&self,cache:&mut PackObjectCache) -> Result { + let mut pack_file = File::open(self.pack_file.clone()).unwrap(); + // Check the Header of Pack File + let mut _pack = 
Self::check_header(&mut pack_file)?; + + for _ in 0.._pack.number_of_objects { + //update offset of the Object + let offset = utils::get_offset(&mut pack_file).unwrap(); + //Get the next Object by the Pack::next_object() func + let object = Pack::next_object(&mut pack_file, offset, cache)?; + // Larger offsets would require a version-2 pack index + let offset = u32::try_from(offset) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap(); + } + + // CheckSum sha-1 + let _id: [u8; 20] = utils::read_bytes(&mut pack_file).unwrap(); + _pack.signature = Hash::from_row(&_id[..]); + print!("{}",cache.by_hash.len()); + Ok(_pack) + } + /// ## Decode all `.pack` file in a dir + /// For Example :`.git/object/pack` + /// This function is different from the `Pack::decode` function , which in the "pack/mod.rs" + /// and only decode the single `.pack` file + #[allow(unused)] + pub fn multi_decode(root:&str) -> Result{ + + let mut total_pack = Self::default(); + total_pack.number_of_objects=0; + let (files,hash_vec) = utils::find_all_pack_file(root); + let mut pack_vec = vec![]; + for _file_ in files.iter(){ + let mut _pack = Pack::default(); + _pack.pack_file = _file_.clone(); + pack_vec.push(_pack); + } + pack_vec.sort(); + let mut cache = PackObjectCache::default(); + for _pack_ in pack_vec.iter_mut() { + _pack_.decode_with_cache(&mut cache)?; + total_pack.number_of_objects+=_pack_.number_of_objects; + } + total_pack.result = Arc::new(cache); + Ok(total_pack) + } + + +} + +#[cfg(test)] +pub mod test{ + use crate::git::pack::Pack; + + #[test] + fn test_multi_pack_decode(){ + Pack::multi_decode("./resources./friger").unwrap(); + } +} \ No newline at end of file diff --git a/pirs/src/gust.rs b/pirs/src/gust.rs new file mode 100644 index 0000000000000000000000000000000000000000..c76f2ace9af8e355d1c96cda8ae2f25de6f88657 --- /dev/null +++ b/pirs/src/gust.rs @@ -0,0 +1,3 @@ +//! +//! +//! 
\ No newline at end of file diff --git a/pirs/src/lfs/mod.rs b/pirs/src/lfs/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pirs/src/main.rs b/pirs/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..030c2fe987a55a957993e7dfef4b2251e3e1cc40 --- /dev/null +++ b/pirs/src/main.rs @@ -0,0 +1,18 @@ +//! +//! +//! +extern crate core; + +mod gateway; +mod database; +mod gust; +mod lfs; +mod utils; +mod errors; +mod git; + +use anyhow::Result; + +fn main() -> Result<()> { + Ok(()) +} diff --git a/pirs/src/utils/mod.rs b/pirs/src/utils/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..0de8cf2c3299f7562722d4e1fc1d7a77bedd4bef --- /dev/null +++ b/pirs/src/utils/mod.rs @@ -0,0 +1,293 @@ +//! +//! +//! +const TYPE_BITS: u8 = 3; +const VAR_INT_ENCODING_BITS: u8 = 7; +const TYPE_BYTE_SIZE_BITS: u8 = VAR_INT_ENCODING_BITS - TYPE_BITS; +const VAR_INT_CONTINUE_FLAG: u8 = 1 << VAR_INT_ENCODING_BITS; + + +use std::{io::{self,Read,SeekFrom,Seek}, fs::File, vec, path::PathBuf, str::FromStr}; +use flate2::read::ZlibDecoder; + +use crate::errors::GitError; + +use super::git::hash::Hash; +/// Reserve the last bits of value binary +fn keep_bits(value: usize, bits: u8) -> usize { + value & ((1 << bits) - 1) +} + +pub fn read_bytes(stream: &mut R) -> io::Result<[u8; N]> { + let mut bytes = [0; N]; + stream.read_exact(&mut bytes)?; + Ok(bytes) +} + +/// Read a u32 from `stream` +pub fn read_u32(stream: &mut R) -> io::Result { + let bytes = read_bytes(stream)?; + Ok(u32::from_be_bytes(bytes)) +} + +/// Read a Hash from Read +pub fn read_hash(stream: &mut R) -> io::Result { + let bytes = read_bytes(stream)?; + Ok(Hash(bytes)) +} +/// Read vec until the delimiter +pub fn read_until_delimiter(stream: &mut R, delimiter: u8) -> io::Result> { + let mut bytes = vec![]; + loop { + let [byte] = read_bytes(stream)?; + if byte == delimiter { + break; + } + + 
// NOTE(review): this span was mangled mid-function; the items below are the
// reconstructed, self-contained forms of the var-int helpers (including the
// small prerequisites whose headers were destroyed by the mangling).

/// Keep only the lowest `bits` bits of `value`.
fn keep_bits(value: usize, bits: u8) -> usize {
    value & ((1 << bits) - 1)
}

const TYPE_BITS: u8 = 3;
const VAR_INT_ENCODING_BITS: u8 = 7;
const TYPE_BYTE_SIZE_BITS: u8 = VAR_INT_ENCODING_BITS - TYPE_BITS;
const VAR_INT_CONTINUE_FLAG: u8 = 1 << VAR_INT_ENCODING_BITS;

/// Read exactly `N` bytes from `stream`.
pub fn read_bytes<R: Read, const N: usize>(stream: &mut R) -> io::Result<[u8; N]> {
    let mut bytes = [0; N];
    stream.read_exact(&mut bytes)?;
    Ok(bytes)
}

/// Read bytes until `delimiter` is seen; the delimiter is consumed but not
/// included in the returned buffer.
pub fn read_until_delimiter<R: Read>(stream: &mut R, delimiter: u8) -> io::Result<Vec<u8>> {
    let mut bytes = vec![];
    loop {
        let [byte] = read_bytes(stream)?;
        if byte == delimiter {
            break;
        }
        bytes.push(byte);
    }
    Ok(bytes)
}

/// Read one var-int byte: returns the low 7 bits and whether the
/// continuation flag (high bit) was set.
pub fn read_var_int_byte<R: Read>(stream: &mut R) -> io::Result<(u8, bool)> {
    let [byte] = read_bytes(stream)?;
    let value = byte & !VAR_INT_CONTINUE_FLAG;
    let more_bytes = byte & VAR_INT_CONTINUE_FLAG != 0;
    Ok((value, more_bytes))
}

/// Read a little-endian size encoding: 7 value bits per byte, high bit set
/// on every byte except the last.
///
/// # Errors
/// Propagates the underlying I/O error instead of panicking (the original
/// `.unwrap()` aborted on a short read).
pub fn read_size_encoding<R: Read>(stream: &mut R) -> io::Result<usize> {
    let mut value = 0;
    let mut length = 0;
    loop {
        let (byte_value, more_bytes) = read_var_int_byte(stream)?;
        value |= (byte_value as usize) << length;
        if !more_bytes {
            return Ok(value);
        }
        length += VAR_INT_ENCODING_BITS;
    }
}

/// Inverse of [`read_size_encoding`]: emit 7 bits per byte, least
/// significant group first, continuation flag on all but the last byte.
pub fn write_size_encoding(number: usize) -> Vec<u8> {
    let mut num = vec![];
    let mut number = number;

    loop {
        if number >> VAR_INT_ENCODING_BITS > 0 {
            num.push((number & 0x7f) as u8 | 0x80);
        } else {
            num.push((number & 0x7f) as u8);
            break;
        }
        number >>= VAR_INT_ENCODING_BITS;
    }

    num
}

/// Read the leading object header of a pack entry.
/// Bits 4..=6 of the size encoding store the type; the remaining bits the
/// uncompressed size.
pub fn read_type_and_size<R: Read>(stream: &mut R) -> io::Result<(u8, usize)> {
    let value = read_size_encoding(stream)?;
    let object_type = keep_bits(value >> TYPE_BYTE_SIZE_BITS, TYPE_BITS) as u8;
    let size = keep_bits(value, TYPE_BYTE_SIZE_BITS)
        | (value >> VAR_INT_ENCODING_BITS << TYPE_BYTE_SIZE_BITS);
    Ok((object_type, size))
}

/// The offset for an OffsetDelta object.
/// Unlike the size encoding, bytes are ordered from *most* to *least*
/// significant, and 1 is added to the accumulated value for every byte
/// except the least significant one — this removes redundant encodings
/// (e.g. the 7-bit value [n] vs the 14-bit value [n, 0]).
pub fn read_offset_encoding<R: Read>(stream: &mut R) -> io::Result<u64> {
    let mut value = 0;
    loop {
        let (byte_value, more_bytes) = read_var_int_byte(stream)?;
        value = (value << VAR_INT_ENCODING_BITS) | byte_value as u64;
        if !more_bytes {
            return Ok(value);
        }
        value += 1;
    }
}

/// Inverse of [`read_offset_encoding`].
///
/// Bug fix: the decoder adds 1 to the accumulated value per continuation
/// byte, so the encoder must subtract 1 from the *remaining number* before
/// masking. The previous `(number & 0x7f) - 1` underflowed a `u8` whenever
/// a 7-bit group was zero (e.g. `number = 16384`).
///
/// # Example
/// ```
/// let re = write_offset_encoding(0x4af);
/// assert_eq!(re, vec![0x88, 0x2f]);
/// ```
pub fn write_offset_encoding(number: u64) -> Vec<u8> {
    // Least significant group first (reversed at the end).
    let mut num = vec![(number & 0x7f) as u8];
    let mut number = number >> 7;
    while number > 0 {
        // Subtract 1 BEFORE masking, mirroring the decoder's `value += 1`.
        number -= 1;
        num.push((number & 0x7f) as u8 | 0x80);
        number >>= 7;
    }
    num.reverse();
    num
}

/// Read up to `bytes` little-endian bytes, consuming one bit of
/// `present_bytes` per position to decide whether that byte is present.
pub fn read_partial_int<R: Read>(
    stream: &mut R,
    bytes: u8,
    present_bytes: &mut u8,
) -> io::Result<usize> {
    let mut value: usize = 0;
    for byte_index in 0..bytes {
        if *present_bytes & 1 != 0 {
            let [byte] = read_bytes(stream)?;
            value |= (byte as usize) << (byte_index * 8);
        }
        *present_bytes >>= 1;
    }
    Ok(value)
}

/// Position the file at the absolute `offset`.
pub fn seek(file: &mut File, offset: u64) -> io::Result<()> {
    file.seek(SeekFrom::Start(offset))?;
    Ok(())
}
/// Current absolute position of the file.
pub fn get_offset(file: &mut File) -> io::Result<u64> {
    file.seek(SeekFrom::Current(0))
}

// Call reader() to process a zlib stream from a file.
// Reset the file offset afterwards to the end of the zlib stream,
// since ZlibDecoder uses BufReader, which may consume extra bytes.
+pub fn read_zlib_stream_exact(file: &mut File, reader: F) -> Result + where F: FnOnce(&mut ZlibDecoder<&mut File>) -> Result +{ + + let offset = get_offset(file)?; + let mut decompressed = ZlibDecoder::new(file); + let result = reader(&mut decompressed); + let zlib_end = offset + decompressed.total_in(); + seek(decompressed.into_inner(), zlib_end).unwrap(); + result +} + +pub fn u32_vec(value: u32)->Vec{ + let mut result :Vec = vec![]; + result.push((value >> 24 & 0xff) as u8 ); + result.push((value >> 16 & 0xff) as u8 ); + result.push((value >> 8 & 0xff) as u8 ); + result.push((value & 0xff) as u8 ); + result +} + +pub fn get_pack_raw_data(data:Vec) -> Vec{ + + let result = &data[12..data.len()-20]; + let result = result.to_vec(); + result +} + + +fn get_hash_form_filename(filename:&str) -> String{ + let a = String::from(&filename[5..45]); + assert!(a.len()==40); + a +} +/// 从pack目录中找到所有的pack文件 +pub fn find_all_pack_file(pack_dir : &str) ->(Vec,Vec) { + let mut file_path =vec![]; + let mut hash_list = vec![]; + let mut object_root = std::path::PathBuf::from(pack_dir); + + + let paths = std::fs::read_dir(&object_root).unwrap(); + for path in paths { + if let Ok(pack_file) = path { + let _file_name = pack_file.file_name(); + let _file_name = _file_name.to_str().unwrap(); + assert!(_file_name.len()>25); + // only find the .pack file, and find the .idx file + if &_file_name[_file_name.len() - 4..] 
== "pack" { + let hash_string = get_hash_form_filename(&_file_name); + let _hash = Hash::from_str(&hash_string).unwrap(); + hash_list.push(_hash); + + object_root.push(&_file_name.to_string()); + file_path.push(object_root.clone()); + object_root.pop(); + + } + } + } + (file_path,hash_list) +} + + +#[cfg(test)] +mod test{ + + #[test] + fn test_write_encode_size(){ + let ns :u64 = 966; + // 0 1e + let re = super::write_offset_encoding(ns); + println!("{:?}",re); + } + #[test] + fn test_write_size_encoding(){ + let size = 233; + let re = super::write_size_encoding(size); + print!("{:?}",re); + print!(""); + } + + #[test] + fn test_read_size_encodings(){ + let a = vec![233,1]; + print!("{}",read_size_encoding(a)); + } + fn read_size_encoding(a :Vec) ->usize { + let mut value = 0; + let mut length = 0; + + for i in a{ + let byte_value = i & 0x7f; + let more_bytes = (i & 0x8f)!=0 ; + value |= (byte_value as usize) << length; + if !more_bytes { + return value; + } + length += 7; + } + value + } + + #[test] + fn test_pack_hash() { + let root ="./resources/friger"; + let (file_path,hash_list) = super::find_all_pack_file(root); + println!("{:?}",file_path); + + for _hash in hash_list{ + println!("{}",_hash); + } + + } +} + \ No newline at end of file diff --git a/pirs/test_dir/pack-83df56e42ca705892f7fd64f96ecb9870b5c5ed8.pack b/pirs/test_dir/pack-83df56e42ca705892f7fd64f96ecb9870b5c5ed8.pack new file mode 100644 index 0000000000000000000000000000000000000000..ffa8f519d93960d0c4dc9efbe61b2f899782c81c Binary files /dev/null and b/pirs/test_dir/pack-83df56e42ca705892f7fd64f96ecb9870b5c5ed8.pack differ diff --git a/pirs/test_dir/pack-8e8b79ea20effb78d701fa8ad5a7e386b7d833fa.pack b/pirs/test_dir/pack-8e8b79ea20effb78d701fa8ad5a7e386b7d833fa.pack new file mode 100644 index 0000000000000000000000000000000000000000..caa8b138e45e9eb4708247c4c0f1ea667a6442f9 Binary files /dev/null and b/pirs/test_dir/pack-8e8b79ea20effb78d701fa8ad5a7e386b7d833fa.pack differ