From e0e6db74f9c6621c3851698b6df8c44c9f761137 Mon Sep 17 00:00:00 2001 From: Ruoqing He Date: Tue, 23 May 2023 11:24:30 -0400 Subject: [PATCH] Make LFS integrate into Gust. Signed-off-by: Ruoqing He --- gust_integrate_lfs/Cargo.toml | 63 ++ gust_integrate_lfs/src/errors.rs | 11 + gust_integrate_lfs/src/gateway/api/lib.rs | 811 ++++++++++++++++++ gust_integrate_lfs/src/gateway/api/mod.rs | 6 + gust_integrate_lfs/src/gateway/mod.rs | 6 + gust_integrate_lfs/src/gateway/ssh_server.rs | 72 ++ gust_integrate_lfs/src/git/errors.rs | 59 ++ gust_integrate_lfs/src/git/hash.rs | 219 +++++ gust_integrate_lfs/src/git/idx/mod.rs | 323 +++++++ gust_integrate_lfs/src/git/lfs/mod.rs | 1 + gust_integrate_lfs/src/git/lfs/structs.rs | 173 ++++ gust_integrate_lfs/src/git/mod.rs | 21 + .../src/git/object/base/blob.rs | 170 ++++ .../src/git/object/base/commit.rs | 296 +++++++ gust_integrate_lfs/src/git/object/base/mod.rs | 49 ++ .../src/git/object/base/sign.rs | 141 +++ gust_integrate_lfs/src/git/object/base/tag.rs | 273 ++++++ .../src/git/object/base/tree.rs | 421 +++++++++ gust_integrate_lfs/src/git/object/delta.rs | 187 ++++ gust_integrate_lfs/src/git/object/diff.rs | 300 +++++++ gust_integrate_lfs/src/git/object/metadata.rs | 172 ++++ gust_integrate_lfs/src/git/object/mod.rs | 14 + gust_integrate_lfs/src/git/object/types.rs | 96 +++ gust_integrate_lfs/src/git/pack/cache.rs | 47 + gust_integrate_lfs/src/git/pack/decode.rs | 239 ++++++ gust_integrate_lfs/src/git/pack/encode.rs | 469 ++++++++++ gust_integrate_lfs/src/git/pack/mod.rs | 395 +++++++++ .../src/git/pack/multidecode.rs | 94 ++ gust_integrate_lfs/src/git/protocol/http.rs | 111 +++ gust_integrate_lfs/src/git/protocol/mod.rs | 235 +++++ gust_integrate_lfs/src/git/protocol/pack.rs | 444 ++++++++++ gust_integrate_lfs/src/git/protocol/ssh.rs | 199 +++++ gust_integrate_lfs/src/git/utils.rs | 309 +++++++ .../src/gust/driver/database/mod.rs | 1 + .../src/gust/driver/database/mysql/mod.rs | 27 + .../src/gust/driver/database/mysql/storage.rs | 726 ++++++++++++++++ .../src/gust/driver/fs/file_system.rs | 67 ++ gust_integrate_lfs/src/gust/driver/fs/mod.rs | 1 + .../src/gust/driver/lfs_content_store/mod.rs | 87 ++ gust_integrate_lfs/src/gust/driver/mod.rs | 85 ++ .../src/gust/driver/structure/mod.rs | 175 ++++ .../src/gust/driver/structure/nodes.rs | 451 ++++++++++ .../src/gust/driver/utils/id_generator.rs | 33 + .../src/gust/driver/utils/mod.rs | 1 + gust_integrate_lfs/src/gust/mod.rs | 4 + gust_integrate_lfs/src/main.rs | 118 +++ gust_integrate_lfs/src/utils/mod.rs | 1 + 47 files changed, 8203 insertions(+) create mode 100644 gust_integrate_lfs/Cargo.toml create mode 100644 gust_integrate_lfs/src/errors.rs create mode 100644 gust_integrate_lfs/src/gateway/api/lib.rs create mode 100644 gust_integrate_lfs/src/gateway/api/mod.rs create mode 100644 gust_integrate_lfs/src/gateway/mod.rs create mode 100644 gust_integrate_lfs/src/gateway/ssh_server.rs create mode 100644 gust_integrate_lfs/src/git/errors.rs create mode 100644 gust_integrate_lfs/src/git/hash.rs create mode 100644 gust_integrate_lfs/src/git/idx/mod.rs create mode 100644 gust_integrate_lfs/src/git/lfs/mod.rs create mode 100644 gust_integrate_lfs/src/git/lfs/structs.rs create mode 100644 gust_integrate_lfs/src/git/mod.rs create mode 100644 gust_integrate_lfs/src/git/object/base/blob.rs create mode 100644 gust_integrate_lfs/src/git/object/base/commit.rs create mode 100644 gust_integrate_lfs/src/git/object/base/mod.rs create mode 100644 gust_integrate_lfs/src/git/object/base/sign.rs create mode 100644 
gust_integrate_lfs/src/git/object/base/tag.rs create mode 100644 gust_integrate_lfs/src/git/object/base/tree.rs create mode 100644 gust_integrate_lfs/src/git/object/delta.rs create mode 100644 gust_integrate_lfs/src/git/object/diff.rs create mode 100644 gust_integrate_lfs/src/git/object/metadata.rs create mode 100644 gust_integrate_lfs/src/git/object/mod.rs create mode 100644 gust_integrate_lfs/src/git/object/types.rs create mode 100644 gust_integrate_lfs/src/git/pack/cache.rs create mode 100644 gust_integrate_lfs/src/git/pack/decode.rs create mode 100644 gust_integrate_lfs/src/git/pack/encode.rs create mode 100644 gust_integrate_lfs/src/git/pack/mod.rs create mode 100644 gust_integrate_lfs/src/git/pack/multidecode.rs create mode 100644 gust_integrate_lfs/src/git/protocol/http.rs create mode 100644 gust_integrate_lfs/src/git/protocol/mod.rs create mode 100644 gust_integrate_lfs/src/git/protocol/pack.rs create mode 100644 gust_integrate_lfs/src/git/protocol/ssh.rs create mode 100644 gust_integrate_lfs/src/git/utils.rs create mode 100644 gust_integrate_lfs/src/gust/driver/database/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/database/mysql/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/database/mysql/storage.rs create mode 100644 gust_integrate_lfs/src/gust/driver/fs/file_system.rs create mode 100644 gust_integrate_lfs/src/gust/driver/fs/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/lfs_content_store/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/structure/mod.rs create mode 100644 gust_integrate_lfs/src/gust/driver/structure/nodes.rs create mode 100644 gust_integrate_lfs/src/gust/driver/utils/id_generator.rs create mode 100644 gust_integrate_lfs/src/gust/driver/utils/mod.rs create mode 100644 gust_integrate_lfs/src/gust/mod.rs create mode 100644 gust_integrate_lfs/src/main.rs create mode 100644 gust_integrate_lfs/src/utils/mod.rs diff --git a/gust_integrate_lfs/Cargo.toml b/gust_integrate_lfs/Cargo.toml new file mode 100644 index 00000000..3b65f905 --- /dev/null +++ b/gust_integrate_lfs/Cargo.toml @@ -0,0 +1,63 @@ +[package] +name = "gust" +version = "0.1.0" +edition = "2021" + +[workspace] +members = [".", "entity"] + +[profile.release] +debug = true + +[dependencies] +rust-crypto = "0.2.36" +hex = "0.4.3" +deflate = "1.0.0" +flate2 = "1.0.26" +bstr = "1.2.0" +anyhow = "1.0.71" +thiserror = "1.0.38" +spdlog-rs = "0.3.7" +byteorder = "1.4.3" +sha-1 = "0.10.1" +imara-diff = "0.1.5" +min-max = "0.1" +colored = "2.0.0" +crc = "3.0.0" +diffs = "0.4.1" +tokio = { version = "1.28.1", features = ["full"] } +hyper = { version = "0.14.26", features = ["full"] } +axum = "0.6.18" +dotenvy = "0.15.6" +serde = { version = "1.0.160", features = ["derive"] } +serde_json = "1.0.96" +toml = "0.7.4" +tracing = "0.1.37" +tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } +bytes = "1.4.0" +futures = "0.3.28" +dirs = "4.0.0" +clap = { version = "4.3.0", features = ["derive"] } +async-trait = "0.1.68" +chrono = "0.4.23" +lru = "0.9.0" +idgenerator = "2.0.0" +russh = "0.37.1" +russh-keys = "0.37.1" +ed25519-dalek = "1.0.1" +async-recursion = "1.0.4" +tokio-test = "0.4.2" +regex = "1.8.1" +rayon = "1.7.0" +entity = { path = "entity" } +rand = "0.8.5" +serde_qs = "0.12.0" +sha256 = "1.1.3" + +[dependencies.sea-orm] +version = "0.11.3" +features = [ + "sqlx-mysql", + "runtime-tokio-rustls", + "macros", +] diff --git a/gust_integrate_lfs/src/errors.rs 
b/gust_integrate_lfs/src/errors.rs new file mode 100644 index 00000000..3599d330 --- /dev/null +++ b/gust_integrate_lfs/src/errors.rs @@ -0,0 +1,11 @@ +//! +//! +//! +//! +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum GustError { + #[error(transparent)] + IOError(#[from] std::io::Error), +} diff --git a/gust_integrate_lfs/src/gateway/api/lib.rs b/gust_integrate_lfs/src/gateway/api/lib.rs new file mode 100644 index 00000000..0ba9a4d9 --- /dev/null +++ b/gust_integrate_lfs/src/gateway/api/lib.rs @@ -0,0 +1,811 @@ +//! +//! +//! +use std::collections::HashMap; + +use regex::Regex; + +use std::io::prelude::*; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::Result; +use axum::body::Body; +use axum::extract::{Query, State}; +use axum::http::{Response, StatusCode}; +use axum::routing::get; +use axum::{Router, Server}; +use bytes::{BufMut, BytesMut}; +use chrono::{prelude::*, Duration}; +use futures::StreamExt; +use hyper::{Request, Uri}; +use rand::prelude::*; +use serde::Deserialize; +use serde_qs; + +use crate::git::lfs::structs::*; +use crate::git::protocol::{http, PackProtocol, Protocol}; +use crate::gust::driver::database::mysql; +use crate::gust::driver::lfs_content_store::ContentStore; +use crate::gust::driver::ObjectStorage; +use crate::ServeConfig; + +#[derive(Clone)] +struct AppState<T: ObjectStorage> { + storage: T, + config: ServeConfig, +} + +#[derive(Deserialize, Debug)] +struct GetParams { + pub service: Option<String>, + pub refspec: Option<String>, + pub id: Option<String>, + pub path: Option<String>, + pub limit: Option<String>, + pub cursor: Option<String>, +} + +pub fn remove_git_suffix(uri: Uri, git_suffix: &str) -> PathBuf { + PathBuf::from(uri.path().replace(".git", "").replace(git_suffix, "")) +} + +pub async fn http_server(config: &ServeConfig) -> Result<(), Box<dyn std::error::Error>> { + let state = AppState { + storage: mysql::init().await, + config: config.to_owned(), + }; + + let ServeConfig { + host, + port, + key_path, + cert_path, + lfs_content_path, + } = config; + let server_url = format!("{}:{}", host, port); + + let app = Router::new() + .route( + "/*path", + get(get_method_router) + .post(post_method_router) + .put(put_method_router), + ) + .with_state(state); + + let addr = SocketAddr::from_str(&server_url).unwrap(); + Server::bind(&addr).serve(app.into_make_service()).await?; + + Ok(()) +} +
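(For reference, not part of the patch: a minimal sketch of what `remove_git_suffix` above is expected to do for a typical smart-HTTP path.)
    let uri: Uri = "/org1/apps/App2.git/info/refs".parse().unwrap();
    assert_eq!(remove_git_suffix(uri, "/info/refs"), PathBuf::from("/org1/apps/App2"));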
+/// Discovering Reference +async fn get_method_router<T>( + state: State<AppState<T>>, + Query(params): Query<GetParams>, + uri: Uri, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + // Routing LFS services. + if Regex::new(r"/objects/[a-z0-9]+$") + .unwrap() + .is_match(uri.path()) + { + // Retrieve the `:oid` field from path. + let path = uri.path().to_owned(); + let tokens: Vec<&str> = path.split('/').collect(); + // The `:oid` field is the last field. + return lfs_download_object(state, tokens[tokens.len() - 1]).await; + } else if Regex::new(r"/locks$").unwrap().is_match(uri.path()) { + // Load query parameters into struct. + let lock_list_query = LockListQuery { + path: params.path, + id: params.id, + cursor: params.cursor, + limit: params.limit, + refspec: params.refspec, + }; + return lfs_retrieve_lock(state, lock_list_query).await; + } + + if !Regex::new(r"/info/refs$").unwrap().is_match(uri.path()) { + return Err(( + StatusCode::FORBIDDEN, + String::from("Operation not supported\n"), + )); + } + let service_name = params.service.unwrap(); + if service_name == "git-upload-pack" || service_name == "git-receive-pack" { + let mut pack_protocol = PackProtocol::new( + remove_git_suffix(uri, "/info/refs"), + &service_name, + Arc::new(state.storage.clone()), + Protocol::Http, + ); + let mut headers = HashMap::new(); + headers.insert( + "Content-Type".to_string(), + format!( + "application/x-{}-advertisement", + pack_protocol.service_type.unwrap() + ), + ); + headers.insert( + "Cache-Control".to_string(), + "no-cache, max-age=0, must-revalidate".to_string(), + ); + tracing::info!("headers: {:?}", headers); + let mut resp = Response::builder(); + for (key, val) in headers { + resp = resp.header(&key, val); + } + + let pkt_line_stream = pack_protocol.git_info_refs().await; + let body = Body::from(pkt_line_stream.freeze()); + Ok(resp.body(body).unwrap()) + } else { + Err(( + StatusCode::FORBIDDEN, + String::from("Operation not supported\n"), + )) + } +} +
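(For reference: per Git's smart-HTTP convention, the advertisement body built by `git_info_refs` above is pkt-line framed; the first bytes of a git-upload-pack advertisement look like this sketch.)
    001e# service=git-upload-pack
    0000<ref advertisement, one pkt-line per ref>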
+async fn post_method_router<T>( + state: State<AppState<T>>, + uri: Uri, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage + 'static, +{ + // Routing LFS services. + if Regex::new(r"/locks/verify$").unwrap().is_match(uri.path()) { + return lfs_verify_lock(state, req).await; + } else if Regex::new(r"/locks$").unwrap().is_match(uri.path()) { + return lfs_create_lock(state, req).await; + } else if Regex::new(r"/unlock$").unwrap().is_match(uri.path()) { + // Retrieve the `:id` field from path. + let path = uri.path().to_owned(); + let tokens: Vec<&str> = path.split('/').collect(); + // The `:id` field is just ahead of the last field. + return lfs_delete_lock(state, tokens[tokens.len() - 2], req).await; + } else if Regex::new(r"/objects/batch$").unwrap().is_match(uri.path()) { + return lfs_process_batch(state, req).await; + } + + if Regex::new(r"/git-upload-pack$") + .unwrap() + .is_match(uri.path()) + { + git_upload_pack(state, remove_git_suffix(uri, "/git-upload-pack"), req).await + } else if Regex::new(r"/git-receive-pack$") + .unwrap() + .is_match(uri.path()) + { + git_receive_pack(state, remove_git_suffix(uri, "/git-receive-pack"), req).await + } else { + Err(( + StatusCode::FORBIDDEN, + String::from("Operation not supported"), + )) + } +} + +async fn put_method_router<T>( + state: State<AppState<T>>, + uri: Uri, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage + 'static, +{ + if Regex::new(r"/objects/[a-z0-9]+$") + .unwrap() + .is_match(uri.path()) + { + // Retrieve the `:oid` field from path. + let path = uri.path().to_owned(); + let tokens: Vec<&str> = path.split('/').collect(); + // The `:oid` field is the last field. + lfs_upload_object(state, tokens[tokens.len() - 1], req).await + } else { + Err(( + StatusCode::FORBIDDEN, + String::from("Operation not supported"), + )) + } +} + +/// Smart Service git-upload-pack, handle git pull and clone +async fn git_upload_pack<T>( + state: State<AppState<T>>, + path: PathBuf, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage + 'static, +{ + let pack_protocol = + PackProtocol::new(path, "", Arc::new(state.storage.clone()), Protocol::Http); + + http::git_upload_pack(req, pack_protocol).await +} + +// http://localhost:8000/org1/apps/App2.git +// http://localhost:8000/org1/libs/lib1.git +/// Smart Service git-receive-pack, handle git push +async fn git_receive_pack<T>( + state: State<AppState<T>>, + path: PathBuf, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage + 'static, +{ + tracing::info!("req: {:?}", req); + let pack_protocol = + PackProtocol::new(path, "", Arc::new(state.storage.clone()), Protocol::Http); + http::git_receive_pack(req, pack_protocol).await +} + +async fn lfs_retrieve_lock<T>( + state: State<AppState<T>>, + lock_list_query: LockListQuery, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + tracing::info!("retrieving locks: {:?}", lock_list_query); + let repo = lock_list_query + .refspec + .as_ref() + .unwrap_or(&"".to_string()) + .to_string(); + let path = match lock_list_query.path.as_ref() { + Some(val) => val.to_owned(), + None => "".to_owned(), + }; + let cursor = match lock_list_query.cursor.as_ref() { + Some(val) => val.to_owned(), + None => "".to_owned(), + }; + let limit = match lock_list_query.limit.as_ref() { + Some(val) => val.to_owned(), + None => "".to_owned(), + }; + let mut resp = Response::builder(); + resp = resp.header("Content-Type", "application/vnd.git-lfs+json"); + + let db = Arc::new(state.storage.clone()); + let (locks, next_cursor, ok) = match db + .lfs_get_filtered_locks(&repo, &path, &cursor, &limit) + .await + { + Ok((locks, next)) => (locks, next, true), + Err(_) => (vec![], "".to_string(), false), + }; + + let mut lock_list = LockList { + locks: vec![], + next_cursor: "".to_string(), + }; + + if !ok { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Lookup operation failed!".to_string(), + )); + } else { + lock_list.locks = locks.clone(); + lock_list.next_cursor = next_cursor; + } + + let locks_response = serde_json::to_string(&lock_list).unwrap(); + println!("{:?}", locks_response); + let body = Body::from(locks_response); + + Ok(resp.body(body).unwrap()) +} +
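(For reference, a sketch of the list-locks exchange served by `lfs_retrieve_lock`, per the Git LFS File Locking API; the URL shape is illustrative.)
    GET /<repo>/locks?path=a.bin&limit=1
    => {"locks":[{"id":"123","path":"a.bin","locked_at":"2023-05-23T11:24:30Z"}],"next_cursor":""}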
+async fn lfs_verify_lock<T>( + state: State<AppState<T>>, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + tracing::info!("req: {:?}", req); + let mut resp = Response::builder(); + resp = resp.header("Content-Type", "application/vnd.git-lfs+json"); + + let (_parts, mut body) = req.into_parts(); + + let mut request_body = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends: {:?}", chunk); + let bytes = chunk.unwrap(); + request_body.extend_from_slice(&bytes); + } + + let verifiable_lock_request: VerifiableLockRequest = + serde_json::from_slice(request_body.freeze().as_ref()).unwrap(); + let mut limit = verifiable_lock_request.limit.unwrap_or(0); + if limit == 0 { + limit = 100; + } + + let db = Arc::new(state.storage.clone()); + let res = db + .lfs_get_filtered_locks( + &verifiable_lock_request.refs.name, + &"".to_string(), + &verifiable_lock_request + .cursor + .unwrap_or("".to_string()) + .to_string(), + &limit.to_string(), + ) + .await; + + let (locks, next_cursor, ok) = match res { + Ok((locks, next)) => (locks, next, true), + Err(_) => (vec![], "".to_string(), false), + }; + + let mut lock_list = VerifiableLockList { + ours: vec![], + theirs: vec![], + next_cursor: "".to_string(), + }; + tracing::info!("acquired: {:?}", lock_list); + + if !ok { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Lookup operation failed!".to_string(), + )); + } else { + lock_list.next_cursor = next_cursor; + + for lock in locks.iter() { + if lock.owner.is_none() { + lock_list.ours.push(lock.clone()); + } else { + lock_list.theirs.push(lock.clone()); + } + } + } + let locks_response = serde_json::to_string(&lock_list).unwrap(); + tracing::info!("sending: {:?}", locks_response); + let body = Body::from(locks_response); + + Ok(resp.body(body).unwrap()) +} + +async fn lfs_create_lock<T>( + state: State<AppState<T>>, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + tracing::info!("req: {:?}", req); + let mut resp = Response::builder(); + resp = resp.header("Content-Type", "application/vnd.git-lfs+json"); + + let (_parts, mut body) = req.into_parts(); + + let mut request_body = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends: {:?}", chunk); + let bytes = chunk.unwrap(); + request_body.extend_from_slice(&bytes); + } + + let lock_request: LockRequest = serde_json::from_slice(request_body.freeze().as_ref()).unwrap(); + println!("{:?}", lock_request); + tracing::info!("acquired: {:?}", lock_request); + let db = Arc::new(state.storage.clone()); + let res = db + .lfs_get_filtered_locks( + &lock_request.refs.name, + &lock_request.path.to_string(), + "", + "1", + ) + .await; + + let (locks, _, ok) = match res { + Ok((locks, next)) => (locks, next, true), + Err(_) => (vec![], "".to_string(), false), + }; + + if !ok { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed when filtering locks!".to_string(), + )); + } + + if !locks.is_empty() { + return Err((StatusCode::CONFLICT, "Lock already exists".to_string())); + } + + let lock = Lock { + id: { + // eight random decimal digits + let mut random_num = String::new(); + let mut rng = rand::thread_rng(); + for _ in 0..8 { + random_num += &(rng.gen_range(0..10)).to_string(); + } + random_num + }, + path: lock_request.path.to_owned(), + owner: None, + locked_at: { + let locked_at: DateTime<Utc> = Utc::now(); + locked_at.to_rfc3339().to_string() + }, + }; + + let ok = db + .lfs_add_lock(&lock_request.refs.name, vec![lock.clone()]) + .await + .is_ok(); + if !ok { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed when adding locks!".to_string(), + )); + } + + resp = resp.status(StatusCode::CREATED); + + let lock_response = LockResponse { + lock, + message: "".to_string(), + }; + let lock_response = serde_json::to_string(&lock_response).unwrap(); + let body = Body::from(lock_response); + + Ok(resp.body(body).unwrap()) +} + +async fn lfs_delete_lock<T>( + state: State<AppState<T>>, + id: &str, + req: Request<Body>, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + // Retrieve information from request body.
+ tracing::info!("req: {:?}", req); + let mut resp = Response::builder(); + resp = resp.header("Content-Type", "application/vnd.git-lfs+json"); + + let (_parts, mut body) = req.into_parts(); + + let mut request_body = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends :{:?}", chunk); + let bytes = chunk.unwrap(); + request_body.extend_from_slice(&bytes); + } + + if id.len() == 0 { + return Err((StatusCode::BAD_REQUEST, "Invalid lock id!".to_string())); + } + + if request_body.is_empty() { + return Err(( + StatusCode::BAD_REQUEST, + "Deserialize operation failed!".to_string(), + )); + } + let unlock_request: UnlockRequest = + serde_json::from_slice(request_body.freeze().as_ref()).unwrap(); + + let db = Arc::new(state.storage.clone()); + + let res = db + .lfs_delete_lock( + &unlock_request.refs.name, + None, + &id, + unlock_request.force.unwrap_or(false), + ) + .await; + + let (deleted_lock, ok) = match res { + Ok(lock) => (lock, true), + Err(_) => ( + Lock { + id: "".to_string(), + path: "".to_string(), + owner: None, + locked_at: { DateTime::::MIN_UTC.to_rfc3339().to_string() }, + }, + false, + ), + }; + + if !ok { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Delete operation failed!".to_string(), + )); + } + + if deleted_lock.id == "" + && deleted_lock.path == "" + && deleted_lock.owner.is_none() + && deleted_lock.locked_at == DateTime::::MIN_UTC.to_rfc3339().to_string() + { + return Err((StatusCode::NOT_FOUND, "Unable to find lock!".to_string())); + } + + let unlock_response = UnlockResponse { + lock: deleted_lock, + message: "".to_string(), + }; + tracing::info!("sending: {:?}", unlock_response); + let unlock_response = serde_json::to_string(&unlock_response).unwrap(); + + let body = Body::from(unlock_response); + Ok(resp.body(body).unwrap()) +} + +async fn lfs_process_batch( + state: State>, + req: Request, +) -> Result, (StatusCode, String)> +where + T: ObjectStorage, +{ + // Extract the body to `BatchVars`. 
+ tracing::info!("req: {:?}", req); + + let (_parts, mut body) = req.into_parts(); + + let mut request_body = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends :{:?}", chunk); + let bytes = chunk.unwrap(); + request_body.extend_from_slice(&bytes); + } + + let mut batch_vars: BatchVars = serde_json::from_slice(request_body.freeze().as_ref()).unwrap(); + + let bvo = &mut batch_vars.objects; + for request in bvo { + request.authorization = "".to_string(); + } + tracing::info!("acquired: {:?}", batch_vars); + + let mut response_objects = Vec::::new(); + + let db = Arc::new(state.storage.clone()); + let config = Arc::new(state.config.clone()); + + // + let server_url = format!("http://{}:{}", config.host, config.port); + + let content_store = ContentStore::new(config.lfs_content_path.to_owned()).await; + for object in batch_vars.objects { + let meta = db.lfs_get_meta(&object).await; + + // Found + let found = meta.is_ok(); + let mut meta = meta.unwrap_or_default(); + if found && content_store.exist(&meta).await { + response_objects.push(represent(&object, &meta, true, false, false, &server_url).await); + continue; + } + + // Not found + if batch_vars.operation == "upload" { + meta = db.lfs_put_meta(&object).await.unwrap(); + response_objects.push(represent(&object, &meta, false, true, false, &server_url).await); + } else { + let rep = Representation { + oid: object.oid.to_owned(), + size: object.size, + authenticated: None, + actions: None, + error: Some(ObjectError { + code: 404, + message: "Not found".to_owned(), + }), + }; + response_objects.push(rep); + } + } + + let batch_response = BatchResponse { + transfer: "basic".to_string(), + objects: response_objects, + hash_algo: "sha256".to_string(), + }; + + let json = serde_json::to_string(&batch_response).unwrap(); + //DEBUG + + let mut resp = Response::builder(); + resp = resp.status(200); + resp = resp.header("Content-Type", "application/vnd.git-lfs+json"); + + let body = Body::from(json); + let resp = resp.body(body).unwrap(); + println!("Sending: {:?}", resp); + + Ok(resp) +} + +async fn lfs_upload_object( + state: State>, + oid: &str, + req: Request, +) -> Result, (StatusCode, String)> +where + T: ObjectStorage, +{ + tracing::info!("req: {:?}", req); + // Load request parameters into struct. 
+ let request_vars = RequestVars { + oid: oid.to_string(), + authorization: "".to_string(), + ..Default::default() + }; + + let db = Arc::new(state.storage.clone()); + let config = Arc::new(state.config.clone()); + let content_store = ContentStore::new(config.lfs_content_path.to_owned()).await; + + let meta = db.lfs_get_meta(&request_vars).await.unwrap(); + + let (_parts, mut body) = req.into_parts(); + + let mut request_body = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends: {:?}", chunk); + let bytes = chunk.unwrap(); + request_body.extend_from_slice(&bytes); + } + + let ok = content_store + .put(&meta, request_body.freeze().as_ref()) + .await; + if !ok { + db.lfs_delete_meta(&request_vars).await.unwrap(); + return Err(( + StatusCode::NOT_ACCEPTABLE, + String::from("Header not acceptable!"), + )); + } + let mut resp = Response::builder(); + resp = resp.header("Content-Type", "application/vnd.git-lfs"); + let resp = resp.body(Body::empty()).unwrap(); + + Ok(resp) +} + +async fn lfs_download_object<T>( + state: State<AppState<T>>, + oid: &str, +) -> Result<Response<Body>, (StatusCode, String)> +where + T: ObjectStorage, +{ + tracing::info!("start downloading LFS object"); + let db = Arc::new(state.storage.clone()); + let config = Arc::new(state.config.clone()); + let content_store = ContentStore::new(config.lfs_content_path.to_owned()).await; + + // Load request parameters into struct. + let request_vars = RequestVars { + oid: oid.to_owned(), + authorization: "".to_owned(), + ..Default::default() + }; + + let meta = db.lfs_get_meta(&request_vars).await.unwrap(); + + let mut file = content_store.get(&meta, 0).await; + + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + let mut bytes = BytesMut::new(); + bytes.put(buffer.as_ref()); + let mut resp = Response::builder(); + resp = resp.status(200); + let body = Body::from(bytes.freeze()); + Ok(resp.body(body).unwrap()) +} +
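(For reference, the `represent` helper below builds the per-object `actions` map of the batch response; a download-only object serializes roughly as this sketch.)
    {"oid":"...","size":1024,"authenticated":true,"actions":{"download":{"href":"<server_url>/<oid>","header":{"Accept":"application/vnd.git-lfs"},"expires_at":"<now + 86400s, RFC 3339>"}}}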
+async fn represent( + rv: &RequestVars, + meta: &MetaObject, + download: bool, + upload: bool, + use_tus: bool, + server_url: &str, +) -> Representation { + let mut rep = Representation { + oid: meta.oid.to_owned(), + size: meta.size, + authenticated: Some(true), + actions: None, + error: None, + }; + + let mut header: HashMap<String, String> = HashMap::new(); + let mut verify_header: HashMap<String, String> = HashMap::new(); + + header.insert("Accept".to_string(), "application/vnd.git-lfs".to_owned()); + + if rv.authorization.len() > 0 { + header.insert("Authorization".to_string(), rv.authorization.to_owned()); + verify_header.insert("Authorization".to_string(), rv.authorization.to_owned()); + } + + if download { + let mut actions = HashMap::new(); + actions.insert( + "download".to_string(), + Link { + href: { rv.download_link(server_url.to_string()).await }, + header: header.clone(), + expires_at: { + let expire_time: DateTime<Utc> = Utc::now() + Duration::seconds(86400); + expire_time.to_rfc3339().to_string() + }, + }, + ); + rep.actions = Some(actions); + } + + if upload { + let mut actions = HashMap::new(); + actions.insert( + "upload".to_string(), + Link { + href: { rv.upload_link(server_url.to_string()).await }, + header: header.clone(), + expires_at: { + let expire_time: DateTime<Utc> = Utc::now() + Duration::seconds(86400); + expire_time.to_rfc3339().to_string() + }, + }, + ); + // Insert the verify action into the same map so the upload action is not dropped. + if use_tus { + actions.insert( + "verify".to_string(), + Link { + href: { rv.verify_link(server_url.to_string()).await }, + header: verify_header.clone(), + expires_at: { + let expire_time: DateTime<Utc> = Utc::now() + Duration::seconds(86400); + expire_time.to_rfc3339().to_string() + }, + }, + ); + } + rep.actions = Some(actions); + } + + rep +} diff --git a/gust_integrate_lfs/src/gateway/api/mod.rs b/gust_integrate_lfs/src/gateway/api/mod.rs new file mode 100644 index 00000000..ec1abbd5 --- /dev/null +++ b/gust_integrate_lfs/src/gateway/api/mod.rs @@ -0,0 +1,6 @@ +//! +//! +//! +//! +//! +pub mod lib; diff --git a/gust_integrate_lfs/src/gateway/mod.rs b/gust_integrate_lfs/src/gateway/mod.rs new file mode 100644 index 00000000..1798ce27 --- /dev/null +++ b/gust_integrate_lfs/src/gateway/mod.rs @@ -0,0 +1,6 @@ +//! +//! +//! + +pub mod api; +pub mod ssh_server; diff --git a/gust_integrate_lfs/src/gateway/ssh_server.rs b/gust_integrate_lfs/src/gateway/ssh_server.rs new file mode 100644 index 00000000..b9bdf06d --- /dev/null +++ b/gust_integrate_lfs/src/gateway/ssh_server.rs @@ -0,0 +1,72 @@ +//! +//! +//! +use std::collections::HashMap; +use std::env; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; + +use anyhow::Result; + +use russh_keys::key::KeyPair; + +use tokio::io::AsyncWriteExt; + +use crate::git::protocol::ssh::SshServer; +use crate::gust::driver::database::mysql; +use crate::ServeConfig; + +/// start a ssh server +pub async fn server(command: &ServeConfig) -> Result<(), std::io::Error> { + let client_key = load_key().await.unwrap(); + let client_pubkey = Arc::new(client_key.clone_public_key().unwrap()); + + let mut config = russh::server::Config::default(); + config.connection_timeout = Some(std::time::Duration::from_secs(10)); + config.auth_rejection_time = std::time::Duration::from_secs(3); + config.keys.push(client_key); + + let config = Arc::new(config); + let sh = SshServer { + client_pubkey, + clients: Arc::new(Mutex::new(HashMap::new())), + id: 0, + storage: mysql::init().await, + pack_protocol: None, + }; + + let ServeConfig { + host, + port, + key_path, + cert_path, + lfs_content_path, + } = command; + let server_url = format!("{}:{}", host, port); + let addr = SocketAddr::from_str(&server_url).unwrap(); + russh::server::run(config, addr, sh).await +} + +async fn load_key() -> Result<KeyPair> { + let key_root = env::var("SSH_ROOT").expect("SSH_ROOT is not set in .env file"); + let key_path = PathBuf::from(key_root).join("id_rsa"); + if !key_path.exists() { + // generate a keypair if not exists + let keys = KeyPair::generate_ed25519().unwrap(); + let mut key_file = tokio::fs::File::create(&key_path).await.unwrap(); + + let KeyPair::Ed25519(inner_pair) = keys; + + key_file.write_all(&inner_pair.to_bytes()).await?; + + Ok(KeyPair::Ed25519(inner_pair)) + } else { + // load the keypair from the file + let key_data = tokio::fs::read(&key_path).await?; + let keypair = ed25519_dalek::Keypair::from_bytes(&key_data)?; + + Ok(KeyPair::Ed25519(keypair)) + } +}
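(Note on `load_key` above: with ed25519-dalek 1.x, `Keypair::to_bytes()` yields the 64-byte secret-then-public encoding and `Keypair::from_bytes()` expects exactly that, so the file written on first start round-trips on later starts; a minimal sketch.)
    let restored = ed25519_dalek::Keypair::from_bytes(&inner_pair.to_bytes())?; // 64 bytes: secret || public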
diff --git a/gust_integrate_lfs/src/git/errors.rs b/gust_integrate_lfs/src/git/errors.rs new file mode 100644 index 00000000..4a082718 --- /dev/null +++ b/gust_integrate_lfs/src/git/errors.rs @@ -0,0 +1,59 @@ +//! +//! +//! +//! + +use thiserror::Error; + +#[derive(Error, Debug)] +#[allow(unused)] +pub enum GitError { + #[error("The `{0}` is not a valid git object type.")] + InvalidObjectType(String), + + #[error("The `{0}` is not a valid git blob object.")] + InvalidBlobObject(String), + + #[error("The `{0}` is not a valid git tree object.")] + InvalidTreeObject(String), + + #[error("The `{0}` is not a valid git tree item.")] + InvalidTreeItem(String), + + #[error("The `{0}` is not a valid git commit object.")] + InvalidCommitObject(String), + + #[error("The `{0}` is not a valid git tag object.")] + InvalidTagObject(String), + + #[error("The `{0}` is not a valid idx file.")] + InvalidIdxFile(String), + + #[error("The `{0}` is not a valid pack file.")] + InvalidPackFile(String), + + #[error("The `{0}` is not a valid pack header.")] + InvalidPackHeader(String), + + #[error("The {0} is not a valid Hash value ")] + InvalidHashValue(String), + + #[error("Delta object error, info: {0}")] + DeltaObjError(String), + + #[error("The object to be packed is incomplete: {0}")] + UnCompletedPackObject(String), + + #[error("Error decoding the object, info: {0}")] + InvalidObjectInfo(String), + + #[error("Can't find Hash value {0} in the current file")] + NotFountHashValue(String), +} + +#[derive(Error, Debug)] +#[allow(unused)] +pub enum GitLFSError { + #[error("Something went wrong in Git LFS")] + GeneralError(String), +} diff --git a/gust_integrate_lfs/src/git/hash.rs b/gust_integrate_lfs/src/git/hash.rs new file mode 100644 index 00000000..f82b5b81 --- /dev/null +++ b/gust_integrate_lfs/src/git/hash.rs @@ -0,0 +1,219 @@ +//! +//! +//! +//! +//! +use std::convert::TryFrom; +use std::fmt::Display; +use std::str::FromStr; + +use colored::Colorize; +use sha1::{Digest, Sha1}; + +use crate::git::errors::GitError; +use crate::git::object::metadata::MetaData; +use crate::git::object::types::ObjectType; + +/// +const HASH_BYTES: usize = 20; + +/// +const COMMIT_OBJECT_TYPE: &[u8] = b"commit"; +const TREE_OBJECT_TYPE: &[u8] = b"tree"; +const BLOB_OBJECT_TYPE: &[u8] = b"blob"; +const TAG_OBJECT_TYPE: &[u8] = b"tag"; + +/// Git Object hash type. only support SHA1 for now. +#[allow(unused)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum HashType { + Sha1, +} + +/// Hash struct, only contains the u8 array: `[u8; 20]` +#[allow(unused)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] +pub struct Hash(pub [u8; HASH_BYTES]); + +/// Display trait for Hash type +impl Display for Hash { + /// Display trait for Hash type + /// # Attention + /// Because of the color escape characters, if you want the string without color, + /// call `to_plain_str()` rather than `to_string()` + /// # Example + /// the hash value `18fd2deaaf152c7f1222c52fb2673f6192b375f0`
+ /// will be rendered with ANSI color codes as `\x1b[1;31m18fd2deaaf152c7f1222c52fb2673f6192b375f0\x1b[0m` + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.to_plain_str().red().bold()) + } +} + +impl Hash { + /// Create a Hash from the full object data. + /// + /// + #[allow(unused)] + pub fn new(data: &Vec<u8>) -> Hash { + let mut new_hash = Sha1::new(); + new_hash.update(data); + let hash_re = new_hash.finalize(); + let result = <[u8; 20]>::from(hash_re); + + Hash(result) + } + + /// Create Hash from the Object + /// + #[allow(unused)] + pub fn from_meta(meta: &MetaData) -> Hash { + match meta.h { + HashType::Sha1 => { + let mut h = Sha1::new(); + + h.update(match meta.t { + ObjectType::Commit => COMMIT_OBJECT_TYPE, + ObjectType::Tree => TREE_OBJECT_TYPE, + ObjectType::Blob => BLOB_OBJECT_TYPE, + ObjectType::Tag => TAG_OBJECT_TYPE, + _ => panic!("cannot compute the hash of a delta object"), + }); + + h.update(b" "); + h.update(meta.data.len().to_string()); + h.update(b"\0"); + h.update(&meta.data); + + let hash_re = h.finalize(); + let result = <[u8; HASH_BYTES]>::from(hash_re); + + Hash(result) + } + } + } + + /// Decode the hex char to the u8 value + /// + #[allow(unused)] + fn hex_char_value(hex_char: u8) -> Option<u8> { + match hex_char { + b'0'..=b'9' => Some(hex_char - b'0'), + b'a'..=b'f' => Some(hex_char - b'a' + 10), + b'A'..=b'F' => Some(hex_char - b'A' + 10), // Also support uppercase hex chars + _ => None, + } + } + + /// Convert a 40-byte array of hex characters into a Hash; + /// every byte is one hex character of the string + #[allow(unused)] + pub fn from_bytes(hex_hash: &[u8]) -> Option<Hash> { + const BITS_PER_CHAR: usize = 4; + const CHARS_PER_BYTE: usize = 8 / BITS_PER_CHAR; + // split the slice into chunks of CHARS_PER_BYTE hex characters + let byte_chunks = hex_hash.chunks_exact(CHARS_PER_BYTE); + if !byte_chunks.remainder().is_empty() { + return None; + } + let bytes = byte_chunks + .map(|hex_digits| { + hex_digits.iter().try_fold(0, |value, &byte| { + let char_value = Hash::hex_char_value(byte)?; + Some(value << BITS_PER_CHAR | char_value) + }) + }) + .collect::<Option<Vec<u8>>>()?; + let bytes = <[u8; HASH_BYTES]>::try_from(bytes).ok()?; + Some(Hash(bytes)) + } + + /// Create a Hash value from the raw value; + /// it should be a `&[u8; 20]` + #[allow(unused)] + pub fn from_row(hex_hash: &[u8]) -> Hash { + Hash(<[u8; HASH_BYTES]>::try_from(hex_hash).unwrap()) + } + + /// Get the first u8 (0x00~0xff) from the Hash + #[allow(unused)] + pub fn get_first(&self) -> u8 { + return self.0[0]; + } + + /// Create plain String without the color chars + #[allow(unused)] + pub fn to_plain_str(&self) -> String { + hex::encode(self.0) + } + + #[allow(unused)] + pub(crate) fn to_folder(&self) -> String { + let str = self.to_plain_str(); + let str = str[0..2].to_string().clone(); + str + } + + #[allow(unused)] + pub(crate) fn to_filename(&self) -> String { + let str = self.to_plain_str(); + let str = str[2..].to_string().clone(); + str + } +} + +/// +/// +impl FromStr for Hash { + type Err = GitError; + + fn from_str(hex_hash: &str) -> Result<Self, GitError> { + Hash::from_bytes(hex_hash.as_bytes()) + .ok_or_else(|| GitError::InvalidHashValue(hex_hash.to_string())) + } +}
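(For reference, `from_meta` above hashes the standard Git object preimage `"<type> <size>\0<data>"`; e.g. for the 16-byte blob used by the blob tests later in this patch:)
    sha1(b"blob 16\0# Hello Gitmega\n") = 82352c3a6a7a8bd32011751699c7a3648d1b5d3c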
assert_eq!(String::from("18"), test_hash.to_folder()); + assert_eq!( + String::from("fd2deaaf152c7f1222c52fb2673f6192b375f0"), + test_hash.to_filename() + ); + } + + /// The Right Hash decode + #[test] + fn test_hash_with_zero() { + use std::str::FromStr; + + let test_hash = super::Hash::from_str("08fd2deaaf152c7f1222c52fb2673f6192b37500").unwrap(); + let result_hash: [u8; 20] = [ + 8, 253, 45, 234, 175, 21, 44, 127, 18, 34, 197, 47, 178, 103, 63, 97, 146, 179, 117, 0, + ]; + assert_eq!(test_hash.0, result_hash); + } + + /// The Wrong Hash decode + #[test] + fn test_error_hash() { + use std::str::FromStr; + + let test_str = "18fd2deaaf152c7f1222c52fb2673f6192z375f0"; + let test_hash = super::Hash::from_str(test_str).unwrap_err(); + assert_eq!( + format!("The {} is not a valid Hash value ", test_str), + test_hash.to_string() + ); + } +} diff --git a/gust_integrate_lfs/src/git/idx/mod.rs b/gust_integrate_lfs/src/git/idx/mod.rs new file mode 100644 index 00000000..cb385006 --- /dev/null +++ b/gust_integrate_lfs/src/git/idx/mod.rs @@ -0,0 +1,323 @@ +//!Idx file , which is in the dir:`.git/object/pack/*.idx` +//! +//!This file provides the offset of different objects, +//!which is used to quickly find the target object in the pack file(*.pack). +//! + +use std::collections::HashMap; +use std::fmt::Display; +use std::fs::File; +use std::io::{BufReader, Cursor, Read}; +use std::path::PathBuf; + +use byteorder::{BigEndian, ReadBytesExt}; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::pack::Pack; +use crate::git::utils; + +/// +#[allow(unused)] +#[derive(Debug, Clone)] +pub struct IdxItem { + pub id: Hash, + pub crc32: String, + pub offset: usize, +} + +/// +impl Display for IdxItem { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} {} ({})", self.offset, self.id, self.crc32) + } +} + +/// +#[allow(unused)] +#[derive(Default, Debug)] +pub struct Idx { + pub version: u32, + pub number_of_objects: usize, + pub map_of_prefix: HashMap, + pub item_hash: HashMap, + pub idx_items: Vec, + pub pack_signature: Hash, + pub idx_signature: Hash, + _file_data: Vec, +} + +/// +impl Idx { + /// + #[allow(unused)] + fn sha1_prefix(&self, n: usize) -> String { + let pre = format!("{:x}", n); + + if pre.len() == 1 { + format!("0{}", pre) + } else { + pre + } + } + + #[allow(unused)] + pub fn decode_from_path(&mut self, path: PathBuf) { + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + self.decode(buffer).unwrap(); + } + + /// + #[allow(unused)] + pub fn decode(&mut self, data: Vec) -> Result<(), GitError> { + let mut offset: usize = 0; + + let mut id_of_objects: Vec = Vec::new(); + let mut crc32_of_objects: Vec = Vec::new(); + + // 4-byte Header: //FF 74 4F 63 + if data[offset..4].to_vec() != vec![255, 116, 79, 99] { + return Err(GitError::InvalidIdxFile(format!( + "Invalid idx header: {:?}", + data[0..4].to_vec() + ))); + } + offset += 4; + + // 4-byte version number (network byte order): + let mut v = Cursor::new(data[offset..8].to_vec()); + self.version = v.read_u32::().unwrap(); + offset += 4; + + // Layer 1: + // Number of objects in the pack (network byte order) + // The prefix of the SHA-1 hash of the object has how many objects it is in the pack. 
+ + // Layer 2: + // All the SHA-1 hashes of the objects in the pack. + for i in (offset..offset + (20 * n) as usize).filter(|x| ((x - offset) % 20 == 0)) { + let id = Hash::from_row(&data[(i as usize)..(i as usize) + 20].to_vec()); + id_of_objects.push(id); + } + offset += 20 * n as usize; + + // Layer 3: + // The CRC32 of the object data. + for i in (offset..offset + (4 * n) as usize).filter(|x| ((x - offset) % 4 == 0)) { + crc32_of_objects.push(hex::encode(&data[i..i + 4])); + } + offset += 4 * n as usize; + + // Layer 4: + // the object offset in the pack file. + for (index, i) in (offset..offset + (4 * n) as usize) + .filter(|x| ((x - offset) % 4 == 0)) + .enumerate() + { + let mut v = Cursor::new(data[i..i + 4].to_vec()); + let m = v.read_u32::<BigEndian>().unwrap() as usize; + + self.idx_items.push(IdxItem { + id: id_of_objects[index].clone(), + crc32: crc32_of_objects[index].clone(), + offset: m, + }); + } + offset += 4 * n as usize; + + // Layer 5 + + // Layer 6: + // The SHA-1 hash of the pack file itself. + // The SHA-1 hash of the index file itself. + self.pack_signature = Hash::from_row(&data[offset..offset + 20].to_vec()); + offset += 20; + self.idx_signature = Hash::from_row(&data[offset..].to_vec()); + + // fill the item_hash map. + for (index, item) in self.idx_items.iter().enumerate() { + self.item_hash.insert(item.id, index); + } + Ok(()) + } + + #[allow(unused)] + pub fn encode(pack: Pack) -> Self { + let mut idx = Self::default(); + let mut result: Vec<u8> = vec![255, 116, 79, 99]; //header + let mut version: Vec<u8> = vec![0, 0, 0, 2]; + result.append(&mut version); + idx.version = 2; + + // Layer 1: + // Number of objects in the pack (network byte order) + // For each 1-byte SHA-1 prefix, the cumulative count of objects whose hash starts with a prefix less than or equal to it. + idx.number_of_objects = pack.get_object_number(); + let mut fan_out: [u32; 256] = [0; 256]; + let cache = pack.get_cache(); + for (key, value) in cache.by_hash.iter() { + fan_out[key.get_first() as usize] += 1; + } + let mut _sum = 0; + for i in 0..256 { + _sum += fan_out[i]; + fan_out[i] = _sum; + result.append(&mut utils::u32_vec(fan_out[i])); + } + + // Layer 2: + // All the SHA-1 hashes of the objects in the pack. + for key in cache.by_hash.keys() { + result.append(&mut key.0.to_vec()) + } + + // Layer 3: + // The CRC32 of the object data. + //BUG: Because the data content fed to the crc32 algorithm differs, + //this differs from the crc32 values in an idx generated by git + use crc::{Algorithm, Crc, CRC_32_ISO_HDLC}; + for values in cache.by_hash.values() { + let meta = values; + let _data = meta.convert_to_vec().unwrap(); + let castagnoli: Crc<u32> = Crc::<u32>::new(&CRC_32_ISO_HDLC); + result.append(&mut utils::u32_vec(castagnoli.checksum(&_data))); + println!("Type:{}", values.t); + } + // Layer 4: + // the object offset in the pack file. + for _hash in cache.by_hash.keys() { + let offset = cache.by_offset.get(_hash).unwrap(); + result.append(&mut utils::u32_vec(*offset as u32)); + } + + // Layer 5 is only for big offsets (> 4G); temporarily skipped + + // Layer 6: + // The SHA-1 hash of the pack file itself. +
let pack_hash = pack.get_hash(); + result.append(&mut pack_hash.0.to_vec()); + // The SHA-1 hash of the index file itself. + let idx_hash = Hash::new(&result); + result.append(&mut idx_hash.0.to_vec()); + idx._file_data = result; + idx + } + + #[allow(unused)] + pub fn get_offset(&self, obj_id: Hash) -> IdxItem { + let prefix = self.item_hash.get(&obj_id); + let obj_prefix = self.idx_items[*prefix.unwrap()].clone(); + + obj_prefix + } +} + +/// +#[cfg(test)] +mod tests { + use std::env; + use std::fs::File; + use std::io::{BufReader, Read, Write}; + use std::path::{Path, PathBuf}; + + use bstr::ByteSlice; + use tokio_test::block_on; + + use crate::git::utils; + + use super::Idx; + + /// Test reading an idx file + #[test] + fn test_idx_read_from_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx"); + + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + let mut idx = Idx::default(); + idx.decode(buffer).unwrap(); + + assert_eq!(2, idx.version); + assert_eq!(614, idx.number_of_objects); + assert_eq!(2, idx.map_of_prefix["7c"]); + assert_eq!(idx.number_of_objects, idx.idx_items.len()); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + idx.pack_signature.to_plain_str() + ); + assert_eq!( + "92d07408a070a5fbea3c1f2d00e696293b78e7c6", + idx.idx_signature.to_plain_str() + ); + println!("{:?}", idx); + } + + /// Test writing an idx file + #[test] + fn test_idx_write_to_file() { + // "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack" + use super::super::pack; + let packs = block_on(pack::Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + )); + let idx = Idx::encode(packs); + + let mut file = std::fs::File::create("./test.idx").expect("create failed"); + file.write_all(idx._file_data.as_bytes()) + .expect("write failed"); + + println!("data written to file"); + let idx_file = File::open(&Path::new("./test.idx")).unwrap(); + + let mut reader = BufReader::new(idx_file); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + let mut idx = Idx::default(); + + idx.decode(buffer).unwrap(); + } + + /// fan out table create test + #[test] + fn unsafe_fan_out() { + let mut result: Vec<u8> = vec![]; + let mut fan_out: [u32; 256] = [0; 256]; + let mut _sum = 0; + for i in 0..255 { + _sum += fan_out[i] + 5; + fan_out[i] = _sum; + result.append(&mut utils::u32_vec(fan_out[i])); + } + assert_eq!(result[0..4], [0, 0, 0, 5]); + assert_eq!(result[4..8], [0, 0, 0, 10]); + } + + // crc32 create test + #[test] + fn test_crc32() { + use crc::{Crc, CRC_32_ISCSI}; + pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI); + assert_eq!(CASTAGNOLI.checksum(b"123456789"), 0xe3069283); + } +} diff --git a/gust_integrate_lfs/src/git/lfs/mod.rs b/gust_integrate_lfs/src/git/lfs/mod.rs new file mode 100644 index 00000000..21dab618 --- /dev/null +++ b/gust_integrate_lfs/src/git/lfs/mod.rs @@ -0,0 +1 @@ +pub mod structs; diff --git a/gust_integrate_lfs/src/git/lfs/structs.rs b/gust_integrate_lfs/src/git/lfs/structs.rs new file mode 100644 index 00000000..7f292369 --- /dev/null +++ b/gust_integrate_lfs/src/git/lfs/structs.rs @@ -0,0 +1,173 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::PathBuf; + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct RequestVars { + pub oid: String, + pub size: i64, + #[serde(default)] + pub user: String,
+ #[serde(default)] + pub password: String, + #[serde(default)] + pub repo: String, + #[serde(default)] + pub authorization: String, +} + +impl RequestVars { + pub async fn download_link(&self, ext_origin: String) -> String { + self.internal_link("objects".to_string(), ext_origin).await + } + + pub async fn upload_link(&self, ext_origin: String) -> String { + self.internal_link("objects".to_string(), ext_origin).await + } + + async fn internal_link(&self, subpath: String, ext_origin: String) -> String { + let mut path = PathBuf::new(); + + let user = &self.user; + if user.len() > 0 { + path.push(user); + } + + let repo = &self.repo; + if repo.len() > 0 { + path.push(repo); + } + + path.push(ext_origin); + + path.push(&subpath); + path.push(&self.oid); + + path.into_os_string().into_string().unwrap() + } + + pub async fn verify_link(&self, ext_origin: String) -> String { + let path = format!("/verify/{}", &self.oid); + format!("{}{}", ext_origin, path) + } +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct BatchVars { + pub transfers: Vec<String>, + pub operation: String, + pub objects: Vec<RequestVars>, +} + +#[derive(Debug, Default)] +pub struct MetaObject { + pub oid: String, + pub size: i64, + pub exist: bool, +} + +#[derive(Serialize, Deserialize)] +pub struct BatchResponse { + pub transfer: String, + pub objects: Vec<Representation>, + pub hash_algo: String, +} + +#[derive(Serialize, Deserialize)] +pub struct Link { + pub href: String, + pub header: HashMap<String, String>, + pub expires_at: String, +} + +#[derive(Serialize, Deserialize, Default)] +pub struct ObjectError { + pub code: i64, + pub message: String, +} + +#[derive(Serialize, Deserialize)] +pub struct Representation { + pub oid: String, + pub size: i64, + #[serde(skip_serializing_if = "Option::is_none")] + pub authenticated: Option<bool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub actions: Option<HashMap<String, Link>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option<ObjectError>, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct User { + pub name: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Lock { + pub id: String, + pub path: String, + pub locked_at: String, + pub owner: Option<User>, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Ref { + pub name: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct LockRequest { + pub path: String, + #[serde(rename(serialize = "ref", deserialize = "ref"))] + pub refs: Ref, +} + +#[derive(Serialize, Deserialize)] +pub struct LockResponse { + pub lock: Lock, + pub message: String, +} + +#[derive(Serialize, Deserialize)] +pub struct UnlockRequest { + pub force: Option<bool>, + #[serde(rename(serialize = "ref", deserialize = "ref"))] + pub refs: Ref, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct UnlockResponse { + pub lock: Lock, + pub message: String, +} + +#[derive(Serialize, Deserialize)] +pub struct LockList { + pub locks: Vec<Lock>, + pub next_cursor: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct VerifiableLockRequest { + #[serde(rename(serialize = "ref", deserialize = "ref"))] + pub refs: Ref, + pub cursor: Option<String>, + pub limit: Option<u64>, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct VerifiableLockList { + pub ours: Vec<Lock>, + pub theirs: Vec<Lock>, + pub next_cursor: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct LockListQuery { + pub path: Option<String>, + pub id: Option<String>, + pub cursor: Option<String>, + pub limit: Option<String>, + pub refspec: Option<String>, +}
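(For reference, the `rename(... = "ref")` attributes above map the Rust field `refs` to the JSON key `ref`, so a lock request body looks like this sketch.)
    {"path":"a.bin","ref":{"name":"refs/heads/main"}}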
diff --git a/gust_integrate_lfs/src/git/mod.rs b/gust_integrate_lfs/src/git/mod.rs new file mode 100644 index 00000000..07e857b4 --- /dev/null +++ b/gust_integrate_lfs/src/git/mod.rs @@ -0,0 +1,21 @@ +//! +//! +//! +//! +//! +//! +//! +//! + +pub mod errors; +pub mod hash; +pub mod idx; +pub mod lfs; +pub mod object; +pub mod pack; +pub mod protocol; +pub mod utils; + +/// +#[cfg(test)] +mod tests {} diff --git a/gust_integrate_lfs/src/git/object/base/blob.rs b/gust_integrate_lfs/src/git/object/base/blob.rs new file mode 100644 index 00000000..1eb0ff69 --- /dev/null +++ b/gust_integrate_lfs/src/git/object/base/blob.rs @@ -0,0 +1,170 @@ +//! +//! Blob file object structure +//! +use std::cmp::Ordering; +use std::fmt::Display; +use std::path::PathBuf; +use std::sync::Arc; + +use bstr::BString; + +use crate::git::errors::GitError; +use crate::git::object::base::tree::*; +use crate::git::object::base::ObjectClass; +use crate::git::object::metadata::MetaData; + +/// Git Object: blob +#[derive(Eq, Debug, Hash, Clone)] +pub struct Blob { + pub filename: String, + pub meta: Arc<MetaData>, +} + +impl Ord for Blob { + fn cmp(&self, other: &Self) -> Ordering { + let o = other.filename.cmp(&self.filename); + match o { + Ordering::Equal => other.meta.size.cmp(&self.meta.size), + _ => o, + } + } +} + +impl PartialOrd for Blob { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + let o = other.filename.cmp(&self.filename); + match o { + Ordering::Equal => Some(other.meta.size.cmp(&self.meta.size)), + _ => Some(o), + } + } +} + +impl PartialEq for Blob { + fn eq(&self, other: &Self) -> bool { + self.filename.eq(&other.filename) + } +} + +/// +impl Blob { + pub fn parse_from_file(path: PathBuf) -> Self { + let meta = ObjectClass::parse_meta(path); + Blob::new(Arc::new(meta)) + } + + #[allow(unused)] + pub fn new(metadata: Arc<MetaData>) -> Self { + Self { + meta: metadata, + filename: String::new(), + } + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result<String, GitError> { + self.meta.write_to_file(root_path) + } + + /// + #[allow(unused)] + pub(crate) fn to_tree_item(&self, filename: String) -> Result<TreeItem, GitError> { + Ok(TreeItem { + mode: TreeItemType::Blob.to_bytes().to_vec(), + item_type: TreeItemType::Blob, + id: self.meta.id.clone(), + filename, + }) + } +} + +impl Display for Blob { + /// To save output space, only the first line of the content is printed for now + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut print_data: Vec<u8> = vec![]; + for value in self.meta.data.iter() { + if *value != b'\n' { + print_data.push(value.clone()); + } else { + break; + } + } + + writeln!(f, "size:{}", self.meta.data.len()).unwrap(); + writeln!(f, "meta data size:{}", self.meta.size).unwrap(); + writeln!(f, "File Name: {}", self.filename).unwrap(); + writeln!(f, "Type: Blob\n{}", BString::new(print_data)).unwrap(); + writeln!(f, "Only Show the first line of the File...") + } +} + +/// +#[cfg(test)] +mod tests { + use std::env; + use std::fs::File; + use std::io::BufReader; + use std::io::Read; + use std::path::{Path, PathBuf}; + use std::sync::Arc; + + use crate::git::object::metadata::MetaData; + use crate::git::object::types::ObjectType; + + use super::Blob; + + /// + #[test] + fn test_blob_write_to_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/gitmega.md"); + let f = File::open(path).ok(); + let mut reader = BufReader::new(f.unwrap()); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + // if env::consts::OS == "windows" { + // buffer = buffer.replace(b"\r\n", b"\n"); + // } + + let data = buffer; +
+ let meta = MetaData::new(ObjectType::Blob, &data); + + meta.write_to_file("/tmp".to_string()) + .expect("Write error!"); + assert!(Path::new("/tmp/82/352c3a6a7a8bd32011751699c7a3648d1b5d3c").exists()); + } + + /// + #[test] + fn test_blob_read_from_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md"); + + let meta = MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + assert_eq!(meta.t, crate::git::object::types::ObjectType::Blob); + + let blob = Blob { + meta: Arc::new(meta), + filename: String::new(), + }; + + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + blob.meta.id.to_plain_str() + ); + + assert_eq!(16, blob.meta.size); + // assert_eq!( + // "# Hello Gitmega\n", + // String::from_utf8(blob.meta.data).unwrap().as_str() + // ); + } +} diff --git a/gust_integrate_lfs/src/git/object/base/commit.rs b/gust_integrate_lfs/src/git/object/base/commit.rs new file mode 100644 index 00000000..73a87170 --- /dev/null +++ b/gust_integrate_lfs/src/git/object/base/commit.rs @@ -0,0 +1,296 @@ +//! +//! Commit Object Struct +//! + +use std::cmp::Ordering; +use std::fmt::Display; +use std::path::PathBuf; +use std::sync::Arc; + +use bstr::ByteSlice; + +use crate::errors::GustError; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::object::base::sign::AuthorSign; +use crate::git::object::base::ObjectClass; +use crate::git::object::metadata::MetaData; +use crate::git::object::types::ObjectType; + +/// Git Object: commit +#[allow(unused)] +#[derive(Eq, Debug, Hash, Clone)] +pub struct Commit { + pub meta: Arc<MetaData>, + pub tree_id: Hash, + pub parent_tree_ids: Vec<Hash>, + pub author: AuthorSign, + pub committer: AuthorSign, + pub message: String, +} + +impl Ord for Commit { + fn cmp(&self, other: &Self) -> Ordering { + other.meta.size.cmp(&self.meta.size) + } +} + +impl PartialOrd for Commit { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(other.meta.size.cmp(&self.meta.size)) + } +} + +impl PartialEq for Commit { + fn eq(&self, other: &Self) -> bool { + self.meta.size == other.meta.size + } +} + +/// +impl Commit { + pub fn parse_from_file(path: PathBuf) -> Self { + let meta = ObjectClass::parse_meta(path); + Commit::new(Arc::new(meta)) + } + + /// + pub fn new(metadata: Arc<MetaData>) -> Self { + let mut a = Self { + meta: metadata, + tree_id: Hash::default(), + parent_tree_ids: vec![], + author: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + committer: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + message: "".to_string(), + }; + a.decode_meta().unwrap(); + a + } + + /// Decode the Metadata.data and convert it to a `Commit` struct + pub(crate) fn decode_meta(&mut self) -> Result<(), GustError> { + let mut data = self.meta.data.clone(); + + // Find the tree id and remove it from the data + let tree_begin = data.find_byte(0x20).unwrap(); + let tree_end = data.find_byte(0x0a).unwrap(); + self.tree_id = Hash::from_bytes(&data[tree_begin + 1..tree_end].to_vec()).unwrap(); + data = data[tree_end + 1..].to_vec(); + + // Find the parent tree ids and remove them from the data + let author_begin = data.find("author").unwrap(); + if data.find_iter("parent").count() > 0 { + let mut parents: Vec<Hash> = Vec::new(); + let mut index = 0; +
< author_begin { + let parent_begin = data.find_byte(0x20).unwrap(); + let parent_end = data.find_byte(0x0a).unwrap(); + parents + .push(Hash::from_bytes(&data[parent_begin + 1..parent_end].to_vec()).unwrap()); + index = index + parent_end + 1; + } + + self.parent_tree_ids = parents; + } + data = data[author_begin..].to_vec(); + + // Find the author and remove it from the data + let author_data = data[..data.find_byte(0x0a).unwrap()].to_vec(); + self.author.decode_from_data(author_data)?; + data = data[data.find_byte(0x0a).unwrap() + 1..].to_vec(); + + // Find the committer and remove it from the data + let committer_data = data[..data.find_byte(0x0a).unwrap()].to_vec(); + self.committer.decode_from_data(committer_data)?; + self.message = data[data.find_byte(0x0a).unwrap() + 1..] + .to_vec() + .to_str() + .unwrap() + .to_string(); + + Ok(()) + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } + + /// + #[allow(unused)] + pub(crate) fn encode_metadata(&self) -> Result { + let mut data = Vec::new(); + + data.extend_from_slice("tree".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(self.tree_id.to_plain_str().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + + for parent_tree_id in &self.parent_tree_ids { + data.extend_from_slice("parent".as_bytes()); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(parent_tree_id.to_plain_str().as_bytes()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + } + + data.extend_from_slice(self.author.encode_to_data().unwrap().as_ref()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + data.extend_from_slice(self.committer.encode_to_data().unwrap().as_ref()); + data.extend_from_slice(0x0au8.to_be_bytes().as_ref()); + data.extend_from_slice(self.message.as_bytes()); + + Ok(MetaData::new(ObjectType::Commit, &data)) + } +} + +impl Display for Commit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Tree: {}", self.tree_id)?; + + for parent in self.parent_tree_ids.iter() { + writeln!(f, "parent: {}", parent)?; + } + + writeln!(f, "author {}", self.author)?; + writeln!(f, "committer {}", self.committer)?; + writeln!(f, "Message: {}", self.message) + } +} + +#[cfg(test)] +mod tests { + use std::env; + use std::path::Path; + use std::path::PathBuf; + use std::str::FromStr; + use std::sync::Arc; + + use crate::git::hash::Hash; + use crate::git::object::types::ObjectType; + + use super::AuthorSign; + use super::Commit; + use super::MetaData; + + fn get_empty_commit(path: PathBuf) -> super::Commit { + let meta = MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!"); + + Commit { + meta: Arc::new(meta), + tree_id: Hash::default(), + parent_tree_ids: vec![], + author: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + committer: AuthorSign { + t: "".to_string(), + name: "".to_string(), + email: "".to_string(), + timestamp: 0, + timezone: "".to_string(), + }, + message: "".to_string(), + } + } + + /// + #[test] + fn test_commit_read_from_file_without_parent() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/commit-1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a"); + + let mut commit = get_empty_commit(path); + + commit.decode_meta().unwrap(); + + assert_eq!( + 
String::from("1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3"), + commit.tree_id.to_plain_str() + ); + } + + /// + #[test] + fn test_commit_read_from_file_with_parent() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/commit-3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb"); + + let mut commit = get_empty_commit(path); + + commit.decode_meta().unwrap(); + + assert_eq!( + "9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20", + commit.tree_id.to_plain_str() + ); + } + + /// + #[test] + fn test_commit_write_to_file() { + let meta = MetaData::new(ObjectType::Commit, &vec![]); + + let author = AuthorSign { + t: "author".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1649521615, + timezone: "+0800".to_string(), + }; + + let committer = AuthorSign { + t: "committer".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1649521615, + timezone: "+0800".to_string(), + }; + + let mut commit = super::Commit { + meta: Arc::new(meta), + tree_id: Hash::from_str("9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20").unwrap(), + parent_tree_ids: vec![ + Hash::from_str("1b490ec04712d147bbe7c8b3a6d86ed4d3587a6a").unwrap(), + ], + author, + committer, + message: "gpgsig -----BEGIN PGP SIGNATURE-----\n \n iQIzBAABCAAdFiEEanuf5/5ADLU2lvsCZL9E4tsHuXIFAmJRs88ACgkQZL9E4tsH\n uXJAmBAAtubFjLjNzIgal1/Gwy/zlpw7aQvVO2xcX3Xhbeb0UJyKvrSm/Ht19kiz\n 6Bc8ZV75mpKKip93XAljUgWgAO6Q4DUFnVA5bwF1vvhKHbgXLr+I8q+5GqmLW61U\n oBrB/3aJJ/uAxElQz5nOhgB7ztCfeKQ5egbhBXn9QGqPg/RkfQmDPYsU7evk1J0Z\n CyKinbSNe0c92qE95nURzozFb1zf0rO9NtnpYohFCEO5qyuoV4nz7npnJD4Miqy9\n IUQapeJeZC7eDvU8AWbxARrkXQkyfLSebDVcqbz7WfQz+4dhoK7jADaB48oKpR/K\n bKZDJU9a2t2nPC1ojzjQJgXZ6x4linQofBR8wE1ns3W5RoRgcBSj8dQMNH8wXa/T\n oQD6hlCJpjvbiYHuc3tSgCESI4ZU7zGpL9BAQK+C91T8CUamycF1H7TAHXdzNClR\n bWO4EeRzvwZZyIL029DYFxD2IFN7OQb5jc7JvcroIW8jUN0sMPS6jY+d0bg5pgIs\n yJjmI6qPYy7R35OElfTlw8aVSOAnVbQh7MZt6n3JUyezwK9MwbiKdAYKOLYaVaC0\n ++SY+NV4Dwe6W72KhFhxwOJQRGMfES1mRxy4n85BgqfCGy7STGSBOmon3VZEl89z\n rmvdX0JXy93hGH0oUQINsN9bzpsdaQUWVND8wAnb0+sU4LvJz90=\n =9qni\n -----END PGP SIGNATURE-----\n\nAdd gust.md and modify gitmega.md\n\nSigned-off-by: Quanyi Ma \n".to_string(), + }; + + commit.meta = Arc::new(commit.encode_metadata().unwrap()); + + assert_eq!( + "3b8bc1e152af7ed6b69f2acfa8be709d1733e1bb", + commit.meta.id.to_plain_str() + ); + + commit + .write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/3b/8bc1e152af7ed6b69f2acfa8be709d1733e1bb").exists()); + } +} diff --git a/gust_integrate_lfs/src/git/object/base/mod.rs b/gust_integrate_lfs/src/git/object/base/mod.rs new file mode 100644 index 00000000..94905f3d --- /dev/null +++ b/gust_integrate_lfs/src/git/object/base/mod.rs @@ -0,0 +1,49 @@ +//! +//! +//! +//! +//! + +use std::{fmt::Display, path::PathBuf}; + +use crate::git::object::metadata::MetaData; + +pub mod blob; +pub mod commit; +pub mod sign; +pub mod tag; +pub mod tree; + +/// **The Object Class Enum**
+/// Merges the four base object classes into one enum so they can be stored and
+/// passed around uniformly
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+pub enum ObjectClass {
+    BLOB(blob::Blob),
+    COMMIT(commit::Commit),
+    TREE(tree::Tree),
+    TAG(tag::Tag),
+}
+
+///
+///
+///
+impl Display for ObjectClass {
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            ObjectClass::BLOB(_) => write!(f, "BLOB"),
+            ObjectClass::COMMIT(_) => write!(f, "COMMIT"),
+            ObjectClass::TREE(_) => write!(f, "TREE"),
+            ObjectClass::TAG(_) => write!(f, "TAG"),
+        }
+    }
+}
+
+impl ObjectClass {
+    fn parse_meta(path: PathBuf) -> MetaData {
+        MetaData::read_object_from_file(path.to_str().unwrap().to_string())
+            .expect("Read error!")
+    }
+}
+
+
diff --git a/gust_integrate_lfs/src/git/object/base/sign.rs b/gust_integrate_lfs/src/git/object/base/sign.rs
new file mode 100644
index 00000000..53a71eea
--- /dev/null
+++ b/gust_integrate_lfs/src/git/object/base/sign.rs
@@ -0,0 +1,141 @@
+//! Sign (signature) object struct
+//!
+//!
+//!
+//!
+//!
+//!
+
+use std::fmt::Display;
+
+use bstr::ByteSlice;
+
+use crate::errors::GustError;
+
+///
+#[allow(unused)]
+#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
+pub struct AuthorSign {
+    pub t: String,
+    pub name: String,
+    pub email: String,
+    pub timestamp: usize,
+    pub timezone: String,
+}
+
+///
+impl Display for AuthorSign {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "{} \n Email:<{}> \n timestamp:{}\n timezone:{}",
+            self.name, self.email, self.timestamp, self.timezone
+        )
+    }
+}
+
+///
+impl AuthorSign {
+    ///
+    #[allow(unused)]
+    pub(crate) fn decode_from_data(&mut self, data: Vec<u8>) -> Result<(), GustError> {
+        let mut data = data;
+
+        let name_start = data.find_byte(0x20).unwrap();
+
+        self.t = String::from_utf8(data[..name_start].to_vec()).unwrap();
+
+        let email_start = data.find_byte(0x3C).unwrap();
+        let email_end = data.find_byte(0x3E).unwrap();
+
+        self.name = data[name_start + 1..email_start - 1]
+            .to_str()
+            .unwrap()
+            .to_string();
+        self.email = data[email_start + 1..email_end]
+            .to_str()
+            .unwrap()
+            .to_string();
+        data = data[email_end + 2..].to_vec();
+
+        let timestamp_split = data.find_byte(0x20).unwrap();
+        self.timestamp = data[0..timestamp_split]
+            .to_str()
+            .unwrap()
+            .parse::<usize>()
+            .unwrap();
+        self.timezone = data[timestamp_split + 1..].to_str().unwrap().to_string();
+
+        Ok(())
+    }
+
+    ///
+    #[allow(unused)]
+    pub(crate) fn encode_to_data(&self) -> Result<Vec<u8>, GustError> {
+        let mut data = Vec::new();
+
+        data.extend_from_slice(self.t.as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.name.as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(0x3Cu8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.email.as_bytes());
+        data.extend_from_slice(0x3Eu8.to_be_bytes().as_ref());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.timestamp.to_string().as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.timezone.as_bytes());
+
+        Ok(data)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_author_sign_encode() {
+        let author = super::AuthorSign {
+            t: "author".to_string(),
+            name: "Quanyi Ma".to_string(),
+            email: "eli@patch.sh".to_string(),
+            timestamp: 1649521615,
+            timezone: "+0800".to_string(),
+        };
+
+        let data = author.encode_to_data().unwrap();
+
+        let author_data = [
+            97, 117, 116, 104, 111, 114, 32, 81, 117, 97, 110, 121, 105, 32, 77, 97, 32, 60, 101,
+            108, 105, 64, 112, 97, 116, 99, 104, 46, 115, 104, 62, 32, 49, 54, 52, 57, 53, 50, 49,
+            54, 49, 53, 32, 43, 48, 56, 48, 48,
+        ]
+        .to_vec();
+
+        assert_eq!(data, author_data);
+    }
+
+    #[test]
+    fn test_author_sign_decode() {
+        let author_data = [
+            97, 117, 116, 104, 111, 114, 32, 81, 117, 97, 110, 121, 105, 32, 77, 97, 32, 60, 101,
+            108, 105, 64, 112, 97, 116, 99, 104, 46, 115, 104, 62, 32, 49, 54, 52, 57, 53, 50, 49,
+            54, 49, 53, 32, 43, 48, 56, 48, 48,
+        ]
+        .to_vec();
+
+        let mut author = super::AuthorSign {
+            t: "".to_string(),
+            name: "".to_string(),
+            email: "".to_string(),
+            timestamp: 0,
+            timezone: "".to_string(),
+        };
+
+        author.decode_from_data(author_data).unwrap();
+
+        assert_eq!(author.t, "author");
+        assert_eq!(author.name, "Quanyi Ma");
+        assert_eq!(author.email, "eli@patch.sh");
+        assert_eq!(author.timestamp, 1649521615);
+        assert_eq!(author.timezone, "+0800");
+    }
+}
diff --git a/gust_integrate_lfs/src/git/object/base/tag.rs b/gust_integrate_lfs/src/git/object/base/tag.rs
new file mode 100644
index 00000000..ed52f36c
--- /dev/null
+++ b/gust_integrate_lfs/src/git/object/base/tag.rs
@@ -0,0 +1,273 @@
+//! Tag object struct
+//!
+//!
+//!
+
+/// Git Object: tag
+use std::cmp::Ordering;
+use std::fmt::Display;
+use std::sync::Arc;
+
+use bstr::ByteSlice;
+
+use crate::errors::GustError;
+
+use crate::git::errors::GitError;
+use crate::git::hash::Hash;
+use crate::git::object::base::sign::AuthorSign;
+use crate::git::object::metadata::MetaData;
+use crate::git::object::types::ObjectType;
+
+#[allow(unused)]
+#[derive(Eq, Debug, Hash, Clone)]
+pub struct Tag {
+    pub meta: Arc<MetaData>,
+    pub object: Hash,
+    pub t: ObjectType,
+    pub tag: String,
+    pub tagger: AuthorSign,
+    pub message: String,
+}
+
+impl Ord for Tag {
+    fn cmp(&self, other: &Self) -> Ordering {
+        other.meta.size.cmp(&self.meta.size)
+    }
+}
+
+impl PartialOrd for Tag {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(other.meta.size.cmp(&self.meta.size))
+    }
+}
+
+impl PartialEq for Tag {
+    fn eq(&self, other: &Self) -> bool {
+        self.meta.size == other.meta.size
+    }
+}
+
+///
+impl Tag {
+    /// Tag constructor: takes a `MetaData` and
+    /// immediately decodes the tag from it via `fn decode_metadata`
+    pub fn new(meta: Arc<MetaData>) -> Self {
+        let mut a = Self {
+            meta: meta.clone(),
+            object: meta.id.clone(),
+            t: ObjectType::Commit,
+            tag: "".to_string(),
+            tagger: AuthorSign {
+                t: "".to_string(),
+                name: "".to_string(),
+                email: "".to_string(),
+                timestamp: 0,
+                timezone: "".to_string(),
+            },
+            message: "".to_string(),
+        };
+        a.decode_metadata().unwrap();
+        a
+    }
+
+    ///
+    #[allow(unused)]
+    fn decode_metadata(&mut self) -> Result<(), GustError> {
+        let mut data = self.meta.data.clone();
+
+        let object_begin = data.find_byte(0x20).unwrap();
+        let object_end = data.find_byte(0x0a).unwrap();
+        self.object = Hash::from_bytes(&data[object_begin + 1..object_end].to_vec()).unwrap();
+        data = data[object_end + 1..].to_vec();
+
+        let type_begin = data.find_byte(0x20).unwrap();
+        let type_end = data.find_byte(0x0a).unwrap();
+        self.t = ObjectType::from_string(data[type_begin + 1..type_end].to_str().unwrap()).unwrap();
+        data = data[type_end + 1..].to_vec();
+
+        let tag_begin = data.find_byte(0x20).unwrap();
+        let tag_end = data.find_byte(0x0a).unwrap();
+        self.tag = data[tag_begin + 1..tag_end]
+            .to_str()
+            .unwrap()
+            .parse()
+            .unwrap();
+        data = data[tag_end + 1..].to_vec(); // Fixed bug: slice from tag_end, not type_end
+
+        let tagger_begin = data.find("tagger").unwrap();
+        let tagger_end = data.find_byte(0x0a).unwrap();
+        let tagger_data = data[tagger_begin..tagger_end].to_vec();
+        self.tagger.decode_from_data(tagger_data)?;
+        data = data[data.find_byte(0x0a).unwrap() + 1..].to_vec();
+
+        self.message = data[data.find_byte(0x0a).unwrap()..]
+            .to_vec()
+            .to_str()
+            .unwrap()
+            .to_string();
+
+        Ok(())
+    }
+
+    ///
+    #[allow(unused)]
+    fn encode_metadata(&self) -> Result<MetaData, GustError> {
+        let mut data = Vec::new();
+
+        data.extend_from_slice("object".as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.object.to_plain_str().as_bytes());
+        data.extend_from_slice(0x0au8.to_be_bytes().as_ref());
+
+        data.extend_from_slice("type".as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.t.to_string().as_bytes());
+        data.extend_from_slice(0x0au8.to_be_bytes().as_ref());
+
+        data.extend_from_slice("tag".as_bytes());
+        data.extend_from_slice(0x20u8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.tag.as_bytes());
+        data.extend_from_slice(0x0au8.to_be_bytes().as_ref());
+
+        data.extend_from_slice(self.tagger.encode_to_data().unwrap().as_ref());
+        data.extend_from_slice(0x0au8.to_be_bytes().as_ref());
+        data.extend_from_slice(self.message.as_bytes());
+
+        Ok(MetaData::new(ObjectType::Tag, &data))
+    }
+
+    ///
+    #[allow(unused)]
+    fn write_to_file(&self, root_path: String) -> Result<String, GustError> {
+        self.meta.write_to_file(root_path)
+    }
+}
+
+impl Display for Tag {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        writeln!(f, "Type: Tag").unwrap();
+        writeln!(f, "Tag : {}", self.tag).unwrap();
+        self.tagger.fmt(f).unwrap();
+        writeln!(f, "{}", self.message)
+    }
+}
+
+///
+#[cfg(test)]
+mod tests {
+    use std::env;
+    use std::path::Path;
+    use std::path::PathBuf;
+    use std::str::FromStr;
+    use std::sync::Arc;
+    use std::vec;
+
+    use crate::git::hash::Hash;
+    use crate::git::hash::HashType;
+    use crate::git::object::types::ObjectType;
+
+    use super::AuthorSign;
+    use super::MetaData;
+    use super::Tag;
+
+    ///
+    #[test]
+    fn test_tag_read_from_file() {
+        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        path.push("resources/data/test/tag-e5c324b03b72b26f11557c4955c6d17c68dc8595");
+
+        let meta = MetaData::read_object_from_file(path.to_str().unwrap().to_string())
+            .expect("Read error!");
+
+        assert_eq!(ObjectType::Tag, meta.t);
+        assert_eq!(976, meta.size);
+        assert_eq!(
+            "e5c324b03b72b26f11557c4955c6d17c68dc8595",
+            meta.id.to_plain_str()
+        );
+
+        let mut tag = Tag {
+            meta: Arc::new(meta),
+            object: Hash::default(),
+            t: ObjectType::Commit,
+            tag: "".to_string(),
+            tagger: AuthorSign {
+                t: "".to_string(),
+                name: "".to_string(),
+                email: "".to_string(),
+                timestamp: 0,
+                timezone: "+0000".to_string(),
+            },
+            message: "".to_string(),
+        };
+
+        tag.decode_metadata().unwrap();
+
+        assert_eq!(
+            "6414e45babf0bdd043ba40d31123053cfebef26c",
+            tag.object.to_plain_str()
+        );
+        assert_eq!("commit", tag.t.to_string());
+        assert_eq!("v1.1.0", tag.tag);
+        assert_eq!(1653037847, tag.tagger.timestamp);
+        println!("{}", tag);
+    }
+
+    #[test]
+    fn test_output_meta() {
+        let meta = MetaData {
+            t: ObjectType::Tag,
+            h: HashType::Sha1,
+            id: Hash::from_str("df1087c478c8d337cb587b897e86f2455e2687ed").unwrap(),
+            size: 155,
+            data: vec![
+                111, 98, 106, 101, 99, 116, 32, 51, 55, 50, 49, 51, 101, 55, 98, 98, 51, 99, 51,
+                51, 52, 97, 48, 102, 55, 55, 48, 56, 99, 55, 97, 102, 99, 97, 98, 53, 98, 97, 98,
+                98, 51, 102, 57, 53, 52, 51, 52, 10, 116, 121, 112, 101, 32, 99, 111, 109, 109,
+                105, 116, 10, 116, 97, 103,
32, 48, 46, 49, 10, 116, 97, 103, 103, 101, 114, 32, + 97, 100, 105, 116, 121, 97, 32, 60, 100, 101, 118, 64, 99, 104, 105, 109, 101, 114, + 97, 99, 111, 100, 101, 114, 46, 110, 101, 116, 62, 32, 49, 52, 50, 56, 54, 49, 50, + 48, 48, 55, 32, 45, 48, 52, 48, 48, 10, 10, 70, 105, 114, 115, 116, 32, 105, 109, + 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111, 110, 32, 111, 102, 32, 116, + 104, 101, 32, 99, 108, 105, 10, + ], + delta_header: vec![], + }; + + let tag = Tag::new(Arc::new(meta)); + + println!("{}", tag); + } + + /// + #[test] + fn test_tag_write_to_file() { + let meta = MetaData::new(ObjectType::Tag, &vec![]); + + let tagger = AuthorSign { + t: "tagger".to_string(), + name: "Quanyi Ma".to_string(), + email: "eli@patch.sh".to_string(), + timestamp: 1653037847, + timezone: "+0800".to_string(), + }; + + let mut tag = Tag { + meta: Arc::new(meta), + object: Hash::from_str("6414e45babf0bdd043ba40d31123053cfebef26c").unwrap(), + t: ObjectType::Commit, + tag: "v1.1.0".to_string(), + tagger, + message: "\nIt's a lastest object\n-----BEGIN PGP SIGNATURE-----\n\niQIzBAABCAAdFiEEanuf5/5ADLU2lvsCZL9E4tsHuXIFAmKHWxcACgkQZL9E4tsH\nuXIeFhAAtX+foSvc7/1lb98+QfRjHcpO+LX+LroTaq/QGOTX/2gE+tHD2TJAga1I\nVqDEz8fh8AE366FC7UCjCb5nvsCCox2htzbIxAjsc9L/JckWtxl6WOa/5OZssrDQ\nFtX39BNNl+4TfNn/z1XV+28c9yB1N5HSoP2gzdLoASw3y9n6E0FyzLdoXPILgmJI\nL4DAG/OFkixK+I+TsK+6995497h9BCi3x30dOjfxZS9ptiKhqWulbkflvvM9Cnie\n7obXYmnoe0jBjSfO5GgJlOYcLzE9MMYYzIx47/4lcrCbQXnojkW3KV03PEXGfRCL\nw/y8oBHVvNVRF0Jn+o7F+mzIrbF6Ufku63MfRf7WmbbS3B63CILEjNyuOFoe8mDb\nrmAUffzQSrgnvBk+g01slb6Q+q7Urw6wqHtBPn3ums/inHE9ymTqS7ffmRifUfR8\nD8LvhwpSUI7BdiN6HznRFPxMXzohYIqAJbUltjr4Q7qw/kJI+305Xcs1U5AUIaOp\n77p2UFHRVoMM5mpPOCSwsVJ6cSuOjWXf9afcNMrhgclKefM0aXXnd2p5zTUEe99T\nlAtXHuprRwxtSQUzHxJCdGlUGRGRR2aS9W984SNDVmcegnOIrZD2pVm/tjDwVex5\nMuAuKHr8et1EKyvKCnta6USq7WC2l6RdsCaAYzSTQ7ljEi9A+6Q=\n=/9g0\n-----END PGP SIGNATURE-----\n".to_string(), + }; + + tag.meta = Arc::new(tag.encode_metadata().unwrap()); + assert_eq!( + "e5c324b03b72b26f11557c4955c6d17c68dc8595", + tag.meta.id.to_plain_str() + ); + + tag.write_to_file("/tmp".to_string()).expect("Write error!"); + assert!(Path::new("/tmp/e5/c324b03b72b26f11557c4955c6d17c68dc8595").exists()); + } +} diff --git a/gust_integrate_lfs/src/git/object/base/tree.rs b/gust_integrate_lfs/src/git/object/base/tree.rs new file mode 100644 index 00000000..11f6eb8b --- /dev/null +++ b/gust_integrate_lfs/src/git/object/base/tree.rs @@ -0,0 +1,421 @@ +//! +//! +//! +//! +//! 
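+//! A note on the serialized layout handled below by `decode_metadata` and
+//! `encode_metadata`: each tree entry is stored as
+//! `<mode> 0x20 <filename> 0x00 <20 raw id bytes>`, and entries are simply
+//! concatenated. For example, a tree holding the single blob entry
+//! `gitmega.md` is `"100644 gitmega.md\0"` (18 bytes) followed by the
+//! 20-byte id, i.e. the 38-byte size asserted in `test_tree_read_from_file`.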
+ +use std::cmp::Ordering; +use std::fmt::Display; +use std::path::PathBuf; +use std::sync::Arc; + +use bstr::ByteSlice; +use colored::Colorize; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::object::base::ObjectClass; +use crate::git::object::metadata::MetaData; +use crate::git::object::types::ObjectType; + +/// +#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Debug, Clone, Copy)] +pub enum TreeItemType { + Blob, + BlobExecutable, + Tree, + Commit, + Link, +} + +impl Display for TreeItemType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let _print = match *self { + TreeItemType::Blob => "blob", + TreeItemType::BlobExecutable => "blob executable", + TreeItemType::Tree => "tree", + TreeItemType::Commit => "commit", + TreeItemType::Link => "link", + }; + write!(f, "{}", String::from(_print).blue()) + } +} + +/// +impl TreeItemType { + /// + #[allow(unused)] + pub(crate) fn to_bytes(self) -> &'static [u8] { + match self { + TreeItemType::Blob => b"100644", + TreeItemType::BlobExecutable => b"100755", + TreeItemType::Tree => b"40000", + TreeItemType::Link => b"120000", + TreeItemType::Commit => b"160000", + } + } + + /// + #[allow(unused)] + pub(crate) fn tree_item_type_from(mode: &[u8]) -> Result { + Ok(match mode { + b"40000" => TreeItemType::Tree, + b"100644" => TreeItemType::Blob, + b"100755" => TreeItemType::BlobExecutable, + b"120000" => TreeItemType::Link, + b"160000" => TreeItemType::Commit, + b"100664" => TreeItemType::Blob, + b"100640" => TreeItemType::Blob, + _ => { + return Err(GitError::InvalidTreeItem( + String::from_utf8(mode.to_vec()).unwrap(), + )); + } + }) + } +} + +/// Git Object: tree item +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TreeItem { + pub mode: Vec, + pub item_type: TreeItemType, + pub id: Hash, + pub filename: String, +} + +#[derive(Eq, Debug, Hash, Clone)] +pub struct Tree { + pub meta: Arc, + pub tree_items: Vec, + pub tree_name: String, +} + + +impl Ord for Tree { + fn cmp(&self, other: &Self) -> Ordering { + let o = other.tree_name.cmp(&self.tree_name); + match o { + Ordering::Equal => other.meta.size.cmp(&self.meta.size), + _ => o, + } + } +} + +impl PartialOrd for Tree { + fn partial_cmp(&self, other: &Self) -> Option { + let o = other.tree_name.cmp(&self.tree_name); + match o { + Ordering::Equal => Some(other.meta.size.cmp(&self.meta.size)), + _ => Some(o), + } + } +} + +impl PartialEq for Tree { + fn eq(&self, other: &Self) -> bool { + if self.tree_name.eq(&other.tree_name) { + return true; + } + false + } +} + +impl Display for Tree { + #[allow(unused)] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Type: Tree: {}", self.meta.id); + for item in &self.tree_items { + writeln!( + f, + "{:6} {} {} {}", + String::from_utf8(item.mode.to_vec()).unwrap(), + item.item_type, + item.id, + item.filename + ); + } + writeln!(f, "Tree Name: {}", self.tree_name); + Ok(()) + } +} + +/// +impl Tree { + pub fn parse_from_file(path: PathBuf) -> Self { + let meta = ObjectClass::parse_meta(path); + Tree::new(Arc::new(meta)) + } + + pub fn new(metadata: Arc) -> Self { + let mut a = Self { + meta: metadata, + tree_items: vec![], + tree_name: String::new(), + }; + a.decode_metadata().unwrap(); + a + } + + pub(crate) fn decode_metadata(&mut self) -> Result<(), GitError> { + let mut index = 0; + while index < self.meta.data.len() { + let mode_index = &self.meta.data[index..].find_byte(0x20).unwrap(); + let mode = &self.meta.data[index..index + 
*mode_index]; + let item_type = TreeItemType::tree_item_type_from(mode).unwrap(); + + let filename_index = &self.meta.data[index..].find_byte(0x00).unwrap(); + let filename = String::from_utf8( + self.meta.data[index + mode_index + 1..index + *filename_index].to_vec(), + ) + .unwrap(); + + let id = Hash::from_row( + &self.meta.data[index + filename_index + 1..index + filename_index + 21].to_vec(), + ); + + self.tree_items.push(TreeItem { + mode: mode.to_vec(), + item_type, + id, + filename, + }); + + index = index + filename_index + 21; + } + + Ok(()) + } + + /// + #[allow(unused)] + pub(crate) fn encode_metadata(&self) -> Result { + let mut data = Vec::new(); + for item in &self.tree_items { + data.extend_from_slice(&item.mode); + data.extend_from_slice(0x20u8.to_be_bytes().as_ref()); + data.extend_from_slice(item.filename.as_bytes()); + data.extend_from_slice(0x00u8.to_be_bytes().as_ref()); + data.extend_from_slice(&item.id.0.to_vec()); + } + + Ok(MetaData::new(ObjectType::Tree, &data)) + } + + /// + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + self.meta.write_to_file(root_path) + } +} + +/// +#[cfg(test)] +mod tests { + use std::env; + use std::path::Path; + use std::path::PathBuf; + use std::sync::Arc; + use std::vec; + + use crate::git::hash::Hash; + use crate::git::hash::HashType; + + use super::super::blob::Blob; + use super::MetaData; + use super::ObjectType; + use super::Tree; + use super::TreeItemType; + + /// + #[test] + fn test_tree_write_to_file() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-82352c3a6a7a8bd32011751699c7a3648d1b5d3c-gitmega.md"); + + let meta = Arc::new(MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!")); + + assert_eq!(meta.t, ObjectType::Blob); + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + meta.id.to_plain_str() + ); + assert_eq!(16, meta.size); + + let blob = Blob { + meta: meta, + filename: String::new(), + }; + + assert_eq!( + "# Hello Gitmega\n", + String::from_utf8(blob.meta.data.clone()).unwrap().as_str() + ); + + let item = blob.to_tree_item(String::from("gitmega.md")).unwrap(); + + let mut tree = Tree { + tree_name: String::new(), + meta: Arc::new(MetaData { + t: ObjectType::Tree, + h: HashType::Sha1, + id: Hash::default(), + size: 0, + data: vec![], + delta_header: vec![], + }), + tree_items: vec![item], + }; + + tree.meta = Arc::new(tree.encode_metadata().unwrap()); + tree.write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/1b/dbc1e723aa199e83e33ecf1bb19f874a56ebc3").exists()); + } + + /// + #[test] + fn test_tree_write_to_file_2_blob() { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-fc1a505ac94f98cc5f29100a2d9aef97027a32fb-gitmega.md"); + + let meta_gitmega = Arc::new(MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!")); + + let blob_gitmega = Blob { + meta: meta_gitmega, + filename: String::new(), + }; + + let item_gitmega = blob_gitmega + .to_tree_item(String::from("gitmega.md")) + .unwrap(); + + path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/blob-a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c-gust.md"); + + let meta_gust = Arc::new(MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!")); + + let blob_gust = Blob { + meta: meta_gust, + filename: String::new(), + }; + + let item_gust = 
blob_gust.to_tree_item(String::from("gust.md")).unwrap(); + + let mut tree = Tree { + tree_name: String::new(), + meta: Arc::new(MetaData { + t: ObjectType::Tree, + h: HashType::Sha1, + id: Hash::default(), + size: 0, + data: vec![], + delta_header: vec![], + }), + tree_items: vec![item_gitmega, item_gust], + }; + + tree.meta = Arc::new(tree.encode_metadata().unwrap()); + tree.write_to_file("/tmp".to_string()) + .expect("Write error!"); + + assert!(Path::new("/tmp/9b/be4087bedef91e50dc0c1a930c1d3e86fd5f20").exists()); + } + + /// + #[test] + fn test_tree_read_from_file() { + // 100644 blob 82352c3a6a7a8bd32011751699c7a3648d1b5d3c gitmega.md + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/tree-1bdbc1e723aa199e83e33ecf1bb19f874a56ebc3"); + + let meta = Arc::new(MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!")); + + assert_eq!(ObjectType::Tree, meta.t); + assert_eq!(38, meta.size); + + let mut tree = Tree { + meta, + tree_items: Vec::new(), + tree_name: String::new(), + }; + + tree.decode_metadata().unwrap(); + + assert_eq!(1, tree.tree_items.len()); + assert_eq!("gitmega.md", tree.tree_items[0].filename.as_str()); + assert_eq!( + "82352c3a6a7a8bd32011751699c7a3648d1b5d3c", + tree.tree_items[0].id.to_plain_str() + ); + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[0].mode.to_vec()) + .unwrap() + .as_str() + ); + assert_eq!(TreeItemType::Blob, tree.tree_items[0].item_type); + } + + /// + #[test] + fn test_tree_read_from_file_2_items() { + // 100644 blob fc1a505ac94f98cc5f29100a2d9aef97027a32fb gitmega.md + // 100644 blob a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c gust.md + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("resources/data/test/tree-9bbe4087bedef91e50dc0c1a930c1d3e86fd5f20"); + + let meta = Arc::new(MetaData::read_object_from_file(path.to_str().unwrap().to_string()) + .expect("Read error!")); + + assert_eq!(ObjectType::Tree, meta.t); + assert_eq!(73, meta.size); + + let mut tree = Tree { + meta, + tree_items: Vec::new(), + tree_name: String::new(), + }; + + tree.decode_metadata().unwrap(); + + assert_eq!(2, tree.tree_items.len()); + + assert_eq!("gitmega.md", tree.tree_items[0].filename.as_str()); + + assert_eq!( + "fc1a505ac94f98cc5f29100a2d9aef97027a32fb", + tree.tree_items[0].id.to_plain_str() + ); + + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[0].mode.to_vec()) + .unwrap() + .as_str() + ); + + assert_eq!(TreeItemType::Blob, tree.tree_items[0].item_type); + + assert_eq!("gust.md", tree.tree_items[1].filename.as_str()); + + assert_eq!( + "a3b55a2ce16d2429dae2d690d2c15bcf26fbe33c", + tree.tree_items[1].id.to_plain_str() + ); + + assert_eq!( + "100644", + String::from_utf8(tree.tree_items[1].mode.to_vec()) + .unwrap() + .as_str() + ); + + assert_eq!(TreeItemType::Blob, tree.tree_items[1].item_type); + } +} diff --git a/gust_integrate_lfs/src/git/object/delta.rs b/gust_integrate_lfs/src/git/object/delta.rs new file mode 100644 index 00000000..a924aeee --- /dev/null +++ b/gust_integrate_lfs/src/git/object/delta.rs @@ -0,0 +1,187 @@ +//! +//! +//! +//! +//! 
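+//! A note on the instruction format consumed by `apply_delta` below: a delta
+//! stream begins with the size-encoded lengths of the base and result
+//! objects, followed by instructions. When the high bit
+//! (`COPY_INSTRUCTION_FLAG`) of an instruction byte is set it is a copy: the
+//! low bits select which of up to 4 offset bytes and 3 size bytes follow, and
+//! a decoded size of 0 stands for 0x10000. Otherwise the byte itself is the
+//! count of literal bytes to append. As an illustrative example, the sequence
+//! `0x91 0x00 0x05 0x03 b"abc"` copies 5 bytes from offset 0 of the base and
+//! then appends the 3 literal bytes `abc`.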
+use std::fs::File;
+use std::io::{ErrorKind, Read};
+use std::path::Path;
+
+use flate2::read::ZlibDecoder;
+
+use crate::git::errors::GitError;
+use crate::git::hash::Hash;
+use crate::git::object::metadata::MetaData;
+use crate::git::utils;
+
+const COPY_INSTRUCTION_FLAG: u8 = 1 << 7;
+const COPY_OFFSET_BYTES: u8 = 4;
+const COPY_SIZE_BYTES: u8 = 3;
+const COPY_ZERO_SIZE: usize = 0x10000;
+
+/// Apply the delta instructions read from `pack_file` on top of the given base object
+pub fn apply_delta(pack_file: &mut File, base: &MetaData) -> Result<MetaData, GitError> {
+    utils::read_zlib_stream_exact(pack_file, |delta| {
+        let base_size = utils::read_size_encoding(delta).unwrap();
+        if base.size != base_size {
+            return Err(GitError::DeltaObjError(
+                String::from("Incorrect base object length"),
+            ));
+        }
+
+        let result_size = utils::read_size_encoding(delta).unwrap();
+        let mut result = Vec::with_capacity(result_size);
+        while apply_delta_instruction(delta, &base.data, &mut result)? {}
+        if result.len() != result_size {
+            return Err(GitError::DeltaObjError(
+                String::from("Incorrect object length"),
+            ));
+        }
+
+        // The object type is the same as the base object
+        Ok(MetaData::new(base.t, &result))
+    })
+}
+
+/// Execute a single delta instruction
+fn apply_delta_instruction<R: Read>(
+    stream: &mut R,
+    base: &[u8],
+    result: &mut Vec<u8>,
+) -> Result<bool, GitError> {
+    // Check if the stream has ended, meaning the new object is done
+    let instruction = match utils::read_bytes(stream) {
+        Ok([instruction]) => instruction,
+        Err(err) if err.kind() == ErrorKind::UnexpectedEof => return Ok(false),
+        Err(err) => {
+            return Err(GitError::DeltaObjError(format!(
+                "Wrong instruction in delta: {}",
+                err
+            )));
+        }
+    };
+    if instruction & COPY_INSTRUCTION_FLAG == 0 {
+        // Data instruction; the instruction byte specifies the number of data bytes
+        if instruction == 0 {
+            // Appending 0 bytes doesn't make sense, so git disallows it
+            return Err(GitError::DeltaObjError(
+                String::from("Invalid data instruction"),
+            ));
+        }
+
+        // Append the provided bytes
+        let mut data = vec![0; instruction as usize];
+        stream.read_exact(&mut data).unwrap();
+        result.extend_from_slice(&data);
+    } else {
+        // Copy instruction
+        let mut nonzero_bytes = instruction;
+        let offset =
+            utils::read_partial_int(stream, COPY_OFFSET_BYTES, &mut nonzero_bytes).unwrap();
+        let mut size =
+            utils::read_partial_int(stream, COPY_SIZE_BYTES, &mut nonzero_bytes).unwrap();
+        if size == 0 {
+            // Copying 0 bytes doesn't make sense, so git assumes a different size
+            size = COPY_ZERO_SIZE;
+        }
+        // Copy bytes from the base object
+        let base_data = base
+            .get(offset..(offset + size))
+            .ok_or_else(|| GitError::DeltaObjError(String::from("Invalid copy instruction")));
+
+        match base_data {
+            Ok(data) => result.extend_from_slice(data),
+            Err(e) => return Err(e),
+        }
+    }
+
+    Ok(true)
+}
+
+// By default: if the object is not found in a pack, it can only be looked up as a loose object
+#[allow(unused)]
+pub fn read_object(hash: Hash) -> Result<MetaData, GitError> {
+    let object = match read_unpacked_object(hash) {
+        // Found in objects directory
+        Ok(object) => object,
+        // Not found in objects directory; look in packfiles
+        Err(_err) => panic!("not found object"),
+    };
+
+    let object_hash = object.hash();
+    if object_hash != hash {
+        return Err(GitError::DeltaObjError(format!(
+            "Object {} has wrong hash {}",
+            hash, object_hash
+        )));
+    }
+
+    Ok(object)
+}
+
+const OBJECTS_DIRECTORY: &str = ".git/objects";
+
+/// Read an unpacked (loose) object
+#[allow(unused)]
+fn read_unpacked_object(hash: Hash) -> Result<MetaData, GitError> {
+    use crate::git::object::types::ObjectType::*;
+
+    let hex_hash = hash.to_string();
+    let (directory_name, file_name) = hex_hash.split_at(2);
+    let object_file = Path::new(OBJECTS_DIRECTORY)
+        .join(directory_name)
+        .join(file_name);
+    let object_file = File::open(object_file).unwrap();
+    let mut object_stream = ZlibDecoder::new(object_file);
+    let object_type = utils::read_until_delimiter(&mut object_stream, b' ').unwrap();
+    // Match the ASCII type tag against the four base object types
+    let object_type = match &object_type[..] {
+        b"commit" => Commit,
+        b"tree" => Tree,
+        b"blob" => Blob,
+        b"tag" => Tag,
+        _ => {
+            return Err(GitError::DeltaObjError(format!(
+                "Invalid object type: {:?}",
+                object_type
+            )));
+        }
+    };
+    let size = utils::read_until_delimiter(&mut object_stream, b'\0').unwrap();
+    let size = match parse_decimal(&size) {
+        Some(a) => a,
+        None => {
+            return Err(GitError::DeltaObjError(format!(
+                "Invalid object size: {:?}",
+                size
+            )));
+        }
+    };
+
+    let mut contents = Vec::with_capacity(size);
+    object_stream.read_to_end(&mut contents).unwrap();
+    if contents.len() != size {
+        return Err(GitError::DeltaObjError(String::from("Incorrect object size")));
+    }
+
+    Ok(MetaData::new(object_type, &contents))
+}
+
+/// Parse a decimal number from an ASCII byte slice
+fn parse_decimal(decimal_str: &[u8]) -> Option<usize> {
+    let mut value = 0usize;
+    for &decimal_char in decimal_str {
+        let char_value = decimal_char_value(decimal_char)?;
+        value = value.checked_mul(10)?;
+        value = value.checked_add(char_value as usize)?;
+    }
+    Some(value)
+}
+
+/// Convert a single ASCII digit byte to its decimal value
+fn decimal_char_value(decimal_char: u8) -> Option<u8> {
+    match decimal_char {
+        b'0'..=b'9' => Some(decimal_char - b'0'),
+        _ => None,
+    }
+}
diff --git a/gust_integrate_lfs/src/git/object/diff.rs b/gust_integrate_lfs/src/git/object/diff.rs
new file mode 100644
index 00000000..57143d5e
--- /dev/null
+++ b/gust_integrate_lfs/src/git/object/diff.rs
@@ -0,0 +1,300 @@
+//!
+//!
+//!
+//!
+//!
+use std::vec;
+
+use diffs::myers;
+use diffs::Diff;
+
+use crate::git::object::metadata::MetaData;
+use crate::git::utils;
+
+const DATA_INS_LEN: usize = 0x7f;
+
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct DeltaDiff {
+    ops: Vec<DeltaOp>,
+    old_data: MetaData,
+    new_data: MetaData,
+    ssam: usize,
+    ssam_r: f64,
+}
+
+impl DeltaDiff {
+    /// Diff the two `MetaData` values; their object types should be the same.
+    /// Returns the resulting `DeltaDiff`.
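+    /// A minimal usage sketch (illustrative; `old_meta` and `new_meta` stand
+    /// for two same-typed `MetaData` values):
+    /// ```
+    ///   let diff = DeltaDiff::new(old_meta.clone(), new_meta.clone());
+    ///   let delta_body = diff.get_delta_metadata(); // size headers + copy/data ops
+    /// ```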
+ pub fn new(old_md: MetaData, new_md: MetaData) -> Self { + let mut delta_diff = DeltaDiff { + ops: vec![], + old_data: old_md.clone(), + new_data: new_md.clone(), + + ssam: 0, + ssam_r: 0.00, + }; + + myers::diff( + &mut delta_diff, + &old_md.data, + 0, + old_md.data.len(), + &new_md.data, + 0, + new_md.data.len(), + ) + .unwrap(); + + delta_diff + } + + /// + /// + pub fn get_delta_metadata(&self) -> Vec { + let mut result: Vec = vec![]; + + result.append(&mut utils::write_size_encoding(self.old_data.size)); + result.append(&mut utils::write_size_encoding(self.new_data.size)); + + for op in &self.ops { + result.append(&mut self.decode_op(op)); + } + + result + } + + /// + /// Decode the DeltaOp to `Vec` + fn decode_op(&self, op: &DeltaOp) -> Vec { + let mut op_data = vec![]; + + match op.ins { + Optype::DATA => { + let instruct = (op.len & 0x7f) as u8; + op_data.push(instruct); + op_data.append(&mut self.new_data.data[op.begin..op.begin + op.len].to_vec()); + } + + Optype::COPY => { + let mut instruct: u8 = 0x80; + let mut offset = op.begin; + let mut size = op.len; + let mut copy_data = vec![]; + + for i in 0..4 { + let _bit = (offset & 0xff) as u8; + if _bit != 0 { + instruct |= (1 << i) as u8; + copy_data.push(_bit) + } + offset >>= 8; + } + + for i in 4..7 { + let _bit = (size & 0xff) as u8; + if _bit != 0 { + instruct |= (1 << i) as u8; + copy_data.push(_bit) + } + size >>= 8; + } + + op_data.push(instruct); + op_data.append(&mut copy_data); + } + } + + op_data + } + + /// + pub fn get_ssam_rate(&self) -> f64 { + self.ssam_r + } +} + +impl Diff for DeltaDiff { + type Error = (); + + /// + fn equal(&mut self, _old: usize, _new: usize, _len: usize) -> Result<(), Self::Error> { + self.ssam += _len; + if let Some(tail) = self.ops.last_mut() { + if tail.begin + tail.len == _old && tail.ins == Optype::COPY { + tail.len += _len; + } else { + self.ops.push(DeltaOp { + ins: Optype::COPY, + begin: _old, + len: _len, + }); + } + } else { + self.ops.push(DeltaOp { + ins: Optype::COPY, + begin: _old, + len: _len, + }); + } + + Ok(()) + } + + /// + /// + fn insert(&mut self, _old: usize, _new: usize, _len: usize) -> Result<(), ()> { + let mut len = _len; + let mut new = _new; + + if _len > DATA_INS_LEN { + while len > DATA_INS_LEN { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: DATA_INS_LEN, + }); + + len -= DATA_INS_LEN; + new += DATA_INS_LEN; + } + + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len, + }); + } else { + if let Some(tail) = self.ops.last_mut() { + if tail.begin + tail.len == _new + && tail.ins == Optype::DATA + && tail.len + _len < DATA_INS_LEN + { + tail.len += _len; + } else { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: len, + }); + } + } else { + self.ops.push(DeltaOp { + ins: Optype::DATA, + begin: new, + len: len, + }); + } + } + + Ok(()) + } + + fn finish(&mut self) -> Result<(), Self::Error> { + // compute the ssam rate when finish the diff process. 
+        self.ssam_r = self.ssam as f64 / self.new_data.data.len() as f64;
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum Optype {
+    DATA,
+    COPY,
+}
+
+#[derive(Debug, Clone, Copy)]
+struct DeltaOp {
+    ins: Optype,
+    begin: usize,
+    len: usize,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Write;
+    use std::path::PathBuf;
+
+    use bstr::ByteSlice;
+
+    use crate::git::{object::metadata::MetaData, object::types::ObjectType, pack::Pack, utils};
+
+    use super::DeltaDiff;
+
+    /// Compress the second of two metadata objects (No.2) against the first (No.1).
+    /// Both must have the same type (ObjectType). First decide which kind of delta to build:
+    /// 1. ofs-delta: compress against No.1 as the base; an offset marks the negative distance back to the base object's start
+    /// 2. ref-delta: use No.1 as the base, identified by its hash value
+    /// Both delta kinds share one trait: they need an uncompressed header. For ofs it is the size-encoded offset; for ref it is the 20 raw hash bytes (u8).
+    ///
+    #[test]
+    fn test_metadata_diff_ofs_delta() {
+        let mut m1_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        m1_path.push("resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e");
+
+        let mut m2_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        m2_path.push("resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3");
+
+        let m1 = MetaData::read_object_from_file(m1_path.to_str().unwrap().to_string()).unwrap();
+        let mut m2 =
+            MetaData::read_object_from_file(m2_path.to_str().unwrap().to_string()).unwrap();
+
+        let diff = DeltaDiff::new(m1.clone(), m2.clone());
+        let meta_vec1 = m1.convert_to_vec().unwrap();
+
+        // For the offset variant:
+        // the size header is stored uncompressed
+        let offset_head = utils::write_offset_encoding(meta_vec1.len() as u64);
+
+        // The instruction data, which does get compressed
+        let zlib_data = diff.get_delta_metadata();
+        m2.change_to_delta(ObjectType::OffsetDelta, zlib_data, offset_head);
+
+        // Once ordered, the metadata can simply be pushed into the Vec in sequence
+        let meta_vec = vec![m1, m2];
+        let mut _pack = Pack::default();
+        let pack_file_data = _pack.encode(Some(meta_vec));
+
+        //_pack
+        let mut file = std::fs::File::create("delta_ofs.pack").expect("create failed");
+        file.write_all(pack_file_data.as_bytes())
+            .expect("write failed");
+
+        Pack::decode_file("delta_ofs.pack");
+    }
+
+    #[test]
+    fn test_metadata_diff_ref_delta() {
+        let mut m1_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        m1_path.push("resources/diff/16ecdcc8f663777896bd39ca025a041b7f005e");
+
+        let mut m2_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        m2_path.push("resources/diff/bee0d45f981adf7c2926a0dc04deb7f006bcc3");
+
+        let m1 = MetaData::read_object_from_file(m1_path.to_str().unwrap().to_string()).unwrap();
+        let mut m2 =
+            MetaData::read_object_from_file(m2_path.to_str().unwrap().to_string()).unwrap();
+
+        let diff = DeltaDiff::new(m1.clone(), m2.clone());
+
+        // The header is not compressed
+        let offset_head = m1.id.0.to_vec();
+        assert_eq!(offset_head.len(), 20);
+
+        // The instruction data is compressed
+        let zlib_data = diff.get_delta_metadata();
+        m2.change_to_delta(ObjectType::HashDelta, zlib_data, offset_head);
+
+        let meta_vec = vec![m1, m2];
+        let mut _pack = Pack::default();
+        let pack_file_data = _pack.encode(Some(meta_vec));
+        //_pack
+        let mut file = std::fs::File::create("delta_ref.pack").expect("create failed");
+        file.write_all(pack_file_data.as_bytes())
+            .expect("write failed");
+        Pack::decode_file("delta_ref.pack");
+    }
+}
diff --git a/gust_integrate_lfs/src/git/object/metadata.rs b/gust_integrate_lfs/src/git/object/metadata.rs
new file mode 100644
index 00000000..68b0312a
--- /dev/null
+++ b/gust_integrate_lfs/src/git/object/metadata.rs
@@ -0,0 +1,172 @@
+//!
+//!
+//!
+//!
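+//! A loose object, as written by `write_to_file` and parsed back by
+//! `read_object_from_file` below, is the zlib-compressed byte sequence
+//! `<type> 0x20 <decimal size> 0x00 <data>`; e.g. the 16-byte blob used in
+//! the tests compresses the bytes `blob 16\0# Hello Gitmega\n`.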
+ +use std::fs::{create_dir_all, File}; +use std::io::{BufReader, Read, Write}; +use std::path::PathBuf; + +use anyhow::Context; +use bstr::ByteSlice; +use deflate::{write::ZlibEncoder, Compression}; +use flate2::read::ZlibDecoder; + +use crate::errors::GustError; +use crate::git::errors::GitError; +use crate::git::hash::{Hash, HashType}; +use crate::git::object::types::ObjectType; + +/// The metadata of git object. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct MetaData { + pub t: ObjectType, + pub h: HashType, + pub id: Hash, + pub size: usize, + pub data: Vec, + pub delta_header: Vec, +} + +/// Implement function for Metadata +impl MetaData { + /// optimize hash calculation with LRU cache + pub fn hash(&self) -> Hash { + Hash::from_meta(&self) + } + + /// + pub fn new(object_type: ObjectType, data: &Vec) -> MetaData { + let mut metadata = MetaData { + t: object_type, + h: HashType::Sha1, + id: Hash::default(), + size: data.len(), + data: data.to_vec(), + delta_header: vec![], + }; + + // compute hash value + metadata.id = metadata.hash(); + + metadata + } + + /// Write the object to the file system with folder and file. + /// This function can create a “loose” object format, + /// which can convert into the `.pack` format by the Command: + /// ```bash + /// git gc + /// ``` + #[allow(unused)] + pub(crate) fn write_to_file(&self, root_path: String) -> Result { + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); + + encoder.write_all(&self.t.to_bytes()); + encoder.write(&[b' ']); + encoder.write(self.data.len().to_string().as_bytes()); + encoder.write(&[b'\0']); + encoder.write_all(&self.data).expect("Write error!"); + let compressed_data = encoder.finish().expect("Failed to finish compression!"); + + let mut path = PathBuf::from(root_path); + path.push(&self.id.to_folder()); + create_dir_all(&path) + .with_context(|| format!("Failed to create directory: {}", path.display())) + .unwrap(); + + path.push(&self.id.to_filename()); + + let mut file = File::create(&path) + .with_context(|| format!("Failed to create file: {}", path.display())) + .unwrap(); + file.write_all(&compressed_data) + .with_context(|| format!("Failed to write to file: {}", path.display())) + .unwrap(); + + Ok(path.to_str().unwrap().to_string()) + } + + ///Convert Metadata to the `Vec` ,so that it can write to File + pub fn convert_to_vec(&self) -> Result, GustError> { + let mut compressed_data = + vec![(0x80 | (self.t.type2_number() << 4)) + (self.size & 0x0f) as u8]; + let mut _size = self.size >> 4; + if _size > 0 { + while _size > 0 { + if _size >> 7 > 0 { + compressed_data.push((0x80 | _size) as u8); + _size >>= 7; + } else { + compressed_data.push((_size) as u8); + break; + } + } + } else { + compressed_data.push(0); + } + + match self.t { + ObjectType::OffsetDelta => { + compressed_data.append(&mut self.delta_header.clone()); + } + ObjectType::HashDelta => { + compressed_data.append(&mut self.delta_header.clone()); + } + _ => {} + } + + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); + encoder.write_all(&self.data).expect("Write error!"); + compressed_data.append(&mut encoder.finish().expect("Failed to finish compression!")); + + Ok(compressed_data) + } + + /// Read the object from the file system and parse to a metadata object.
+ /// This file is the “loose” object format. + #[allow(unused)] + pub(crate) fn read_object_from_file(path: String) -> Result { + let file = File::open(path).unwrap(); + let mut reader = BufReader::new(file); + // let mut data = Vec::new(); + // reader.read_to_end(&mut data).unwrap(); + + let mut decoder = ZlibDecoder::new(reader); + let mut decoded = Vec::new(); + decoder.read_to_end(&mut decoded).unwrap(); + + let type_index = decoded.find_byte(0x20).unwrap(); + let t = &decoded[0..type_index]; + + let size_index = decoded.find_byte(0x00).unwrap(); + let size = decoded[type_index + 1..size_index] + .iter() + .copied() + .map(|x| x as char) + .collect::() + .parse::() + .unwrap(); + + let mut data = decoded[size_index + 1..].to_vec(); + + match String::from_utf8(t.to_vec()).unwrap().as_str() { + "blob" => Ok(MetaData::new(ObjectType::Blob, &data)), + "tree" => Ok(MetaData::new(ObjectType::Tree, &data)), + "commit" => Ok(MetaData::new(ObjectType::Commit, &data)), + "tag" => Ok(MetaData::new(ObjectType::Tag, &data)), + _ => Err(GitError::InvalidObjectType( + String::from_utf8(t.to_vec()).unwrap(), + )), + } + } + + /// Change the base object to the delta object , + /// including : ref-object ofs-object + pub fn change_to_delta(&mut self, types: ObjectType, changed: Vec, header: Vec) { + self.t = types; + self.data = changed; + self.size = self.data.len(); + self.delta_header = header; + } +} diff --git a/gust_integrate_lfs/src/git/object/mod.rs b/gust_integrate_lfs/src/git/object/mod.rs new file mode 100644 index 00000000..1a986338 --- /dev/null +++ b/gust_integrate_lfs/src/git/object/mod.rs @@ -0,0 +1,14 @@ +//! +//! +//! +//! +//! + +pub mod base; +pub mod delta; +pub mod diff; +pub mod metadata; +pub mod types; + +#[cfg(test)] +mod tests {} diff --git a/gust_integrate_lfs/src/git/object/types.rs b/gust_integrate_lfs/src/git/object/types.rs new file mode 100644 index 00000000..6bf09a05 --- /dev/null +++ b/gust_integrate_lfs/src/git/object/types.rs @@ -0,0 +1,96 @@ +//! +//! +//! +//! +//! +//! +//! 
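+//! The numeric encoding used inside pack entries (see `type2_number` and
+//! `number_type` below) is: commit = 1, tree = 2, blob = 3, tag = 4,
+//! ofs-delta = 6 and ref-delta = 7 (5 is not used here).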
+
+use std::{fmt::Display, vec};
+
+use crate::git::errors::GitError;
+
+/// Git object types. The four base types:
+/// - Blob
+/// - Tree
+/// - Commit
+/// - Tag
+///
+/// plus the two delta representations used in pack files:
+/// - OffsetDelta(6)
+/// - HashDelta(7)
+#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Debug, Clone, Copy)]
+pub enum ObjectType {
+    Commit,
+    Tree,
+    Blob,
+    Tag,
+    OffsetDelta,
+    HashDelta,
+}
+
+/// Display trait for Git objects type
+impl Display for ObjectType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            ObjectType::Blob => write!(f, "blob"),
+            ObjectType::Tree => write!(f, "tree"),
+            ObjectType::Commit => write!(f, "commit"),
+            ObjectType::Tag => write!(f, "tag"),
+            ObjectType::OffsetDelta => write!(f, "OffsetDelta"),
+            ObjectType::HashDelta => write!(f, "HashDelta"),
+        }
+    }
+}
+
+///
+impl ObjectType {
+    ///
+    #[allow(unused)]
+    pub fn to_bytes(self) -> Vec<u8> {
+        match self {
+            ObjectType::Blob => vec![0x62, 0x6c, 0x6f, 0x62],
+            ObjectType::Tree => vec![0x74, 0x72, 0x65, 0x65],
+            ObjectType::Commit => vec![0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74],
+            ObjectType::Tag => vec![0x74, 0x61, 0x67],
+            _ => vec![],
+        }
+    }
+
+    ///
+    #[allow(unused)]
+    pub fn from_string(s: &str) -> Result<ObjectType, GitError> {
+        match s {
+            "blob" => Ok(ObjectType::Blob),
+            "tree" => Ok(ObjectType::Tree),
+            "commit" => Ok(ObjectType::Commit),
+            "tag" => Ok(ObjectType::Tag),
+            _ => Err(GitError::InvalidObjectType(s.to_string())),
+        }
+    }
+
+    /// TODO - Rename the function name
+    #[allow(unused)]
+    pub fn type2_number(&self) -> u8 {
+        match self {
+            ObjectType::Commit => 1,
+            ObjectType::Tree => 2,
+            ObjectType::Blob => 3,
+            ObjectType::Tag => 4,
+            ObjectType::OffsetDelta => 6,
+            ObjectType::HashDelta => 7,
+        }
+    }
+
+    ///
+    #[allow(unused)]
+    pub fn number_type(num: u8) -> Self {
+        match num {
+            1 => ObjectType::Commit,
+            2 => ObjectType::Tree,
+            3 => ObjectType::Blob,
+            4 => ObjectType::Tag,
+            6 => ObjectType::OffsetDelta,
+            7 => ObjectType::HashDelta,
+            _ => panic!("Invalid Git object type"),
+        }
+    }
+}
diff --git a/gust_integrate_lfs/src/git/pack/cache.rs b/gust_integrate_lfs/src/git/pack/cache.rs
new file mode 100644
index 00000000..e7960eff
--- /dev/null
+++ b/gust_integrate_lfs/src/git/pack/cache.rs
@@ -0,0 +1,47 @@
+//! Build Cache Info for the decode packed object
+//!
+//!
+use std::collections::{BTreeMap, HashMap};
+use std::sync::Arc;
+
+use crate::git::hash::Hash;
+use crate::git::object::metadata::MetaData;
+
+/// #### Build cache info for decoding packed objects
+/// Decoded objects are indexed two ways: by the object's hash value and by
+/// its offset inside the pack file.
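+/// A usage sketch (illustrative; `meta` is some decoded `Arc<MetaData>` and
+/// `offset` its position in the pack):
+/// ```
+///   let mut cache = PackObjectCache::default();
+///   cache.update(meta, offset);
+///   assert!(cache.offset_object(offset).is_some());
+/// ```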
+#[derive(Default, Clone)]
+pub struct PackObjectCache {
+    pub by_hash: BTreeMap<Hash, Arc<MetaData>>,
+    pub by_offset: HashMap<Hash, u64>,
+    pub offset_hash: BTreeMap<u64, Hash>,
+    pub base: u32,
+    pub delta: u32,
+}
+
+//
+impl PackObjectCache {
+    /// Update the cache with a decoded object (`Arc<MetaData>`) and its offset (`u64`)
+    pub fn update(&mut self, object: Arc<MetaData>, offset: u64) {
+        let _hash = object.id;
+        self.by_hash.insert(_hash, object.clone());
+        self.by_offset.insert(_hash, offset);
+        self.offset_hash.insert(offset, _hash);
+    }
+    #[allow(unused)]
+    pub fn clean(&mut self) {
+        self.by_hash.clear();
+        self.by_offset.clear();
+        self.offset_hash.clear();
+    }
+
+    pub fn offset_object(&mut self, offset: u64) -> Option<&mut Arc<MetaData>> {
+        let _hash = self.offset_hash.get(&offset)?;
+
+        self.by_hash.get_mut(_hash)
+    }
+
+    pub fn hash_object(&mut self, hash: Hash) -> Option<&Arc<MetaData>> {
+        self.by_hash.get(&hash)
+    }
+}
diff --git a/gust_integrate_lfs/src/git/pack/decode.rs b/gust_integrate_lfs/src/git/pack/decode.rs
new file mode 100644
index 00000000..fa2b762b
--- /dev/null
+++ b/gust_integrate_lfs/src/git/pack/decode.rs
@@ -0,0 +1,239 @@
+//! Decode pack file by the `ObjDecodedMap`
+//!
+//!
+//!
+use std::collections::HashMap;
+use std::fmt::{self, Display};
+use std::sync::{Arc, RwLock};
+
+use colored::Colorize;
+
+use obj::base::ObjectClass;
+use obj::base::{blob, commit, tag, tree};
+use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
+use rayon::ThreadPoolBuilder;
+
+use crate::git::errors::GitError;
+use crate::git::hash::Hash;
+use crate::git::object as obj;
+use crate::git::object::base::blob::Blob;
+use crate::git::object::base::commit::Commit;
+use crate::git::object::base::tag::Tag;
+use crate::git::object::base::tree::Tree;
+use crate::git::object::metadata::MetaData;
+use crate::git::object::types::ObjectType;
+use crate::git::pack::cache::PackObjectCache;
+
+/// Further decodes and wraps the object fields extracted from a pack;
+/// a hashmap store for the parsed abstract objects.
+#[derive(Default, Clone)]
+pub struct ObjDecodedMap {
+    pub map_hash: HashMap<Hash, Arc<ObjectClass>>,
+    pub blobs: Vec<Blob>,
+    pub trees: Vec<Tree>,
+    pub tags: Vec<Tag>,
+    pub commits: Vec<Commit>,
+    pub name_map: HashMap<Hash, String>,
+}
+
+// The further decoding pass that runs after the raw objects have been parsed
+impl ObjDecodedMap {
+    /// Use the cache to further parse each object kind
+    #[allow(unused)]
+    pub fn update_from_cache(&mut self, cache: &PackObjectCache) {
+        let builder = ThreadPoolBuilder::new().num_threads(8);
+        let pool = builder.build().unwrap();
+        let blobs: Arc<RwLock<Vec<Blob>>> = Default::default();
+        let commits: Arc<RwLock<Vec<Commit>>> = Default::default();
+        let trees: Arc<RwLock<Vec<Tree>>> = Default::default();
+        let tags: Arc<RwLock<Vec<Tag>>> = Default::default();
+
+        pool.install(|| {
+            cache.by_hash.par_iter().for_each(|(key, value)| {
+                let metadata = value.clone();
+                match value.t {
+                    // Hand off to each type's new function, which decodes from the metadata
+                    ObjectType::Blob => {
+                        blobs.write().unwrap().push(Blob::new(metadata));
+                    }
+                    ObjectType::Commit => {
+                        commits.write().unwrap().push(Commit::new(metadata));
+                    }
+                    ObjectType::Tag => {
+                        tags.write().unwrap().push(Tag::new(metadata));
+                    }
+                    ObjectType::Tree => {
+                        trees.write().unwrap().push(Tree::new(metadata));
+                    }
+                    _ => panic!("src/git/pack/decode.rs: 33 invalid type in encoded metadata"),
+                }
+            });
+        });
+        self.blobs = blobs.read().unwrap().to_vec();
+        self.commits = commits.read().unwrap().to_vec();
+        self.trees = trees.read().unwrap().to_vec();
+        self.tags = tags.read().unwrap().to_vec();
+
+        // for (key, value) in cache.by_hash.iter() {
+        //     let metadata = MetaData::new(value.t, &value.data);
+        //     match value.t {
+        //         // Hand off to each type's new function, which decodes from the metadata
+        //         ObjectType::Blob => {
+        //             let a = blob::Blob::new(metadata);
+        //             self.blobs.push(a);
+        //             // ObjectClass::BLOB(a)
+        //         }
+        //         ObjectType::Commit => {
+        //             let a = commit::Commit::new(metadata);
+        //             self.commits.push(a);
+        //             // ObjectClass::COMMIT(a)
+        //         }
+        //         ObjectType::Tag => {
+        //             let a = tag::Tag::new(metadata);
+        //             self.tags.push(a);
+        //             // ObjectClass::TAG(a)
+        //         }
+        //         ObjectType::Tree => {
+        //             let a = tree::Tree::new(metadata);
+        //             self.trees.push(a);
+        //             // ObjectClass::TREE(a)
+        //         }
+        //         _ => panic!("src/git/pack/decode.rs: 33 invalid type in encoded metadata"),
+        //     };
+        //     // self.map_hash.insert(key.clone(), Arc::new(obj_class));
+        // }
+    }
+
+    /// Although this looks like an encoding concern, it is really a deeper parse of the objects, which is why it lives here.
+    /// This func should be called after `fn update_from_cache`.
+    /// It verifies that every hash referenced by a tree item has a corresponding object,
+    /// and applies the "Magic" sort to the four base object kinds.
+    #[allow(unused)]
+    pub fn check_completeness(&mut self) -> Result<(), GitError> {
+        // Verify the integrity of the tree objects: every hash under a tree item must have a matching object
+        for tree in self.trees.iter() {
+            for item in &tree.tree_items {
+                // Record the mapping between object name and hash value
+                self.name_map.insert(item.id.clone(), item.filename.clone());
+                // Check whether a corresponding hash exists
+                if self.map_hash.get(&item.id).is_none() {
+                    return Err(GitError::UnCompletedPackObject(format!(
+                        "can't find hash value: {}",
+                        &item.id
+                    )));
+                }
+            }
+        }
+
+        // For tree & blob objects, get their names
+        for _tree in self.trees.iter_mut() {
+            let name = self.name_map.get(&_tree.meta.id);
+            match name {
+                Some(_name) => _tree.tree_name = _name.clone(),
+                None => {}
+            }
+        }
+
+        for _blob in self.blobs.iter_mut() {
+            let name = self.name_map.get(&_blob.meta.id);
+            match name {
+                Some(_name) => _blob.filename = _name.clone(),
+                None => {}
+            }
+        }
+        // sort the four base object kinds
+        //TODO: This is called the "Magic" Sort
+        self.trees.sort();
+        self.blobs.sort();
+        self.tags.sort();
+        self.commits.sort();
+        Ok(())
+    }
+
+    /// Collect the metadata validated by `check_completeness` into a list, in sliding-window order
+    #[allow(unused)]
+    pub fn vec_sliding_window(&self) -> Vec<MetaData> {
+        let mut list = vec![];
+        for c in self.commits.iter() {
+            list.push(c.meta.as_ref().clone());
+        }
+        for t in self.tags.iter() {
+            list.push(t.meta.as_ref().clone());
+        }
+        for tree in self.trees.iter() {
+            list.push(tree.meta.as_ref().clone());
+        }
+        for blob in self.blobs.iter() {
+            list.push(blob.meta.as_ref().clone());
+        }
+
+        list
+    }
+
+    #[allow(unused)]
+    pub fn print_vec(&self) {
+        for c in self.commits.iter() {
+            println!("{}", c);
+        }
+        for t in self.tags.iter() {
+            println!("{}", t);
+        }
+        for tree in self.trees.iter() {
+            println!("{}", tree);
+        }
+        for blob in self.blobs.iter() {
+            println!("{}", blob);
+        }
+    }
+}
+
+impl Display for ObjDecodedMap {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for (key, value) in self.map_hash.iter() {
+            writeln!(f, "*********************").unwrap();
+            writeln!(f, "Hash: {}", key).unwrap();
+            writeln!(f, "Type: {}", value).unwrap();
+        }
+        writeln!(
+            f,
+            "{}",
+            String::from("Finished printing ObjDecodedMap").blue()
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tokio_test::block_on;
+
+    use super::super::Pack;
+    use super::ObjDecodedMap;
+
+    #[test]
+    pub fn test_map_new() {
+        let mut _map = ObjDecodedMap::default();
+        let decoded_pack = block_on(Pack::decode_file(
+            "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack",
+        ));
+        assert_eq!(
+            "6590ba86f4e863e1c2c985b046e1d2f1a78a0089",
+            decoded_pack.signature.to_plain_str()
+        );
+        let mut result = ObjDecodedMap::default();
result.update_from_cache(&decoded_pack.result); + result.check_completeness().unwrap(); + result.print_vec(); + } + + // #[test] + // fn test_object_dir_encod_temp() { + // let decoded_pack = Pack::decode_file( + // "./resources/friger/pack-6cf1ec1a89de3757f7ba776e4dc108b88367c460.pack", + // ); + // println!("{}", decoded_pack.get_object_number()); + // assert_eq!( + // "6cf1ec1a89de3757f7ba776e4dc108b88367c460", + // decoded_pack.signature.to_plain_str() + // ); + // } +} diff --git a/gust_integrate_lfs/src/git/pack/encode.rs b/gust_integrate_lfs/src/git/pack/encode.rs new file mode 100644 index 00000000..4460f172 --- /dev/null +++ b/gust_integrate_lfs/src/git/pack/encode.rs @@ -0,0 +1,469 @@ +//! encode pack file ,and create file +//! +//! +//! +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::str::FromStr; + +use bstr::ByteSlice; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::object::diff::DeltaDiff; +use crate::git::object::metadata::MetaData; +use crate::git::object::types::ObjectType; +use crate::git::pack::decode::ObjDecodedMap; +use crate::git::pack::Pack; +use crate::git::utils; + +const SLIDING_WINDOW: i32 = 10; + +/// +/// Pack类的encode函数,将解析出的pack或其他途径生成的pack生成对应的文件 +impl Pack { + /// 对pack文件的头文件进行编码,除了size大小 这部分都是基本固定的 : + /// ```plaintext + /// -> |'P' 'A' 'C' 'K' |4b + /// version -> | 0 0 0 2 |4b + /// size -> | size[ 31 --- 0 ]|4b + /// ``` + /// Pack对象应先携带有效的 `self.number_of_objects` 字段 + fn encode_header(&mut self) -> Vec { + self.head = *b"PACK"; + self.version = 2; + let mut result: Vec = vec![ + b'P', b'A', b'C', b'K', // The logotype of the Pack File + 0, 0, 0, 2, + ]; // THe Version of the Pack File + let all_num = self.get_object_number(); + assert_ne!(all_num, 0); // guarantee self.number_of_objects!=0 + assert!(all_num < (1 << 32)); + //TODO: GitError:numbers of objects should < 4G , + //Encode the number of object into file + result.append(&mut utils::u32_vec(all_num as u32)); + result + } + /// 计算pack文件的hash value,赋予id字段,并将hash转为 `Vec` 输出 + fn append_hash_signature(&mut self, data: &Vec) -> Vec { + let checksum = Hash::new(&data); + self.signature = checksum.clone(); + checksum.0.to_vec() + } + + #[allow(unused)] + /// Pack 结构体的`encode`函数 + /// > 若输出的meta_vec ==None 则需要pack结构体是完整有效的,或者至少其中的PackObjectCache不为空 + /// > 若输入的meta_vec不为None 则按照该vec进行encode + /// # Examples + /// ``` + /// let result:Vec = decoded_pack.encode(None); + /// //or + /// let metadata_vec :Vec = ...;// Get a list of metadata + /// let result:Vec = Pack::default().encode(metadata_vec); + /// ``` + /// + pub fn encode(&mut self, meta_vec: Option>) -> Vec { + use sha1::{Digest, Sha1}; + let mut result: Vec; + let mut offset = 12; + match meta_vec { + // 有metadata的情况下 + Some(a) => { + self.number_of_objects = a.len(); + result = self.encode_header(); + for metadata in a { + result.append(&mut metadata.convert_to_vec().unwrap()); + //self.result.update(Arc::new(metadata), offset); + // println!("Decode offset:{}", offset); + offset = result.len() as u64; + } + } + None => { + self.number_of_objects = self.result.by_hash.len(); + result = self.encode_header(); + for (key, value) in self.result.by_hash.iter() { + result.append(&mut value.convert_to_vec().unwrap()); + } + } + } + // compute pack hash signature and append to the result + result.append(&mut self.append_hash_signature(&result)); + result + } + + /// 仅支持offset delta + /// 一次通过metadata的完整data输出 + /// 从decode的 `vec_sliding_window` 来 + #[allow(unused)] + pub fn 
encode_delta(meta_vec: Vec) -> (Self, Vec) { + let mut _pack = Pack::default(); + _pack.number_of_objects = meta_vec.len(); + let mut result = _pack.encode_header(); + let mut code_meta = vec![]; + assert_eq!(result.len(), 12); + + let mut offset: Vec = vec![]; //记录已完成的metadata的offset + + for i in 0.._pack.number_of_objects as i32 { + let mut new_meta = meta_vec[i as usize].clone(); + let mut best_j: i32 = 11; + let mut best_ssam_rate: f64 = 0.0; + for j in 1..SLIDING_WINDOW { + if i - j < 0 { + break; + } + let _base = meta_vec[(i - j) as usize].clone(); + // 若两个对象类型不相同则不进行delta + if new_meta.t != _base.t { + break; + } + let diff = DeltaDiff::new(_base.clone(), new_meta.clone()); + let _rate = diff.get_ssam_rate(); + if (_rate > best_ssam_rate) && _rate > 0.5 { + best_ssam_rate = _rate; + best_j = j; + } + } + + let mut final_meta = new_meta.clone(); + if best_j != 11 { + let _base = meta_vec[(i - best_j) as usize].clone(); + let diff = DeltaDiff::new(_base.clone(), new_meta.clone()); + let zlib_data = diff.get_delta_metadata(); + let offset_head = utils::write_offset_encoding( + result.len() as u64 - offset[(i - best_j) as usize], + ); + final_meta.change_to_delta(ObjectType::OffsetDelta, zlib_data, offset_head); + } + code_meta.push(final_meta.clone()); + // TODO:update the offset and write + offset.push(result.len() as u64); + result.append(&mut final_meta.convert_to_vec().unwrap()); + println!(); + println!("Hash :{}", final_meta.id); + println!("type: {}", final_meta.t); + println!("Offset: {}", offset.last().unwrap()); + } + let mut _hash = _pack.append_hash_signature(&result); + result.append(&mut _hash); + (_pack, result) + } + /// Pack the loose object from the Given string . + /// `obj_path`: the vector of the Hash value of the loose object + /// `loose_root_path` : loose objects' root path + /// `target_path` : the pack file store path + /// 将所有的loose文件读入并写入 + pub fn pack_loose(obj_path: Vec, loose_root_path: &str) -> (Self, Vec) { + let mut meta_vec = vec![]; + for path in &obj_path { + let hash_value = Hash::from_str(path).unwrap(); + let loose_path = format!( + "{}/{}/{}", + loose_root_path, + hash_value.to_folder(), + hash_value.to_filename() + ); + let _meta = MetaData::read_object_from_file(loose_path); + match _meta { + Ok(meta) => meta_vec.push(meta), + Err(e) => eprintln!("{}", e), + } + } + + // if meta_vec.len() != obj_path.len(){ + // return false; + // } + let mut pack = Pack::default(); + + let pack_file_data = pack.encode(Some(meta_vec)); + (pack, pack_file_data) + } + /// Pack the loose object from the Given string . + /// `obj_path`: the vector of the Hash value of the loose object + /// `loose_root_path` : loose objects' root path + /// `target_path` : the pack file store path + /// + pub fn pack_loose_files( + obj_path: Vec, + loose_root_path: &str, + target_path: &str, + ) -> Self { + let (mut _pack, pack_file_data) = Self::pack_loose(obj_path, loose_root_path); + let pack_file_name = format!( + "{}/pack-{}.pack", + target_path, + _pack.signature.to_plain_str() + ); + print!("to——file: {}", pack_file_name); + let mut file = std::fs::File::create(pack_file_name).expect("create failed"); + file.write_all(pack_file_data.as_bytes()) + .expect("write failed"); + _pack + } + /// Pack the loose object in a dir ,such as the `.git/object/pack`
+ /// It can auto find the loose object follow the position like below: + /// ```plaintext + /// ./in:loose_root/aa/bbbbbbbbbbbbbbbbbb + /// ``` + /// ,The object Hash is `aabbbbbbbbbbbbbbbbbb` + /// - in:loose_root : loose object root dir + /// - in: target_path : The pack file dir to store + /// + /// 查找到所有的loose文件代表的Hash值 + pub fn find_all_loose(loose_root_path: &str) -> Vec { + let loose_root = std::path::PathBuf::from(loose_root_path); + let mut loose_vec = Vec::new(); + // 打开loose 根目录 + let paths = std::fs::read_dir(&loose_root).unwrap(); + // 暂时保存根目录作为 Path buff + let mut loose_file = loose_root.clone(); + // loose_file= ./root + // 遍历目录下的hash前两位(1b)的子文件夹 + for path in paths { + if let Ok(hash_2) = path { + //the first 1 b + let file_name1 = String::from(hash_2.file_name().to_str().unwrap()); + + // 判断只有两位且是文件夹 + let is_dir = hash_2.file_type().unwrap().is_dir(); + if is_dir && (file_name1.len() == 2) { + loose_file.push(file_name1.clone()); + //loose_file = ./root/xx + let loose_s = std::fs::read_dir(&loose_file).unwrap(); + + //再打开子文件夹 此目录下即为保存的loose object文件 + for loose_path in loose_s { + if let Ok(loose_path) = loose_path { + let file_name2 = String::from(loose_path.file_name().to_str().unwrap()); + loose_file.push(file_name2.clone()); + //loose_file = ./root/xx/xxxxxxxxxxxxxxxxxxxx + //将object提取hash值并放入vec + loose_vec.push( + Hash::from_str(&(file_name1.clone() + &file_name2)) + .unwrap() + .to_plain_str(), + ); + loose_file.pop(); // pop path buf + } + } + loose_file.pop(); + } else { + continue; + } + } + } + + loose_vec + } + /// 从文件夹中将所有loose文件压缩 + #[allow(unused)] + pub fn pack_loose_from_dir(loose_root_path: &str, target_path: &str) -> Self { + let loose_vec = Self::find_all_loose(loose_root_path); + Pack::pack_loose_files(loose_vec, loose_root_path, target_path) + } + + /// 找到pack文件 //TODO: 目前只支持单个文件 ,之后将考虑多文件 + fn find_pack_file(object_dir: &str) -> File { + let mut object_root = std::path::PathBuf::from(object_dir); + let mut pack_file_name = String::new(); + object_root.push("pack"); + let paths = std::fs::read_dir(&object_root).unwrap(); + for path in paths { + if let Ok(pack_file) = path { + let _file_name = pack_file.file_name(); + let _file_name = _file_name.to_str().unwrap(); + if &_file_name[_file_name.len() - 4..] 
== "pack" { + pack_file_name.push_str(_file_name); + break; + } + } + } + object_root.push(pack_file_name); + + let pack_file = File::open(object_root).unwrap(); + pack_file + } + #[allow(unused)] + pub fn pack_object_dir(object_dir: &str, target_dir: &str) -> Self { + // unpack the pack file which should be unchanged + let mut pack_file = Self::find_pack_file(object_dir); + let (raw_pack, mut raw_data) = Pack::decode_raw_data(&mut pack_file); + // 将loose object 预先压缩 + let loose_vec = Self::find_all_loose(object_dir); + let (mut loose_pack, loose_data) = Pack::pack_loose(loose_vec, object_dir); + + // 创建新的pack对象 + let mut new_pack = Self::default(); + new_pack.head = *b"PACK"; + new_pack.version = 2; + new_pack.number_of_objects = raw_pack.get_object_number() + loose_pack.get_object_number(); + let mut result = new_pack.encode_header(); + + result.append(&mut raw_data); + let mut loose_data = utils::get_pack_raw_data(loose_data); + result.append(&mut loose_data); + new_pack.signature = Hash::new(&result); + result.append(&mut new_pack.signature.0.to_vec()); + + // 开始写入 + let mut file = std::fs::File::create(format!( + "{}/pack-{}.pack", + target_dir, + new_pack.signature.to_plain_str() + )) + .expect("create failed"); + file.write_all(result.as_bytes()).expect("write failed"); + + new_pack + } + #[allow(unused)] + pub fn write(map: &mut ObjDecodedMap, target_dir: &str) -> Result<(), GitError> { + map.check_completeness().unwrap(); + let meta_vec = map.vec_sliding_window(); + let (_pack, data_write) = Pack::encode_delta(meta_vec); + let mut to_path = PathBuf::from(target_dir); + let file_name = format!("pack-{}.pack", _pack.signature.to_plain_str()); + to_path.push(file_name); + let mut file = std::fs::File::create(to_path).expect("create failed"); + file.write_all(data_write.as_bytes()).expect("write failed"); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use bstr::ByteSlice; + use tokio_test::block_on; + + use crate::git::pack::{decode::ObjDecodedMap, Pack}; + + const TEST_DIR: &str = "./test_dir"; + + #[test] + fn test_object_dir_encode() { + Pack::pack_object_dir("./resources/total", "./resources/total/output"); + let decoded_pack = block_on(Pack::decode_file( + "./resources/total/output/pack-7ea8ad41c9d438654ef28297ecc874842c7d10de.pack", + )); + println!("{}", decoded_pack.get_object_number()); + assert_eq!( + "7ea8ad41c9d438654ef28297ecc874842c7d10de", + decoded_pack.signature.to_plain_str() + ); + } + + // + #[test] + fn test_a_real_pack_de_en() { + let decoded_pack = block_on(Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + )); + let mut map = ObjDecodedMap::default(); + map.update_from_cache(&decoded_pack.get_cache()); + Pack::write(&mut map, TEST_DIR).unwrap(); + + Pack::decode_file("./test_dir/pack-83df56e42ca705892f7fd64f96ecb9870b5c5ed8.pack"); + } + + #[test] + fn test_multi_pack_encode() { + let pack_1 = block_on(Pack::decode_file( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + )); + let pack_2 = block_on(Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + )); + + let mut map = ObjDecodedMap::default(); + map.update_from_cache(&pack_1.get_cache()); + map.update_from_cache(&pack_2.get_cache()); + + Pack::write(&mut map, TEST_DIR).unwrap(); + + Pack::decode_file("./test_dir/pack-8e8b79ea20effb78d701fa8ad5a7e386b7d833fa.pack"); + } + + #[test] + fn dex_number() { + let all_num: usize = 0x100f1109; + println!("{:x}", (all_num >> 24) as 
u8); + println!("{:x}", (all_num >> 16) as u8); + println!("{:x}", (all_num >> 8) as u8); + println!("{:x}", (all_num) as u8); + } + + /// 将一些loose object打包为 pack文件 + /// 只需要给出loose文件的根目录 目标根目录 和 loose 文件的hash字符串即可 + #[test] + fn test_loose_pack() { + let mut loose_vec = Vec::new(); + loose_vec.push(format!("5f413c76a2893bb1ff83d7c2b507a9cab30bd585")); + loose_vec.push(format!("8bb783eb532d4936248f9084821af2bb309f29e7")); + loose_vec.push(format!("79dc1608dba888e0378ff21591dc646c8afe4e0a")); + loose_vec.push(format!("ce70a618efa88992a4c4bdf22ebd832b24acf374")); + let loose_root = "./resources/loose"; + let target_path = "./resources/pack_g"; + let pack = Pack::pack_loose_files(loose_vec, loose_root, target_path); + Pack::decode_file(&format!( + "{}/pack-{}.pack", + target_path, + pack.signature.to_plain_str() + )); + } + + /// 只需要给定loose 的根目录 则自动读取所有loose的文件并打包至指定文件夹 + #[test] + fn test_loose_pack_from_dir() { + let loose_root = "./resources/loose"; + let target_path = "./resources/pack_g"; + // 解析过程 + let pack = Pack::pack_loose_from_dir(loose_root, target_path); + Pack::decode_file(&format!( + "{}/pack-{}.pack", + target_path, + pack.signature.to_plain_str() + )); + } + + #[test] + fn test_delta_pack_ok() { + let mut _map = ObjDecodedMap::default(); + let decoded_pack = block_on(Pack::decode_file( + "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack", + )); + assert_eq!( + "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + decoded_pack.signature.to_plain_str() + ); + let mut result = ObjDecodedMap::default(); + result.update_from_cache(&decoded_pack.result); + result.check_completeness().unwrap(); + let meta_vec = result.vec_sliding_window(); + let (_pack, data_write) = Pack::encode_delta(meta_vec); + + let file_name = format!("pack-{}.pack", _pack.signature.to_plain_str()); + let mut file = std::fs::File::create(file_name).expect("create failed"); + file.write_all(data_write.as_bytes()).expect("write failed"); + + let decoded_pack = block_on(Pack::decode_file(&format!( + "pack-{}.pack", + _pack.signature.to_plain_str() + ))); + assert_eq!( + "aa2ab2eb4e6b37daf6dcadf1b6f0d8520c14dc89", + decoded_pack.signature.to_plain_str() + ); + } + + // #[test] + // fn test_vec(){ + // let mut arr = vec! [1,2,3,4,5]; + // let ta = arr.last_mut().unwrap(); + // *ta += 8; + // print!("{:?}",arr); + // } +} diff --git a/gust_integrate_lfs/src/git/pack/mod.rs b/gust_integrate_lfs/src/git/pack/mod.rs new file mode 100644 index 00000000..bbfb3f18 --- /dev/null +++ b/gust_integrate_lfs/src/git/pack/mod.rs @@ -0,0 +1,395 @@ +//!Encode and Decode The Pack File ,which is in the dir:`.git/object/pack/*.pack` +//! +use std::convert::TryFrom; +use std::convert::TryInto; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::idx::Idx; +use crate::git::object::delta::*; +use crate::git::object::metadata::MetaData; +use crate::git::pack::cache::PackObjectCache; +use crate::git::utils; +use crate::gust::driver::database::mysql::storage::MysqlStorage; +use crate::gust::driver::ObjectStorage; +use async_recursion::async_recursion; + +pub mod cache; +pub mod decode; +pub mod encode; +pub mod multidecode; + +/// ### Pack文件结构
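+/// On-disk layout that this struct mirrors (a sketch; field sizes in bytes,
+/// and the trailing SHA-1 covers everything before it):
+/// ```plaintext
+/// | "PACK" (4) | version (4) | number_of_objects (4) | object entries ... | SHA-1 (20) |
+/// ```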
+/// `head`: always = "PACK"<br>
+/// `version`: pack format version (always 2 here)<br>
+/// `number_of_objects`: total number of objects in the pack<br>
+/// `signature`: SHA-1 checksum (`Hash`) of the pack content<br>
+/// `result`: decoded cache, +#[allow(unused)] +#[derive(Default)] +pub struct Pack { + head: [u8; 4], + version: u32, + number_of_objects: usize, + pub signature: Hash, + pub result: Arc, + pack_file: PathBuf, +} + +impl Pack { + /// Git [Pack Format](https://github.com/git/git/blob/master/Documentation/technical/pack-format.txt) + /// Git Pack-Format [Introduce](https://git-scm.com/docs/pack-format) + /// ## Decode the Pack File without the `.idx` File + /// - in: pack_file: &mut File + /// - out: The `Pack` Struct + #[allow(unused)] + pub async fn decode( + pack_file: &mut File, + storage: &T, + ) -> Result { + // Check the Header of Pack File + let mut _pack = Self::check_header(pack_file)?; + + // Init the cache for follow object parse + let mut cache = PackObjectCache::default(); + + for i in 0.._pack.number_of_objects { + if i % 1000 == 0 { + tracing::info!( + "Unpacking: Idx/Total:{}/{}, Hash/Offset Map Size:{}/{}, Obj/Delta count, {}/{}", + i, + _pack.number_of_objects, + cache.by_hash.len(), + cache.by_offset.len(), + cache.base, + cache.delta, + ); + } + //update offset of the Object + let offset = utils::get_offset(pack_file).unwrap(); + //Get the next Object by the Pack::next_object() func + let object = Pack::next_object(pack_file, offset, &mut cache, storage).await?; + // Larger offsets would require a version-2 pack index + let offset = u32::try_from(offset) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap(); + } + _pack.result = Arc::new(cache); + // CheckSum sha-1 + let _id: [u8; 20] = utils::read_bytes(pack_file).unwrap(); + _pack.signature = Hash::from_row(&_id[..]); + + Ok(_pack) + } + + /// Check the Header of the Pack File ,
+ /// include the **"PACK" head** , **Version Number** and **Number of the Objects** + fn check_header(pack_file: &mut File) -> Result { + //init a Pack Struct ,which is all empty + let mut _pack = Self { + head: [0, 0, 0, 0], + version: 0, + number_of_objects: 0, + signature: Hash::default(), + result: Arc::new(PackObjectCache::default()), + pack_file: PathBuf::new(), + }; + + // Get the Pack Head 4 b ,which should be the "PACK" + let magic = utils::read_bytes(pack_file).unwrap(); + if magic != *b"PACK" { + return Err(GitError::InvalidPackHeader(format!( + "{},{},{},{}", + magic[0], magic[1], magic[2], magic[3] + ))); + } + _pack.head = magic; + + //Get the Version Number + let version = utils::read_u32(pack_file).unwrap(); + if version != 2 { + return Err(GitError::InvalidPackFile(format!("Current File"))); + } + _pack.version = version; + + let object_num = utils::read_u32(pack_file).unwrap(); + _pack.number_of_objects = object_num as usize; + + Ok(_pack) + } + + /// Decode the pack file helped by the according decoded idx file + #[allow(unused)] + pub async fn decode_by_idx(idx: &mut Idx, pack_file: &mut File) -> Result { + let mut _pack = Self::check_header(pack_file)?; + let object_num = idx.number_of_objects; + _pack.number_of_objects = u32::try_from(object_num) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap() as usize; + let mut cache = PackObjectCache::default(); + + for idx_item in idx.idx_items.iter() { + Pack::next_object( + pack_file, + idx_item.offset.try_into().unwrap(), + &mut cache, + &MysqlStorage::default(), + ) + .await + .unwrap(); + } + let mut result = decode::ObjDecodedMap::default(); + result.update_from_cache(&mut cache); + _pack.signature = idx.pack_signature.clone(); + Ok(_pack) + } + + /// Decode the object info from the pack file,
+    /// but we don't decode the objects any further;<br>
+    /// instead, it stores **all undecoded object information** in a `Vec<u8>`.<br>
+    /// This function also returns a `Pack` struct in which only the cache attribute `result` is left invalid.
+    pub fn decode_raw_data(pack_file: &mut File) -> (Self, Vec<u8>) {
+        let mut raw_pack = Self::check_header(pack_file).unwrap();
+        let mut _raw: Vec<u8> = Vec::new();
+        pack_file.read_to_end(&mut _raw).unwrap();
+        // everything before the trailing 20 bytes is the raw object data
+        let raw_info = _raw[.._raw.len() - 20].to_vec();
+        // the final 20 bytes are the pack's SHA-1 checksum
+        let _hash = Hash::from_row(&_raw[_raw.len() - 20..]);
+        raw_pack.signature = _hash;
+        (raw_pack, raw_info)
+    }
+    /// Get the object from the pack file at the given offset.<br>
+ /// By the way , the cache can hold the fount object + #[async_recursion] + pub async fn next_object( + pack_file: &mut File, + offset: u64, + cache: &mut PackObjectCache, + storage: &T, + ) -> Result, GitError> { + use super::object::types::ObjectType; + utils::seek(pack_file, offset).unwrap(); + let (type_num, size) = utils::read_type_and_size(pack_file).unwrap(); + //Get the Object according to the Types Enum + let object = match type_num { + // Undelta representation + 1..=4 => utils::read_zlib_stream_exact(pack_file, |decompressed| { + let mut contents = Vec::with_capacity(size); + decompressed.read_to_end(&mut contents).unwrap(); + if contents.len() != size { + return Err(GitError::InvalidObjectInfo(format!( + "Incorrect object size" + ))); + } + cache.base += 1; + Ok(MetaData::new(ObjectType::number_type(type_num), &contents)) + }), + // Delta; base object is at an offset in the same packfile + 6 => { + let delta_offset = utils::read_offset_encoding(pack_file).unwrap(); + let base_offset = offset.checked_sub(delta_offset).ok_or_else(|| { + GitError::InvalidObjectInfo(format!("Invalid OffsetDelta offset")) + })?; + let offset = utils::get_offset(pack_file).unwrap(); + + let base_object = if let Some(object) = cache.offset_object(base_offset) { + Arc::clone(object) + } else { + //递归调用 找出base object + Pack::next_object(pack_file, base_offset, cache, storage).await? + }; + utils::seek(pack_file, offset).unwrap(); + let base_obj = base_object.as_ref(); + let objs = apply_delta(pack_file, base_obj)?; + cache.delta += 1; + Ok(objs) + } + // Delta; base object is given by a hash outside the packfile + //TODO : This Type need to be completed ,对应多文件的todo + 7 => { + let hash = utils::read_hash(pack_file).unwrap(); + //let object; + let base_object = if let Some(object) = cache.hash_object(hash) { + object.to_owned() + } else { + // object = read_object(hash)?; + // &object + Arc::new(storage.get_hash_object(&hash.to_plain_str()).await.unwrap()) + }; + apply_delta(pack_file, &base_object) + } + _ => { + return Err(GitError::InvalidObjectType( + ObjectType::number_type(type_num).to_string(), + )); + } + }?; + + // //Debug Code: Print the hash & type of the parsed object + // match PackObjectType::type_number2_type(object_type) { + // Some(a) => println!("Hash:{} \t Types: {:?}",object.hash(), a), + // None =>{}, + // } + + let obj = Arc::new(object); + cache.update(Arc::clone(&obj), offset); + Ok(obj) + } + + pub fn get_object_number(&self) -> usize { + return self.number_of_objects as usize; + } + pub fn get_cache(&self) -> PackObjectCache { + return self.result.as_ref().clone(); + } + pub fn get_hash(&self) -> Hash { + return self.signature.clone(); + } + + /// Decode a pack file according to the given pack file path + /// # Examples + /// ``` + /// let decoded_pack = Pack::decode_file("./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack"); + /// assert_eq!( + /// "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + /// decoded_pack.signature.to_plain_str() + /// ); + /// ``` + /// + #[allow(unused)] + pub async fn decode_file(file: &str) -> Pack { + let mut pack_file = File::open(&Path::new(file)).unwrap(); + let decoded_pack = match Pack::decode(&mut pack_file, &MysqlStorage::default()).await { + Ok(f) => f, + Err(e) => match e { + GitError::NotFountHashValue(a) => panic!("{}", a), + _ => panic!("unknow error"), + }, + }; + assert_eq!(*b"PACK", decoded_pack.head); + assert_eq!(2, decoded_pack.version); + decoded_pack + } +} + +/// +#[cfg(test)] +mod tests { + use 
std::fs::File; + use std::io::BufReader; + use std::io::Read; + use std::path::Path; + use tokio_test::block_on; + + use crate::git::idx::Idx; + + use super::Pack; + + /// Test the pack File decode standalone + #[test] + fn test_decode_pack_file1() { + let decoded_pack = block_on(Pack::decode_file( + "./resources/data/test/pack-6590ba86f4e863e1c2c985b046e1d2f1a78a0089.pack", + )); + assert_eq!( + "6590ba86f4e863e1c2c985b046e1d2f1a78a0089", + decoded_pack.signature.to_plain_str() + ); + } + + #[test] + fn test_decode_pack_file_with_print() { + let decoded_pack = block_on(Pack::decode_file( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack", + )); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + decoded_pack.signature.to_plain_str() + ); + } + + #[test] + fn test_parse_simple_pack() { + let decoded_pack = block_on(Pack::decode_file( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + )); + assert_eq!( + "1d0e6c14760c956c173ede71cb28f33d921e232f", + decoded_pack.signature.to_plain_str() + ); + print!("{}", decoded_pack.get_object_number()); + } + + #[test] + fn test_parse_simple_pack2() { + let decoded_pack = block_on(Pack::decode_file( + "./resources/test2/pack-8c81e90db37ef77494efe4f31daddad8b494e099.pack", + )); + assert_eq!( + "8c81e90db37ef77494efe4f31daddad8b494e099", + decoded_pack.signature.to_plain_str() + ); + print!("{}", decoded_pack.get_object_number()); + } + + #[test] + fn test_read_raw_pack() { + let mut pack_file = File::open(&Path::new( + "./resources/test1/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack", + )) + .unwrap(); + let (raw_pack, _raw_data) = Pack::decode_raw_data(&mut pack_file); + assert_eq!( + "1d0e6c14760c956c173ede71cb28f33d921e232f", + raw_pack.signature.to_plain_str() + ); + } + + ///Test the pack decode by the Idx File + #[test] + fn test_pack_idx_decode() { + let mut pack_file = File::open(&Path::new( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.pack", + )) + .unwrap(); + let idx_file = File::open(&Path::new( + "./resources/data/test/pack-8d36a6464e1f284e5e9d06683689ee751d4b2687.idx", + )) + .unwrap(); + let mut reader = BufReader::new(idx_file); + let mut buffer = Vec::new(); + reader.read_to_end(&mut buffer).ok(); + + let mut idx = Idx::default(); + idx.decode(buffer).unwrap(); + let decoded_pack = block_on(Pack::decode_by_idx(&mut idx, &mut pack_file)).unwrap(); + assert_eq!(*b"PACK", decoded_pack.head); + assert_eq!(2, decoded_pack.version); + assert_eq!( + "8d36a6464e1f284e5e9d06683689ee751d4b2687", + decoded_pack.signature.to_plain_str() + ); + } + + #[ignore] + #[test] + pub fn test_create_time() { + let pack_file = File::open(&Path::new( + "./resources/friger/pack-6cf1ec1a89de3757f7ba776e4dc108b88367c460.pack", + )) + .unwrap(); + let metadata = pack_file.metadata().unwrap(); + print!("{:?}", metadata.created().unwrap()); + + let pack_file = File::open(&Path::new( + "./resources/friger/pack-040de05aef75a0d847bff37f8cacab22dae377c9.pack", + )) + .unwrap(); + let metadata = pack_file.metadata().unwrap(); + print!("{:?}", metadata.created().unwrap()); + } +} diff --git a/gust_integrate_lfs/src/git/pack/multidecode.rs b/gust_integrate_lfs/src/git/pack/multidecode.rs new file mode 100644 index 00000000..58fd10d3 --- /dev/null +++ b/gust_integrate_lfs/src/git/pack/multidecode.rs @@ -0,0 +1,94 @@ +//! +//! +//! +//! 
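+//! Decode several pack files under one directory into a single shared
+//! `PackObjectCache`, processing the packs in creation-time order.
+//!
+//! A minimal usage sketch (the directory path is illustrative only):
+//! ```
+//! use crate::git::pack::Pack;
+//! use tokio_test::block_on;
+//!
+//! let total = block_on(Pack::multi_decode("./resources/multi")).unwrap();
+//! println!("objects decoded across all packs: {}", total.get_object_number());
+//! ```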
+use std::cmp::Ordering; +use std::convert::TryFrom; +use std::{fs::File, sync::Arc}; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; +use crate::git::pack::cache::PackObjectCache; +use crate::git::pack::Pack; +use crate::git::utils; +use crate::gust::driver::database::mysql::storage::MysqlStorage; + +impl Eq for Pack {} + +impl Ord for Pack { + fn cmp(&self, other: &Self) -> Ordering { + let a = self.pack_file.metadata().unwrap().created().unwrap(); + let b = other.pack_file.metadata().unwrap().created().unwrap(); + if a == b { + return Ordering::Equal; + } else if a > b { + return Ordering::Greater; + } else { + return Ordering::Less; + } + } +} + +impl PartialOrd for Pack { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for Pack { + fn eq(&self, other: &Self) -> bool { + let a = self.pack_file.metadata().unwrap().created().unwrap(); + let b = other.pack_file.metadata().unwrap().created().unwrap(); + a == b + } +} + +impl Pack { + #[allow(unused)] + pub async fn decode_with_cache(&self, cache: &mut PackObjectCache) -> Result { + let mut pack_file = File::open(self.pack_file.clone()).unwrap(); + // Check the Header of Pack File + let mut _pack = Self::check_header(&mut pack_file).unwrap(); + + for _ in 0.._pack.number_of_objects { + //update offset of the Object + let offset = utils::get_offset(&mut pack_file).unwrap(); + //Get the next Object by the Pack::next_object() func + let object = Pack::next_object(&mut pack_file, offset, cache, &MysqlStorage::default()).await.unwrap(); + // Larger offsets would require a version-2 pack index + let offset = u32::try_from(offset) + .map_err(|_| GitError::InvalidObjectInfo(format!("Packfile is too large"))) + .unwrap(); + } + + // CheckSum sha-1 + let _id: [u8; 20] = utils::read_bytes(&mut pack_file).unwrap(); + _pack.signature = Hash::from_row(&_id[..]); + print!("{}", cache.by_hash.len()); + Ok(_pack) + } + + #[allow(dead_code)] + pub async fn multi_decode(root: &str) -> Result { + let mut total_pack = Self::default(); + total_pack.number_of_objects = 0; + let (files, _hash_vec) = utils::find_all_pack_file(root); + let mut pack_vec = vec![]; + for _file_ in files.iter() { + let mut _pack = Pack::default(); + _pack.pack_file = _file_.clone(); + pack_vec.push(_pack); + } + pack_vec.sort(); + let mut cache = PackObjectCache::default(); + for _pack_ in pack_vec.iter_mut() { + _pack_.decode_with_cache(&mut cache).await?; + total_pack.number_of_objects += _pack_.number_of_objects; + } + total_pack.result = Arc::new(cache); + Ok(total_pack) + } +} + +#[cfg(test)] +pub mod test {} diff --git a/gust_integrate_lfs/src/git/protocol/http.rs b/gust_integrate_lfs/src/git/protocol/http.rs new file mode 100644 index 00000000..cf8fe4ac --- /dev/null +++ b/gust_integrate_lfs/src/git/protocol/http.rs @@ -0,0 +1,111 @@ +use std::collections::HashMap; + +use anyhow::Result; +use axum::body::Body; +use axum::http::response::Builder; +use axum::http::{Response, StatusCode}; + +use bytes::{BufMut, Bytes, BytesMut}; + +use futures::StreamExt; +use hyper::body::Sender; +use hyper::Request; + +use tokio::io::{AsyncReadExt, BufReader}; + +use crate::gust::driver::ObjectStorage; + +use super::{pack, PackProtocol}; + +pub fn build_res_header(content_type: String) -> Builder { + let mut headers = HashMap::new(); + headers.insert("Content-Type".to_string(), content_type); + headers.insert( + "Cache-Control".to_string(), + "no-cache, max-age=0, must-revalidate".to_string(), + ); + let mut resp = 
Response::builder(); + + for (key, val) in headers { + resp = resp.header(&key, val); + } + resp +} + +pub async fn send_pack( + mut sender: Sender, + result: Vec, + pack_protocol: PackProtocol, +) -> Result<(), (StatusCode, &'static str)> { + let mut reader = BufReader::new(result.as_slice()); + loop { + let mut temp = BytesMut::new(); + temp.reserve(65500); + let length = reader.read_buf(&mut temp).await.unwrap(); + if temp.is_empty() { + let mut bytes_out = BytesMut::new(); + bytes_out.put_slice(pack::PKT_LINE_END_MARKER); + tracing::info!("send: bytes_out: {:?}", bytes_out.clone().freeze()); + sender.send_data(bytes_out.freeze()).await.unwrap(); + return Ok(()); + } + let bytes_out = pack_protocol.build_side_band_format(temp, length); + tracing::info!("send: bytes_out: {:?}", bytes_out.clone().freeze()); + sender.send_data(bytes_out.freeze()).await.unwrap(); + } +} + +pub async fn git_upload_pack( + req: Request, + mut pack_protocol: PackProtocol, +) -> Result, (StatusCode, String)> { + let (_parts, mut body) = req.into_parts(); + + let mut upload_request = BytesMut::new(); + + while let Some(chunk) = body.next().await { + tracing::info!("client sends :{:?}", chunk); + let bytes = chunk.unwrap(); + upload_request.extend_from_slice(&bytes); + } + + let (send_pack_data, buf) = pack_protocol + .git_upload_pack(&mut upload_request.freeze()) + .await + .unwrap(); + let resp = build_res_header("application/x-git-upload-pack-result".to_owned()); + + tracing::info!("send buf: {:?}", buf); + + let (mut sender, body) = Body::channel(); + sender.send_data(buf.freeze()).await.unwrap(); + + tokio::spawn(send_pack(sender, send_pack_data, pack_protocol)); + Ok(resp.body(body).unwrap()) +} + +pub async fn git_receive_pack( + req: Request, + mut pack_protocol: PackProtocol, +) -> Result, (StatusCode, String)> { + let (_parts, mut body) = req.into_parts(); + let mut combined_body_bytes = Vec::new(); + while let Some(chunk) = body.next().await { + let body_bytes = chunk.unwrap(); + combined_body_bytes.extend(&body_bytes); + } + + let pack_data = pack_protocol + .git_receive_pack(Bytes::from(combined_body_bytes)) + .await + .unwrap(); + + let buf = pack_protocol.git_receive_pack(pack_data).await.unwrap(); + + let body = Body::from(buf); + tracing::info!("report status:{:?}", body); + let resp = build_res_header("application/x-git-receive-pack-result".to_owned()); + + let resp = resp.body(body).unwrap(); + Ok(resp) +} diff --git a/gust_integrate_lfs/src/git/protocol/mod.rs b/gust_integrate_lfs/src/git/protocol/mod.rs new file mode 100644 index 00000000..93874eb1 --- /dev/null +++ b/gust_integrate_lfs/src/git/protocol/mod.rs @@ -0,0 +1,235 @@ +//! +//! +//! +//! + +use std::{fs::File, path::PathBuf, str::FromStr, sync::Arc}; + +use entity::refs; +use sea_orm::{ActiveValue::NotSet, Set}; + +use crate::{ + git::protocol::pack::SP, + gust::driver::{ObjectStorage, ZERO_ID}, +}; + +use super::pack::Pack; +pub mod http; +pub mod pack; +pub mod ssh; + +#[derive(Debug, Clone, Default)] +pub struct PackProtocol { + pub protocol: Protocol, + pub capabilities: Vec, + pub path: PathBuf, + pub service_type: Option, + pub storage: Arc, + pub command_list: Vec, +} + +// Is that useful? 
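+// A hypothetical construction sketch for the `PackProtocol` above (the repo
+// path, storage value, and protocol choice are illustrative; `new` is defined
+// at the bottom of this module):
+//
+//     let proto = PackProtocol::new(
+//         PathBuf::from("root/repotest/src"),
+//         "git-upload-pack",
+//         Arc::new(storage),
+//         Protocol::Ssh,
+//     );
+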
+#[derive(Debug, PartialEq, Clone, Copy, Default)] +pub enum Protocol { + Local, + #[default] + Http, + Ssh, + Git, +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum ServiceType { + UploadPack, + ReceivePack, +} + +impl ServiceType { + pub fn to_string(&self) -> String { + match self { + ServiceType::UploadPack => "git-upload-pack".to_owned(), + ServiceType::ReceivePack => "git-receive-pack".to_owned(), + } + } +} +impl FromStr for ServiceType { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "git-upload-pack" => Ok(ServiceType::UploadPack), + "git-receive-pack" => Ok(ServiceType::ReceivePack), + _ => Err(()), + } + } +} + +// TODO: Additional Capabilitys need to be supplemented. +#[derive(Debug, Clone, PartialEq)] +pub enum Capability { + MultiAck, + MultiAckDetailed, + NoDone, + SideBand, + SideBand64k, + ReportStatus, + ReportStatusv2, + OfsDelta, + DeepenSince, + DeepenNot, +} + +impl FromStr for Capability { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "report-status" => Ok(Capability::ReportStatus), + "report-status-v2" => Ok(Capability::ReportStatusv2), + "side-band" => Ok(Capability::SideBand), + "side-band-64k" => Ok(Capability::SideBand64k), + "ofs-delta" => Ok(Capability::OfsDelta), + "multi_ack" => Ok(Capability::MultiAck), + "multi_ack_detailed" => Ok(Capability::MultiAckDetailed), + "no-done" => Ok(Capability::NoDone), + "deepen-since" => Ok(Capability::DeepenSince), + "deepen-not" => Ok(Capability::DeepenNot), + _ => Err(()), + } + } +} + +pub enum SideBind { + // sideband 1 will contain packfile data, + PackfileData, + // sideband 2 will be used for progress information that the client will generally print to stderr and + ProgressInfo, + // sideband 3 is used for error information. + Error, +} + +impl SideBind { + pub fn value(&self) -> u8 { + match self { + Self::PackfileData => b'\x01', + Self::ProgressInfo => b'\x02', + Self::Error => b'\x03', + } + } +} +pub struct RefUpdateRequet { + pub comand_list: Vec, +} + +#[derive(Debug, Clone)] +pub struct RefCommand { + pub ref_name: String, + pub old_id: String, + pub new_id: String, + pub status: String, + pub error_msg: String, + pub command_type: Command, +} + +#[derive(Debug, Clone)] +pub enum Command { + Create, + Delete, + Update, +} + +impl RefCommand { + const OK_STATUS: &str = "ok"; + + const FAILED_STATUS: &str = "ng"; + + pub fn new(old_id: String, new_id: String, ref_name: String) -> Self { + let command_type = if ZERO_ID == old_id { + Command::Create + } else if ZERO_ID == new_id { + Command::Delete + } else { + Command::Update + }; + RefCommand { + ref_name, + old_id, + new_id, + status: RefCommand::OK_STATUS.to_owned(), + error_msg: "".to_owned(), + command_type, + } + } + + pub async fn unpack( + &mut self, + pack_file: &mut File, + storage: &T, + ) -> Result { + match Pack::decode(pack_file, storage).await { + Ok(decoded_pack) => { + self.status = RefCommand::OK_STATUS.to_owned(); + Ok(decoded_pack) + } + Err(err) => { + self.status = RefCommand::FAILED_STATUS.to_owned(); + self.error_msg = err.to_string(); + Err(err.into()) + } + } + } + + pub fn get_status(&self) -> String { + if RefCommand::OK_STATUS == self.status { + format!("{}{}{}", self.status, SP, self.ref_name,) + } else { + format!( + "{}{}{}{}{}", + self.status, + SP, + self.ref_name, + SP, + self.error_msg.clone() + ) + } + } + + pub fn failed(&mut self, msg: String) { + self.status = RefCommand::FAILED_STATUS.to_owned(); + self.error_msg = msg; + } + + pub fn convert_to_model(&self, path: &str) -> 
refs::ActiveModel { + refs::ActiveModel { + id: NotSet, + ref_git_id: Set(self.new_id.to_owned()), + ref_name: Set(self.ref_name.to_string()), + repo_path: Set(path.to_owned()), + created_at: Set(chrono::Utc::now().naive_utc()), + updated_at: Set(chrono::Utc::now().naive_utc()), + } + } +} + +#[allow(unused)] +impl PackProtocol { + pub fn new(path: PathBuf, service_name: &str, storage: Arc, protocol: Protocol) -> Self { + let service_type = if service_name.is_empty() { + None + } else { + Some(service_name.parse::().unwrap()) + }; + PackProtocol { + protocol, + capabilities: Vec::new(), + service_type, + path, + storage, + command_list: Vec::new(), + } + } + + // pub fn service_type(&mut self, service_name: &str) { + // self.service_type = Some(ServiceType::new(&service_name)); + // } +} diff --git a/gust_integrate_lfs/src/git/protocol/pack.rs b/gust_integrate_lfs/src/git/protocol/pack.rs new file mode 100644 index 00000000..f31b79ec --- /dev/null +++ b/gust_integrate_lfs/src/git/protocol/pack.rs @@ -0,0 +1,444 @@ +//! +//! +//! +//! +use std::collections::{HashMap, HashSet}; +use std::fs::{self, OpenOptions}; +use std::io::Write; +use std::path::Path; +use std::sync::Arc; + +use anyhow::Result; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use chrono::Utc; + +use crate::git::hash::Hash; +use crate::git::object::base::blob::Blob; +use crate::git::object::base::commit::Commit; +use crate::git::object::base::tree::{Tree, TreeItemType}; +use crate::git::object::metadata::MetaData; +use crate::git::protocol::{PackProtocol, RefCommand}; +use crate::gust::driver::{ObjectStorage, ZERO_ID}; + +use super::{Capability, Protocol, ServiceType, SideBind}; + +const LF: char = '\n'; + +pub const SP: char = ' '; + +const NUL: char = '\0'; + +pub const PKT_LINE_END_MARKER: &[u8; 4] = b"0000"; + +// The atomic, report-status, report-status-v2, delete-refs, quiet, +// and push-cert capabilities are sent and recognized by the receive-pack (push to server) process. +const RECEIVE_CAP_LIST: &str = "report-status report-status-v2 delete-refs quiet atomic "; + +// The ofs-delta and side-band-64k capabilities are sent and recognized by both upload-pack and receive-pack protocols. +// The agent and session-id capabilities may optionally be sent in both protocols. +const CAP_LIST: &str = "side-band-64k ofs-delta object-format=sha1"; + +// All other capabilities are only recognized by the upload-pack (fetch from server) process. +const UPLOAD_CAP_LIST: &str = + "shallow deepen-since deepen-not deepen-relative multi_ack_detailed no-done "; + +impl PackProtocol { + pub async fn git_info_refs(&mut self) -> BytesMut { + let service_type = self.service_type.unwrap(); + // The stream MUST include capability declarations behind a NUL on the first ref. 
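+        // e.g. the first advertised ref takes this shape (before the 4-hex-digit
+        // length prefix is added):
+        //   "<40-hex-object-id> HEAD\0side-band-64k ofs-delta object-format=sha1 ...\n"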
+ let object_id = self.storage.get_head_object_id(&self.path).await; + let name = if object_id == ZERO_ID { + "capabilities^{}" + } else { + "HEAD" + }; + let cap_list = match self.service_type { + Some(ServiceType::UploadPack) => format!("{}{}", UPLOAD_CAP_LIST, CAP_LIST), + Some(ServiceType::ReceivePack) => format!("{}{}", RECEIVE_CAP_LIST, CAP_LIST), + _ => CAP_LIST.to_owned(), + }; + let pkt_line = format!("{}{}{}{}{}{}", object_id, SP, name, NUL, cap_list, LF); + let mut ref_list = vec![pkt_line]; + + let obj_ids = self.storage.get_ref_object_id(&self.path).await; + for (object_id, name) in obj_ids { + let pkt_line = format!("{}{}{}{}", object_id, SP, name, LF); + ref_list.push(pkt_line); + } + let pkt_line_stream = self.build_smart_reply(&ref_list, service_type.to_string()); + tracing::info!("git_info_refs response: {:?}", pkt_line_stream); + pkt_line_stream + } + + pub async fn git_upload_pack( + &mut self, + upload_request: &mut Bytes, + ) -> Result<(Vec, BytesMut)> { + let mut want: HashSet = HashSet::new(); + let mut have: HashSet = HashSet::new(); + + let mut first_line = true; + loop { + let (bytes_take, pkt_line) = read_pkt_line(upload_request); + // if read 0000 + if bytes_take == 0 && pkt_line.is_empty() { + continue; + } + tracing::debug!("read line: {:?}", pkt_line); + let dst = pkt_line.to_vec(); + let commands = &dst[0..4]; + + match commands { + b"want" => want.insert(String::from_utf8(dst[5..45].to_vec()).unwrap()), + b"have" => have.insert(String::from_utf8(dst[5..45].to_vec()).unwrap()), + b"done" => break, + other => { + tracing::error!( + "unsupported command: {:?}", + String::from_utf8(other.to_vec()) + ); + continue; + } + }; + if first_line { + self.parse_capabilities(&String::from_utf8(dst[46..].to_vec()).unwrap()); + first_line = false; + } + } + + tracing::info!( + "want commands: {:?}, have commans: {:?}, caps:{:?}", + want, + have, + self.capabilities + ); + + let mut send_pack_data = vec![]; + let mut buf = BytesMut::new(); + + if have.is_empty() { + send_pack_data = self.storage.get_full_pack_data(&self.path).await.unwrap(); + add_pkt_line_string(&mut buf, String::from("NAK\n")); + } else { + // let object_root = self.path.join(".git/objects"); + + // let mut decoded_pack = Pack::default(); + // let meta_map: HashMap = + // find_common_base(Hash::from_str(&want[0]).unwrap(), &object_root, &have); + // send_pack_data = decoded_pack.encode(Some(meta_map.into_values().collect())); + + if self.capabilities.contains(&Capability::MultiAckDetailed) { + // multi_ack_detailed mode, the server will differentiate the ACKs where it is signaling that + // it is ready to send data with ACK obj-id ready lines, + // and signals the identified common commits with ACK obj-id common lines + for hash in &have { + if self.storage.get_commit_by_hash(hash).await.is_ok() { + add_pkt_line_string(&mut buf, format!("ACK {} common\n", hash)); + } + // no need to send NAK in this mode if missing commit? + } + + send_pack_data = self + .storage + .get_incremental_pack_data(&self.path, &want, &have) + .await + .unwrap(); + + for hash in &want { + if self.storage.get_commit_by_hash(hash).await.is_ok() { + add_pkt_line_string(&mut buf, format!("ACK {} common\n", hash)); + } + if self.capabilities.contains(&Capability::NoDone) { + // If multi_ack_detailed and no-done are both present, then the sender is free to immediately send a pack + // following its first "ACK obj-id ready" message. 
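+                    // e.g. the pkt-line emitted here, with its length prefix:
+                    //   "0037ACK 7bdc783132575d5b3e78400ace9971970ff43a18 ready\n"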
+ add_pkt_line_string(&mut buf, format!("ACK {} ready\n", hash)); + } + } + } else { + tracing::error!("capability unsupported"); + } + + add_pkt_line_string( + &mut buf, + format!("ACK {} \n", "27dd8d4cf39f3868c6eee38b601bc9e9939304f5"), + ); + } + Ok((send_pack_data, buf)) + } + + pub async fn git_receive_pack(&mut self, mut body_bytes: Bytes) -> Result { + if body_bytes.len() < 1000 { + tracing::debug!("bytes from client: {:?}", body_bytes); + } + + if body_bytes.starts_with(&[b'P', b'A', b'C', b'K']) { + let command = self.command_list.last_mut().unwrap(); + let temp_file = format!("./temp-{}.pack", Utc::now().timestamp()); + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(&temp_file) + .unwrap(); + file.write_all(&body_bytes).unwrap(); + let decoded_pack = command + .unpack( + &mut std::fs::File::open(&temp_file).unwrap(), + self.storage.as_ref(), + ) + .await + .unwrap(); + let pack_result = self.storage.save_packfile(decoded_pack, &self.path).await; + if pack_result.is_ok() { + self.storage.handle_refs(command, &self.path).await; + } else { + tracing::error!("{}", pack_result.err().unwrap()); + command.failed(String::from("db operation failed")); + } + fs::remove_file(temp_file).unwrap(); + + // After receiving the pack data from the sender, the receiver sends a report + let mut report_status = BytesMut::new(); + // TODO: replace this hard code "unpack ok\n" + add_pkt_line_string(&mut report_status, "unpack ok\n".to_owned()); + for command in &self.command_list { + add_pkt_line_string(&mut report_status, command.get_status()); + } + report_status.put(&PKT_LINE_END_MARKER[..]); + + let length = report_status.len(); + let mut buf = self.build_side_band_format(report_status, length); + buf.put(&PKT_LINE_END_MARKER[..]); + Ok(buf.into()) + } else { + let (bytes_take, mut pkt_line) = read_pkt_line(&mut body_bytes); + if bytes_take == 0 && pkt_line.is_empty() { + return Ok(body_bytes); + } + let command = self.parse_ref_update(&mut pkt_line); + self.parse_capabilities(&String::from_utf8(pkt_line.to_vec()).unwrap()); + tracing::debug!("init comamnd: {:?}, caps:{:?}", command, self.capabilities); + self.command_list.push(command); + Ok(body_bytes.split_off(4)) + } + } + + // if SideBand/64k capability is enabled, pack data should send with sideband format + pub fn build_side_band_format(&self, from_bytes: BytesMut, length: usize) -> BytesMut { + let capabilities = &self.capabilities; + if capabilities.contains(&Capability::SideBand) + || capabilities.contains(&Capability::SideBand64k) + { + let mut to_bytes = BytesMut::new(); + let length = length + 5; + to_bytes.put(Bytes::from(format!("{length:04x}"))); + to_bytes.put_u8(SideBind::PackfileData.value()); + to_bytes.put(from_bytes); + return to_bytes; + } + from_bytes + } + + pub fn build_smart_reply(&self, ref_list: &Vec, service: String) -> BytesMut { + let mut pkt_line_stream = BytesMut::new(); + if self.protocol == Protocol::Http { + add_pkt_line_string(&mut pkt_line_stream, format!("# service={}\n", service)); + pkt_line_stream.put(&PKT_LINE_END_MARKER[..]); + } + + for ref_line in ref_list { + add_pkt_line_string(&mut pkt_line_stream, ref_line.to_string()); + } + pkt_line_stream.put(&PKT_LINE_END_MARKER[..]); + pkt_line_stream + } + + pub fn parse_capabilities(&mut self, cap_str: &str) { + let cap_vec: Vec<_> = cap_str.split(' ').collect(); + for cap in cap_vec { + let res = cap.trim().parse::(); + if let Ok(cap) = res { + self.capabilities.push(cap); + } + } + } + + // the first line contains the capabilities 
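+    // A ref-update line has the shape:
+    //   <old-id> SP <new-id> SP <ref-name> NUL <capability-list>
+    // e.g. a branch creation sends the all-zero id as <old-id>:
+    //   "0000000000000000000000000000000000000000 7bdc783132575d5b3e78400ace9971970ff43a18 refs/heads/master\0report-status side-band-64k"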
+ pub fn parse_ref_update(&self, pkt_line: &mut Bytes) -> RefCommand { + RefCommand::new( + read_until_white_space(pkt_line), + read_until_white_space(pkt_line), + read_until_white_space(pkt_line), + ) + } +} + +fn read_until_white_space(bytes: &mut Bytes) -> String { + let mut buf = Vec::new(); + while bytes.has_remaining() { + let c = bytes.get_u8(); + if c.is_ascii_whitespace() { + break; + } + buf.push(c); + } + String::from_utf8(buf).unwrap() +} + +fn find_common_base( + mut obj_id: Hash, + object_root: &Path, + have: &[String], +) -> HashMap { + let mut result: HashMap = HashMap::new(); + let mut basic_objects: HashSet = HashSet::new(); + let common_base_commit: Commit; + let mut commits: Vec = vec![]; + loop { + let commit = Commit::parse_from_file( + object_root + .join(obj_id.to_folder()) + .join(obj_id.to_filename()), + ); + // stop when find common base commit + if have.contains(&obj_id.to_plain_str()) { + common_base_commit = commit; + tracing::info!("found common base commit:{}", obj_id); + break; + } + commits.push(commit.clone()); + result.insert(commit.meta.id, Arc::try_unwrap(commit.meta).unwrap()); + + let parent_ids = commit.parent_tree_ids; + + if parent_ids.len() == 1 { + obj_id = parent_ids[0]; + } else { + tracing::error!("multi branch not supported yet"); + todo!(); + } + } + + // init basic hashset by common base commit + parse_tree( + object_root, + common_base_commit.tree_id, + &mut result, + &mut basic_objects, + true, + ); + for commit in commits.iter().rev() { + let tree_id = commit.tree_id; + parse_tree(object_root, tree_id, &mut result, &mut basic_objects, false); + } + result +} + +fn parse_tree( + object_root: &Path, + tree_id: Hash, + result: &mut HashMap, + basic_objects: &mut HashSet, + init_basic: bool, +) { + if basic_objects.contains(&tree_id) { + return; + } + let tree = Tree::parse_from_file( + object_root + .join(tree_id.to_folder()) + .join(tree_id.to_filename()), + ); + basic_objects.insert(tree_id); + if !init_basic { + result.insert(tree_id, Arc::try_unwrap(tree.meta).unwrap()); + } + + for tree_item in tree.tree_items { + // this itme has been parsed + if basic_objects.contains(&tree_item.id) { + continue; + } + match tree_item.item_type { + TreeItemType::Blob => { + if !init_basic { + let blob = Blob::parse_from_file( + object_root + .join(tree_item.id.to_folder()) + .join(tree_item.id.to_filename()), + ); + result.insert(blob.meta.id, Arc::try_unwrap(blob.meta).unwrap()); + } + } + TreeItemType::BlobExecutable => todo!(), + TreeItemType::Tree => { + parse_tree(object_root, tree_item.id, result, basic_objects, init_basic); + } + TreeItemType::Commit => todo!(), + TreeItemType::Link => todo!(), + } + basic_objects.insert(tree_item.id); + } +} + +fn add_pkt_line_string(pkt_line_stream: &mut BytesMut, buf_str: String) { + let buf_str_length = buf_str.len() + 4; + pkt_line_stream.put(Bytes::from(format!("{buf_str_length:04x}"))); + pkt_line_stream.put(buf_str.as_bytes()); +} + +/// Read a single pkt-format line from body chunk, return the single line length and line bytes +pub fn read_pkt_line(bytes: &mut Bytes) -> (usize, Bytes) { + if bytes.is_empty() { + return (0, Bytes::new()); + } + let pkt_length = bytes.copy_to_bytes(4); + let pkt_length = + usize::from_str_radix(&String::from_utf8(pkt_length.to_vec()).unwrap(), 16).unwrap(); + + if pkt_length == 0 { + return (0, Bytes::new()); + } + // this operation will change the original bytes + let pkt_line = bytes.copy_to_bytes(pkt_length - 4); + + (pkt_length, pkt_line) +} + +#[cfg(test)] +pub 
mod test { + use bytes::{Bytes, BytesMut}; + + use super::{add_pkt_line_string, read_pkt_line}; + + #[test] + pub fn test_read_pkt_line() { + let mut bytes = Bytes::from_static(b"001e# service=git-upload-pack\n"); + let (pkt_length, pkt_line) = read_pkt_line(&mut bytes); + assert_eq!(pkt_length, 30); + assert_eq!(&pkt_line[..], b"# service=git-upload-pack\n"); + } + + // #[test] + // pub fn test_build_smart_reply() { + // let ref_list = vec![String::from("7bdc783132575d5b3e78400ace9971970ff43a18 refs/heads/master\0report-status report-status-v2 thin-pack side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative multi_ack_detailed no-done object-format=sha1\n")]; + // let pkt_line_stream = build_smart_reply(&ref_list, String::from("git-upload-pack")); + // assert_eq!(&pkt_line_stream[..], b"001e# service=git-upload-pack\n000000e87bdc783132575d5b3e78400ace9971970ff43a18 refs/heads/master\0report-status report-status-v2 thin-pack side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative multi_ack_detailed no-done object-format=sha1\n0000") + // } + + #[test] + pub fn test_add_to_pkt_line() { + let mut buf = BytesMut::new(); + add_pkt_line_string( + &mut buf, + format!( + "ACK {} common\n", + "7bdc783132575d5b3e78400ace9971970ff43a18" + ), + ); + add_pkt_line_string( + &mut buf, + format!("ACK {} ready\n", "7bdc783132575d5b3e78400ace9971970ff43a18"), + ); + assert_eq!(&buf.freeze()[..], b"0038ACK 7bdc783132575d5b3e78400ace9971970ff43a18 common\n0037ACK 7bdc783132575d5b3e78400ace9971970ff43a18 ready\n"); + } +} diff --git a/gust_integrate_lfs/src/git/protocol/ssh.rs b/gust_integrate_lfs/src/git/protocol/ssh.rs new file mode 100644 index 00000000..ed0e0264 --- /dev/null +++ b/gust_integrate_lfs/src/git/protocol/ssh.rs @@ -0,0 +1,199 @@ +//! +//! +//! +//! + +use async_trait::async_trait; +use bytes::{BufMut, Bytes, BytesMut}; +use russh::server::{Auth, Msg, Session}; +use russh::*; +use russh_keys::*; +use std::collections::HashMap; + +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; +use tokio::io::{AsyncReadExt, BufReader}; + +use crate::git::protocol::ServiceType; +use crate::gust::driver::ObjectStorage; + +use super::pack::{self}; +use super::{PackProtocol, Protocol}; + +#[derive(Clone)] +pub struct SshServer { + pub client_pubkey: Arc, + pub clients: Arc>>>, + pub id: usize, + pub storage: T, + // is it a good choice to bind data here? 
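+    // Set once per session in `handle_git_command` (during exec_request) and
+    // reused by the later `data` callbacks; stays `None` until the first git command.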
+ pub pack_protocol: Option>, +} + +impl server::Server for SshServer { + type Handler = Self; + fn new_client(&mut self, _: Option) -> Self { + let s = self.clone(); + self.id += 1; + s + } +} + +#[async_trait] +impl server::Handler for SshServer { + type Error = anyhow::Error; + + async fn channel_open_session( + self, + channel: Channel, + session: Session, + ) -> Result<(Self, bool, Session), Self::Error> { + tracing::info!("SshServer::channel_open_session:{}", channel.id()); + { + let mut clients = self.clients.lock().unwrap(); + clients.insert((self.id, channel.id()), channel); + } + Ok((self, true, session)) + } + + async fn exec_request( + mut self, + channel: ChannelId, + data: &[u8], + mut session: Session, + ) -> Result<(Self, Session), Self::Error> { + let data = String::from_utf8_lossy(data).trim().to_owned(); + tracing::info!("exec: {:?},{}", channel, data); + let res = self.handle_git_command(&data).await; + session.data(channel, res.into()); + Ok((self, session)) + } + + async fn auth_publickey( + self, + user: &str, + public_key: &key::PublicKey, + ) -> Result<(Self, Auth), Self::Error> { + tracing::info!("auth_publickey: {} / {:?}", user, public_key); + Ok((self, server::Auth::Accept)) + } + + async fn auth_password(self, user: &str, password: &str) -> Result<(Self, Auth), Self::Error> { + tracing::info!("auth_password: {} / {}", user, password); + // in this example implementation, any username/password combination is accepted + Ok((self, server::Auth::Accept)) + } + + async fn data( + mut self, + channel: ChannelId, + data: &[u8], + mut session: Session, + ) -> Result<(Self, Session), Self::Error> { + let pack_protocol = self.pack_protocol.as_mut().unwrap(); + let data_str = String::from_utf8_lossy(data).trim().to_owned(); + tracing::info!("SSH: client sends data: {:?}, channel:{}", data_str, channel); + match pack_protocol.service_type { + Some(ServiceType::UploadPack) => { + self.handle_upload_pack(channel, data, &mut session).await; + } + Some(ServiceType::ReceivePack) => { + self.handle_receive_pack(channel, data, &mut session).await; + } + None => panic!(), + }; + Ok((self, session)) + } + + // async fn channel_eof( + // self, + // channel: ChannelId, + // mut session: Session, + // ) -> Result<(Self, Session), Self::Error> { + // // session.close(channel); + // // match session.flush() { + // // Ok(_) => {}, + // // Err(e) => println!("Error flushing session: {:?}", e), + // // } + // // session.disconnect(Disconnect::ByApplication, "channel close", "en"); + // // match session.disconnect(None, "Closing session") { + // // Ok(_) => {}, + // // Err(e) => println!("Error disconnecting session: {:?}", e), + // // } + // Ok((self, session)) + // } + + // async fn channel_close( + // self, + // channel: ChannelId, + // session: Session, + // ) -> Result<(Self, Session), Self::Error> { + // tracing::info!("channel_close: {:?}", channel); + // Ok((self, session)) + // } +} + +impl SshServer { + async fn handle_git_command(&mut self, command: &str) -> String { + let command: Vec<_> = command.split(' ').collect(); + // command: + // Push: git-receive-pack '/root/repotest/src.git' + // Pull: git-upload-pack '/root/repotest/src.git' + let path = command[1]; + let end = path.len() - ".git'".len(); + let mut pack_protocol = PackProtocol::new( + PathBuf::from(&path[2..end]), + command[0], + Arc::new(self.storage.clone()), + Protocol::Ssh, + ); + let res = pack_protocol.git_info_refs().await; + self.pack_protocol = Some(pack_protocol); + String::from_utf8(res.to_vec()).unwrap() 
+ } + + async fn handle_upload_pack(&mut self, channel: ChannelId, data: &[u8], session: &mut Session) { + let pack_protocol = self.pack_protocol.as_mut().unwrap(); + + let (send_pack_data, buf) = pack_protocol + .git_upload_pack(&mut Bytes::copy_from_slice(data)) + .await + .unwrap(); + + tracing::info!("buf is {:?}", buf); + session.data(channel, String::from_utf8(buf.to_vec()).unwrap().into()); + + let mut reader = BufReader::new(send_pack_data.as_slice()); + loop { + let mut temp = BytesMut::new(); + let length = reader.read_buf(&mut temp).await.unwrap(); + if temp.is_empty() { + let mut bytes_out = BytesMut::new(); + bytes_out.put_slice(pack::PKT_LINE_END_MARKER); + tracing::info!("send: ends: {:?}", bytes_out.clone().freeze()); + session.data(channel, bytes_out.to_vec().into()); + return; + } + let bytes_out = pack_protocol.build_side_band_format(temp, length); + tracing::info!("send: bytes_out: {:?}", bytes_out.clone().freeze()); + session.data(channel, bytes_out.to_vec().into()); + } + } + + async fn handle_receive_pack( + &mut self, + channel: ChannelId, + data: &[u8], + session: &mut Session, + ) { + let pack_protocol = self.pack_protocol.as_mut().unwrap(); + + let buf = pack_protocol + .git_receive_pack(Bytes::from(data.to_vec())) + .await + .unwrap(); + if !buf.is_empty() { + session.data(channel, buf.to_vec().into()); + } + } +} diff --git a/gust_integrate_lfs/src/git/utils.rs b/gust_integrate_lfs/src/git/utils.rs new file mode 100644 index 00000000..89c1770d --- /dev/null +++ b/gust_integrate_lfs/src/git/utils.rs @@ -0,0 +1,309 @@ +//! +//! +//! +//! + +use std::{ + fs::File, + io::{self, Read, Seek, SeekFrom}, + path::PathBuf, + str::FromStr, + vec, +}; + +use flate2::read::ZlibDecoder; + +use crate::git::errors::GitError; +use crate::git::hash::Hash; + +const TYPE_BITS: u8 = 3; +const VAR_INT_ENCODING_BITS: u8 = 7; +const TYPE_BYTE_SIZE_BITS: u8 = VAR_INT_ENCODING_BITS - TYPE_BITS; +const VAR_INT_CONTINUE_FLAG: u8 = 1 << VAR_INT_ENCODING_BITS; + +/// Preserve the last bits of value binary +/// +#[allow(unused)] +fn keep_bits(value: usize, bits: u8) -> usize { + value & ((1 << bits) - 1) +} + +/// Read the next N bytes from the reader +/// +#[allow(unused)] +pub fn read_bytes(stream: &mut R) -> io::Result<[u8; N]> { + let mut bytes = [0; N]; + stream.read_exact(&mut bytes)?; + + Ok(bytes) +} + +/// Read a u32 from the reader +/// +#[allow(unused)] +pub fn read_u32(stream: &mut R) -> io::Result { + let bytes = read_bytes(stream)?; + + Ok(u32::from_be_bytes(bytes)) +} + +/// Read a hash from the reader +/// +#[allow(unused)] +pub fn read_hash(stream: &mut R) -> io::Result { + let bytes = read_bytes(stream)?; + + Ok(Hash(bytes)) +} + +/// Read a vec until the delimiter is read +/// +#[allow(unused)] +pub fn read_until_delimiter(stream: &mut R, delimiter: u8) -> io::Result> { + let mut bytes = vec![]; + + loop { + let [byte] = read_bytes(stream)?; + if byte == delimiter { + break; + } + + bytes.push(byte); + } + + Ok(bytes) +} + +/// Returns whether the first bit of u8 is 1 and returns the 7-bit truth value +/// +#[allow(unused)] +pub fn read_var_int_byte(stream: &mut R) -> io::Result<(u8, bool)> { + let [byte] = read_bytes(stream)?; + let value = byte & !VAR_INT_CONTINUE_FLAG; + let more_bytes = byte & VAR_INT_CONTINUE_FLAG != 0; + + Ok((value, more_bytes)) +} + +/// Read the type and size of the object +/// +#[allow(unused)] +pub fn read_size_encoding(stream: &mut R) -> io::Result { + let mut value = 0; + let mut length = 0; + + loop { + let (byte_value, more_bytes) = 
read_var_int_byte(stream).unwrap(); + value |= (byte_value as usize) << length; + if !more_bytes { + return Ok(value); + } + + length += VAR_INT_ENCODING_BITS; + } +} + +/// +/// +#[allow(unused)] +pub fn write_size_encoding(number: usize) -> Vec { + let mut num = vec![]; + let mut number = number; + + loop { + if number >> VAR_INT_ENCODING_BITS > 0 { + num.push((number & 0x7f) as u8 | 0x80); + } else { + num.push((number & 0x7f) as u8); + break; + } + + number >>= VAR_INT_ENCODING_BITS; + } + + num +} + +/// Read the first few fields of the object and parse +/// +#[allow(unused)] +pub fn read_type_and_size(stream: &mut R) -> io::Result<(u8, usize)> { + // Object type and uncompressed pack data size + // are stored in a "size-encoding" variable-length integer. + // Bits 4 through 6 store the type and the remaining bits store the size. + let value = read_size_encoding(stream)?; + let object_type = keep_bits(value >> TYPE_BYTE_SIZE_BITS, TYPE_BITS) as u8; + let size = keep_bits(value, TYPE_BYTE_SIZE_BITS) + | (value >> VAR_INT_ENCODING_BITS << TYPE_BYTE_SIZE_BITS); + + Ok((object_type, size)) +} + +/// The offset for an OffsetDelta object +/// +#[allow(unused)] +pub fn read_offset_encoding(stream: &mut R) -> io::Result { + // Like the object length, the offset for an OffsetDelta object + // is stored in a variable number of bytes, + // with the most significant bit of each byte indicating whether more bytes follow. + // However, the object length encoding allows redundant values, + // e.g. the 7-bit value [n] is the same as the 14- or 21-bit values [n, 0] or [n, 0, 0]. + // Instead, the offset encoding adds 1 to the value of each byte except the least significant one. + // And just for kicks, the bytes are ordered from *most* to *least* significant. + let mut value = 0; + loop { + let (byte_value, more_bytes) = read_var_int_byte(stream)?; + + value = (value << VAR_INT_ENCODING_BITS) | byte_value as u64; + if !more_bytes { + return Ok(value); + } + + value += 1; + } +} + +/// +/// # Example +/// +/// ``` +/// let ns :u64 = 0x4af; +/// let re = write_offset_encoding(ns); +/// println!("{:?}",re); +/// ``` +/// +#[allow(unused)] +pub fn write_offset_encoding(number: u64) -> Vec { + let mut num = vec![]; + let mut number = number; + + num.push((number & 0x7f) as u8); + number >>= 7; + + while number > 0 { + num.push(((number & 0x7f) - 1) as u8 | 0x80); + number >>= 7; + } + + num.reverse(); + + num +} + +/// +/// +/// +#[allow(unused)] +pub fn read_partial_int( + stream: &mut R, + bytes: u8, + present_bytes: &mut u8, +) -> io::Result { + let mut value: usize = 0; + for byte_index in 0..bytes { + if *present_bytes & 1 != 0 { + let [byte] = read_bytes(stream)?; + value |= (byte as usize) << (byte_index * 8); + } + *present_bytes >>= 1; + } + Ok(value) +} + +/// +/// +#[allow(unused)] +pub fn seek(file: &mut File, offset: u64) -> io::Result<()> { + file.seek(SeekFrom::Start(offset))?; + + Ok(()) +} + +/// +/// +#[allow(unused)] +pub fn get_offset(file: &mut File) -> io::Result { + file.stream_position() +} + +/// Call reader() to process a zlib stream from a file. +/// Reset the file offset afterwards to the end of the zlib stream, +/// since ZlibDecoder uses BufReader, which may consume extra bytes. 
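+///
+/// # Example
+///
+/// A minimal usage sketch (assuming `file` is already positioned at the start of a
+/// zlib-compressed entry; error handling elided):
+///
+/// ```
+/// let bytes = read_zlib_stream_exact(&mut file, |decoder| {
+///     let mut buf = Vec::new();
+///     decoder.read_to_end(&mut buf).unwrap();
+///     Ok(buf)
+/// })
+/// .unwrap();
+/// ```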
+#[allow(unused)]
+pub fn read_zlib_stream_exact<T, F>(file: &mut File, reader: F) -> Result<T, GitError>
+where
+    F: FnOnce(&mut ZlibDecoder<&mut File>) -> Result<T, GitError>,
+{
+    let offset = get_offset(file).unwrap();
+    let mut decompressed = ZlibDecoder::new(file);
+    let result = reader(&mut decompressed);
+    let zlib_end = offset + decompressed.total_in();
+
+    seek(decompressed.into_inner(), zlib_end).unwrap();
+
+    result
+}
+
+/// Encode a u32 into 4 big-endian bytes.
+#[allow(unused)]
+pub fn u32_vec(value: u32) -> Vec<u8> {
+    let mut result: Vec<u8> = vec![];
+    result.push((value >> 24 & 0xff) as u8);
+    result.push((value >> 16 & 0xff) as u8);
+    result.push((value >> 8 & 0xff) as u8);
+    result.push((value & 0xff) as u8);
+    result
+}
+
+/// Strip the 12-byte header and the trailing 20-byte checksum from raw pack data.
+#[allow(unused)]
+pub fn get_pack_raw_data(data: Vec<u8>) -> Vec<u8> {
+    let result = &data[12..data.len() - 20];
+    let result = result.to_vec();
+    result
+}
+
+/// Extract the 40-character hash from a pack file name like `pack-<hash>.pack`.
+#[allow(unused)]
+fn get_hash_from_filename(filename: &str) -> String {
+    String::from(&filename[5..45])
+}
+
+/// Return a list of pack files in the pack directory.
+#[allow(unused)]
+pub fn find_all_pack_file(pack_dir: &str) -> (Vec<PathBuf>, Vec<Hash>) {
+    let mut file_path = vec![];
+    let mut hash_list = vec![];
+    let mut object_root = std::path::PathBuf::from(pack_dir);
+
+    let paths = std::fs::read_dir(&object_root).unwrap();
+
+    for path in paths {
+        if let Ok(pack_file) = path {
+            let _file_name = pack_file.file_name();
+            let _file_name = _file_name.to_str().unwrap();
+
+            // only pick up the `.pack` files; the matching `.idx` file shares the same hash
+            if &_file_name[_file_name.len() - 4..] == "pack" {
+                let hash_string = get_hash_from_filename(_file_name);
+                let _hash = Hash::from_str(&hash_string).unwrap();
+                hash_list.push(_hash);
+
+                object_root.push(_file_name);
+                file_path.push(object_root.clone());
+                object_root.pop();
+            }
+        }
+    }
+
+    (file_path, hash_list)
+}
+
+#[cfg(test)]
+mod test {}
diff --git a/gust_integrate_lfs/src/gust/driver/database/mod.rs b/gust_integrate_lfs/src/gust/driver/database/mod.rs
new file mode 100644
index 00000000..4fb2dd9f
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/database/mod.rs
@@ -0,0 +1 @@
+pub mod mysql;
diff --git a/gust_integrate_lfs/src/gust/driver/database/mysql/mod.rs b/gust_integrate_lfs/src/gust/driver/database/mysql/mod.rs
new file mode 100644
index 00000000..cd08292f
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/database/mysql/mod.rs
@@ -0,0 +1,27 @@
+pub mod storage;
+
+use std::{env, time::Duration};
+
+use sea_orm::{ConnectOptions, Database};
+use tracing::log;
+
+use self::storage::MysqlStorage;
+
+pub async fn init() -> MysqlStorage {
+    let db_url = env::var("DATABASE_URL").expect("DATABASE_URL is not set in .env file");
+    let mut opt = ConnectOptions::new(db_url.to_owned());
+    // max_connections is usually set to roughly double the number of CPU cores
+    opt.max_connections(32)
+        .min_connections(8)
+        .acquire_timeout(Duration::from_secs(30))
+        .connect_timeout(Duration::from_secs(20))
+        .idle_timeout(Duration::from_secs(8))
+        .max_lifetime(Duration::from_secs(8))
+        .sqlx_logging(true)
+        .sqlx_logging_level(log::LevelFilter::Debug);
+    MysqlStorage::new(
+        Database::connect(opt)
+            .await
+            .expect("Database connection failed"),
+    )
+}
diff --git a/gust_integrate_lfs/src/gust/driver/database/mysql/storage.rs b/gust_integrate_lfs/src/gust/driver/database/mysql/storage.rs
new file mode 100644
index 00000000..d76528d8
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/database/mysql/storage.rs
@@ -0,0 +1,726 @@
+use std::cmp::min;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+use std::str::FromStr; +use std::sync::Arc; + +use crate::git::errors::{GitError, GitLFSError}; +use crate::git::hash::Hash; +use crate::git::lfs::structs::*; +use crate::git::object::base::commit::Commit; +use crate::git::object::base::tree::Tree; +use crate::git::object::metadata::MetaData; +use crate::git::object::types::ObjectType; +use crate::git::pack::decode::ObjDecodedMap; +use crate::git::pack::Pack; +use crate::git::protocol::{Command, RefCommand}; +use crate::gust::driver::structure::nodes::build_node_tree; +use crate::gust::driver::{ObjectStorage, ZERO_ID}; +use async_recursion::async_recursion; +use async_trait::async_trait; +use chrono::prelude::*; +use entity::{commit, locks, meta, node, refs}; +use futures::lock; +use rayon::vec; +use sea_orm::ActiveValue::NotSet; +use sea_orm::{ + ActiveModelTrait, ColumnTrait, DatabaseBackend, DatabaseConnection, DbErr, EntityTrait, + QueryFilter, Set, Statement, +}; + +#[derive(Debug, Default, Clone)] +pub struct MysqlStorage { + pub connection: DatabaseConnection, +} + +impl MysqlStorage { + pub fn new(connection: DatabaseConnection) -> MysqlStorage { + MysqlStorage { connection } + } +} + +#[async_trait] +impl ObjectStorage for MysqlStorage { + async fn get_head_object_id(&self, repo_path: &Path) -> String { + let path_str = repo_path.to_str().unwrap(); + let refs_list = self.search_refs(path_str).await.unwrap(); + + if refs_list.is_empty() { + ZERO_ID.to_string() + } else { + for refs in &refs_list { + if repo_path.to_str().unwrap() == refs.repo_path { + return refs.ref_git_id.clone(); + } + } + for refs in &refs_list { + // repo_path is subdirectory of some commit + if repo_path.starts_with(refs.repo_path.clone()) { + return self.generate_child_commit_and_refs(refs, repo_path).await; + } + } + //situation: repo_path: root/repotest2/src, commit: root/repotest + ZERO_ID.to_string() + } + } + + async fn get_ref_object_id(&self, repo_path: &Path) -> HashMap { + // assuming HEAD points to branch master. 
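+        // build a map from ref commit id to ref name for every ref row stored under this repo_path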
+ let mut map = HashMap::new(); + let refs: Vec = refs::Entity::find() + .filter(refs::Column::RepoPath.eq(repo_path.to_str())) + .all(&self.connection) + .await + .unwrap(); + for git_ref in refs { + map.insert(git_ref.ref_git_id, git_ref.ref_name); + } + map + } + + async fn handle_refs(&self, command: &RefCommand, path: &Path) { + match command.command_type { + Command::Create => self.save_refs(command, path).await, + Command::Delete => self.delete_refs(command, path).await, + Command::Update => self.update_refs(command, path).await, + } + } + + async fn save_packfile( + &self, + decoded_pack: Pack, + repo_path: &Path, + ) -> Result<(), anyhow::Error> { + let mut result = ObjDecodedMap::default(); + result.update_from_cache(&decoded_pack.result); + let nodes = build_node_tree(&result, repo_path).await.unwrap(); + self.save_nodes(nodes).await.unwrap(); + self.save_commits(&result.commits, repo_path).await.unwrap(); + Ok(()) + } + + async fn get_full_pack_data(&self, repo_path: &Path) -> Result, GitError> { + let mut hash_meta: HashMap = HashMap::new(); + + let commit_metas = self.get_all_commits_by_path(repo_path).await.unwrap(); + let mut commits = Vec::new(); + let mut tree_ids = Vec::new(); + + for c_meta in commit_metas { + let c = Commit::new(Arc::new(c_meta)); + tree_ids.push(c.tree_id.to_plain_str()); + commits.push(c); + } + let trees = self.get_nodes_by_ids(tree_ids).await; + for commit in commits { + hash_meta.insert( + commit.meta.id.to_plain_str(), + Arc::try_unwrap(commit.meta).unwrap(), + ); + if let Some(root) = trees.get(&commit.tree_id) { + self.get_child_trees(&root, &mut hash_meta).await + } else { + return Err(GitError::InvalidTreeObject(commit.tree_id.to_plain_str())); + }; + } + let result: Vec = Pack::default().encode(Some(hash_meta.into_values().collect())); + Ok(result) + } + + async fn get_incremental_pack_data( + &self, + repo_path: &Path, + want: &HashSet, + _have: &HashSet, + ) -> Result, GitError> { + let mut hash_meta: HashMap = HashMap::new(); + let all_commits = self.get_all_commits_by_path(repo_path).await.unwrap(); + + for c_meta in all_commits { + if want.contains(&c_meta.id.to_plain_str()) { + let c = Commit::new(Arc::new(c_meta)); + if let Some(root) = self.get_node_by_id(&c.tree_id.to_plain_str()).await { + self.get_child_trees(&root, &mut hash_meta).await + } else { + return Err(GitError::InvalidTreeObject(c.tree_id.to_plain_str())); + }; + } + } + + let result: Vec = Pack::default().encode(Some(hash_meta.into_values().collect())); + Ok(result) + } + + async fn get_commit_by_hash(&self, hash: &str) -> Result { + let commit: Option = commit::Entity::find() + .filter(commit::Column::GitId.eq(hash)) + .one(&self.connection) + .await + .unwrap(); + if let Some(commit) = commit { + Ok(MetaData::new(ObjectType::Commit, &commit.meta)) + } else { + return Err(GitError::InvalidCommitObject(hash.to_string())); + } + } + + async fn get_hash_object(&self, hash: &str) -> Result { + tracing::info!("hash:{}", hash); + let model = node::Entity::find() + .filter(node::Column::GitId.eq(hash)) + .one(&self.connection) + .await + .unwrap(); + + if let Some(model) = model { + if model.node_type == "tree" { + // let mut tree_items: Vec = Vec::new(); + // let childs = node::Entity::find() + // .filter(node::Column::Pid.eq(hash)) + // .all(&self.connection) + // .await + // .unwrap(); + // for c in childs { + // tree_items.push(TreeItem::convert_from_model(c)); + // } + // let t = Tree::convert_from_model(&model, tree_items); + // let meta = t.encode_metadata().unwrap(); 
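+                // the raw tree bytes stored in the `data` column are returned directly,
+                // instead of re-encoding the tree from its child rows as sketched above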
+ Ok(MetaData::new(ObjectType::Tree, &model.data)) + } else { + Ok(MetaData::new(ObjectType::Blob, &model.data)) + } + } else { + return Err(GitError::NotFountHashValue(hash.to_string())); + } + } + + async fn lfs_get_meta(&self, v: &RequestVars) -> Result { + let result = meta::Entity::find_by_id(v.oid.clone()) + .one(&self.connection) + .await + .unwrap(); + + match result { + Some(val) => Ok(MetaObject { + oid: val.oid, + size: val.size, + exist: val.exist, + }), + None => Err(GitLFSError::GeneralError("".to_string())), + } + } + + async fn lfs_put_meta(&self, v: &RequestVars) -> Result { + // Check if already exist. + let result = meta::Entity::find_by_id(v.oid.clone()) + .one(&self.connection) + .await + .unwrap(); + if result.is_some() { + let result = result.unwrap(); + return Ok(MetaObject { + oid: result.oid, + size: result.size, + exist: true, + }); + } + + // Put into database if not exist. + let meta = MetaObject { + oid: v.oid.to_string(), + size: v.size, + exist: true, + }; + + let meta_to = meta::ActiveModel { + oid: Set(meta.oid.to_owned()), + size: Set(meta.size.to_owned()), + exist: Set(true), + }; + + let res = meta::Entity::insert(meta_to).exec(&self.connection).await; + match res { + Ok(_) => Ok(meta), + Err(_) => Err(GitLFSError::GeneralError("".to_string())), + } + } + + async fn lfs_delete_meta(&self, v: &RequestVars) -> Result<(), GitLFSError> { + let res = meta::Entity::delete_by_id(v.oid.to_owned()) + .exec(&self.connection) + .await; + match res { + Ok(_) => Ok(()), + Err(_) => Err(GitLFSError::GeneralError("".to_string())), + } + } + + async fn lfs_get_locks(&self, refspec: &str) -> Result, GitLFSError> { + let result = locks::Entity::find_by_id(refspec) + .one(&self.connection) + .await + .unwrap(); + + match result { + Some(val) => { + let data = val.data.to_owned(); + let locks: Vec = serde_json::from_str(&data).unwrap(); + Ok(locks) + } + None => Err(GitLFSError::GeneralError("".to_string())), + } + } + + async fn lfs_get_filtered_locks( + &self, + refspec: &str, + path: &str, + cursor: &str, + limit: &str, + ) -> Result<(Vec, String), GitLFSError> { + let mut locks = match self.lfs_get_locks(refspec).await { + Ok(locks) => locks, + Err(_) => vec![], + }; + + println!("Locks retrieved: {:?}", locks); + + if cursor != "" { + let mut last_seen = -1; + for (i, v) in locks.iter().enumerate() { + if v.id == *cursor { + last_seen = i as i32; + break; + } + } + + if last_seen > -1 { + locks = locks.split_off(last_seen as usize); + } else { + // Cursor not found. 
+                return Err(GitLFSError::GeneralError("".to_string()));
+            }
+        }
+
+        if path != "" {
+            let mut filtered = Vec::<Lock>::new();
+            for lock in locks.iter() {
+                if lock.path == *path {
+                    filtered.push(Lock {
+                        id: lock.id.to_owned(),
+                        path: lock.path.to_owned(),
+                        owner: lock.owner.clone(),
+                        locked_at: lock.locked_at.to_owned(),
+                    });
+                }
+            }
+            locks = filtered;
+        }
+
+        let mut next = "".to_string();
+        if limit != "" {
+            let mut size = limit.parse::<i64>().unwrap();
+            size = min(size, locks.len() as i64);
+
+            if size + 1 < locks.len() as i64 {
+                next = locks[size as usize].id.to_owned();
+            }
+            let _ = locks.split_off(size as usize);
+        }
+
+        Ok((locks, next))
+    }
+
+    async fn lfs_add_lock(&self, repo: &str, locks: Vec<Lock>) -> Result<(), GitLFSError> {
+        let result = locks::Entity::find_by_id(repo.to_owned())
+            .one(&self.connection)
+            .await
+            .unwrap();
+
+        match result {
+            // Update
+            Some(val) => {
+                let d = val.data.to_owned();
+                let mut locks_from_data = if d != "" {
+                    let locks_from_data: Vec<Lock> = serde_json::from_str(&d).unwrap();
+                    locks_from_data
+                } else {
+                    vec![]
+                };
+                let mut locks = locks;
+                locks_from_data.append(&mut locks);
+
+                locks_from_data.sort_by(|a, b| {
+                    a.locked_at
+                        .partial_cmp(&b.locked_at)
+                        .unwrap_or(std::cmp::Ordering::Equal)
+                });
+                let d = serde_json::to_string(&locks_from_data).unwrap();
+
+                let mut lock_to: locks::ActiveModel = val.into();
+                lock_to.data = Set(d.to_owned());
+                let res = lock_to.update(&self.connection).await;
+                match res.is_ok() {
+                    true => Ok(()),
+                    false => Err(GitLFSError::GeneralError("".to_string())),
+                }
+            }
+            // Insert
+            None => {
+                let mut locks = locks;
+                locks.sort_by(|a, b| {
+                    a.locked_at
+                        .partial_cmp(&b.locked_at)
+                        .unwrap_or(std::cmp::Ordering::Equal)
+                });
+                let data = serde_json::to_string(&locks).unwrap();
+                let lock_to = locks::ActiveModel {
+                    id: Set(repo.to_owned()),
+                    data: Set(data.to_owned()),
+                };
+                let res = locks::Entity::insert(lock_to).exec(&self.connection).await;
+                match res.is_ok() {
+                    true => Ok(()),
+                    false => Err(GitLFSError::GeneralError("".to_string())),
+                }
+            }
+        }
+    }
+
+    async fn lfs_delete_lock(
+        &self,
+        repo: &str,
+        _user: Option<String>,
+        id: &str,
+        force: bool,
+    ) -> Result<Lock, GitLFSError> {
+        let empty_lock = Lock {
+            id: "".to_owned(),
+            path: "".to_owned(),
+            owner: None,
+            locked_at: {
+                let locked_at: DateTime<Utc> = DateTime::<Utc>::MIN_UTC;
+                locked_at.to_rfc3339().to_string()
+            },
+        };
+        let result = locks::Entity::find_by_id(repo.to_owned())
+            .one(&self.connection)
+            .await
+            .unwrap();
+
+        match result {
+            // Exist, then delete.
+ Some(val) => { + let d = val.data.to_owned(); + let locks_from_data = if d != "" { + let locks_from_data: Vec = serde_json::from_str(&d).unwrap(); + locks_from_data + } else { + vec![] + }; + + let mut new_locks = Vec::::new(); + let mut lock_to_delete = Lock { + id: "".to_owned(), + path: "".to_owned(), + owner: None, + locked_at: { + let locked_at: DateTime = DateTime::::MIN_UTC; + locked_at.to_rfc3339().to_string() + }, + }; + + for lock in locks_from_data.iter() { + if lock.id == *id { + if lock.owner != None && !force { + return Err(GitLFSError::GeneralError("".to_string())); + } + lock_to_delete.id = lock.id.to_owned(); + lock_to_delete.path = lock.path.to_owned(); + lock_to_delete.owner = lock.owner.clone(); + lock_to_delete.locked_at = lock.locked_at.to_owned(); + } else if lock.id.len() > 0 { + new_locks.push(Lock { + id: lock.id.to_owned(), + path: lock.path.to_owned(), + owner: lock.owner.clone(), + locked_at: lock.locked_at.to_owned(), + }); + } + } + if lock_to_delete.id == "" { + return Err(GitLFSError::GeneralError("".to_string())); + } + + // No locks remains, delete the repo from database. + if new_locks.len() == 0 { + locks::Entity::delete_by_id(repo.to_owned()) + .exec(&self.connection) + .await + .unwrap(); + + return Ok(lock_to_delete); + } + + // Update remaining locks. + let data = serde_json::to_string(&new_locks).unwrap(); + + let mut lock_to: locks::ActiveModel = val.into(); + lock_to.data = Set(data.to_owned()); + let res = lock_to.update(&self.connection).await; + match res.is_ok() { + true => Ok(lock_to_delete), + false => Err(GitLFSError::GeneralError("".to_string())), + } + } + // Not exist, error. + None => Err(GitLFSError::GeneralError("".to_string())), + } + } +} + +impl MysqlStorage { + async fn get_all_commits_by_path(&self, path: &Path) -> Result, anyhow::Error> { + let commits: Vec = commit::Entity::find() + .filter(commit::Column::RepoPath.eq(path.to_str().unwrap())) + .all(&self.connection) + .await + .unwrap(); + let mut result = vec![]; + for commit in commits { + result.push(MetaData::new(ObjectType::Commit, &commit.meta)) + } + Ok(result) + } + + async fn search_refs(&self, path_str: &str) -> Result, DbErr> { + refs::Entity::find() + .from_raw_sql(Statement::from_sql_and_values( + DatabaseBackend::MySql, + r#"SELECT * FROM gust.refs where ? 
LIKE CONCAT(repo_path, '%') and ref_name = 'refs/heads/master' "#, + [path_str.into()], + )) + .all(&self.connection) + .await + } + + async fn save_refs(&self, command: &RefCommand, path: &Path) { + let save_models: Vec = + vec![command.convert_to_model(path.to_str().unwrap())]; + batch_save_model(&self.connection, save_models) + .await + .unwrap(); + } + + async fn update_refs(&self, command: &RefCommand, path: &Path) { + let ref_data: Option = refs::Entity::find() + .filter(refs::Column::RefGitId.eq(&command.old_id)) + .filter(refs::Column::RepoPath.eq(path.to_str().unwrap())) + .one(&self.connection) + .await + .unwrap(); + let mut ref_data: refs::ActiveModel = ref_data.unwrap().into(); + ref_data.ref_git_id = Set(command.new_id.to_owned()); + ref_data.updated_at = Set(chrono::Utc::now().naive_utc()); + ref_data.update(&self.connection).await.unwrap(); + } + + async fn delete_refs(&self, command: &RefCommand, path: &Path) { + let delete_ref = refs::ActiveModel { + ref_git_id: Set(command.old_id.to_owned()), + repo_path: Set(path.to_str().unwrap().to_owned()), + ..Default::default() + }; + refs::Entity::delete(delete_ref) + .exec(&self.connection) + .await + .unwrap(); + } + + async fn search_commits(&self, path_str: &str) -> Result, DbErr> { + commit::Entity::find() + .from_raw_sql(Statement::from_sql_and_values( + DatabaseBackend::MySql, + r#"SELECT * FROM gust.commit where ? LIKE CONCAT(repo_path, '%')"#, + [path_str.into()], + )) + .all(&self.connection) + .await + } + + async fn save_nodes(&self, nodes: Vec) -> Result { + let conn = &self.connection; + let mut sum = 0; + let mut batch_nodes = Vec::new(); + for node in nodes { + // let model = node.try_into_model().unwrap(); + let size = node.data.as_ref().len(); + let limit = 10 * 1024 * 1024; + if sum + size < limit && batch_nodes.len() < 50 { + sum += size; + batch_nodes.push(node); + } else { + node::Entity::insert_many(batch_nodes) + .exec(conn) + .await + .unwrap(); + sum = 0; + batch_nodes = Vec::new(); + batch_nodes.push(node); + } + } + if batch_nodes.len() != 0 { + node::Entity::insert_many(batch_nodes) + .exec(conn) + .await + .unwrap(); + } + Ok(true) + } + + async fn save_commits( + &self, + commits: &Vec, + repo_path: &Path, + ) -> Result { + let conn = &self.connection; + let mut save_models: Vec = Vec::new(); + for commit in commits { + save_models.push(commit.convert_to_model(repo_path)); + } + batch_save_model(conn, save_models).await.unwrap(); + Ok(true) + } + + // async fn save_node_data( + // &self, + // save_models: Vec, + // ) -> Result { + // batch_save_model(&self.connection, save_models) + // .await + // .unwrap(); + // Ok(true) + // } + + /// Because the requested path is a subdirectory of the original project directory, + /// a new fake commit is needed to point the subdirectory, so we need to + /// 1. find root commit by root_ref + /// 2. convert commit to git Commit object, and calculate it's hash + /// 3. 
save the new fake commit with hash and repo_path
+    async fn generate_child_commit_and_refs(&self, refs: &refs::Model, repo_path: &Path) -> String {
+        let root_commit = commit::Entity::find()
+            .filter(commit::Column::GitId.eq(&refs.ref_git_id))
+            .one(&self.connection)
+            .await
+            .unwrap()
+            .unwrap();
+
+        if let Some(root_tree) = self.search_root_node_by_path(repo_path).await {
+            let child_commit = Commit::build_from_model_and_root(&root_commit, root_tree);
+            self.save_commits(&vec![child_commit.clone()], repo_path)
+                .await
+                .unwrap();
+            let commit_id = child_commit.meta.id.to_plain_str();
+            let child_refs = refs::ActiveModel {
+                id: NotSet,
+                repo_path: Set(repo_path.to_str().unwrap().to_string()),
+                ref_name: Set(refs.ref_name.clone()),
+                ref_git_id: Set(commit_id.clone()),
+                created_at: Set(chrono::Utc::now().naive_utc()),
+                updated_at: Set(chrono::Utc::now().naive_utc()),
+            };
+            batch_save_model(&self.connection, vec![child_refs])
+                .await
+                .unwrap();
+            commit_id
+        } else {
+            ZERO_ID.to_string()
+        }
+    }
+
+    async fn search_root_node_by_path(&self, repo_path: &Path) -> Option<node::Model> {
+        tracing::debug!("file_name: {:?}", repo_path.file_name());
+        let res = node::Entity::find()
+            .filter(node::Column::Name.eq(repo_path.file_name().unwrap().to_str().unwrap()))
+            .one(&self.connection)
+            .await
+            .unwrap();
+        if let Some(res) = res {
+            Some(res)
+        } else {
+            node::Entity::find()
+                // .filter(node::Column::Path.eq(repo_path.to_str().unwrap()))
+                .filter(node::Column::Name.eq(""))
+                .one(&self.connection)
+                .await
+                .unwrap()
+        }
+    }
+
+    async fn get_node_by_id(&self, id: &str) -> Option<node::Model> {
+        node::Entity::find()
+            .filter(node::Column::GitId.eq(id))
+            .one(&self.connection)
+            .await
+            .unwrap()
+    }
+
+    async fn get_nodes_by_ids(&self, ids: Vec<String>) -> HashMap<Hash, node::Model> {
+        node::Entity::find()
+            .filter(node::Column::GitId.is_in(ids))
+            .all(&self.connection)
+            .await
+            .unwrap()
+            .into_iter()
+            .map(|f| (Hash::from_str(&f.git_id).unwrap(), f))
+            .collect()
+    }
+
+    // retrieve all sub trees recursively
+    #[async_recursion]
+    async fn get_child_trees(&self, root: &node::Model, hash_meta: &mut HashMap<String, MetaData>) {
+        let t = Tree::new(Arc::new(MetaData::new(ObjectType::Tree, &root.data)));
+        let mut child_ids = vec![];
+        for item in t.tree_items {
+            if !hash_meta.contains_key(&item.id.to_plain_str()) {
+                child_ids.push(item.id.to_plain_str());
+            }
+        }
+        let childs = node::Entity::find()
+            .filter(node::Column::GitId.is_in(child_ids))
+            .all(&self.connection)
+            .await
+            .unwrap();
+        for c in childs {
+            if c.node_type == "tree" {
+                self.get_child_trees(&c, hash_meta).await;
+            } else {
+                let b_meta = MetaData::new(ObjectType::Blob, &c.data);
+                hash_meta.insert(b_meta.id.to_plain_str(), b_meta);
+            }
+        }
+        let t_meta = t.meta;
+        tracing::info!("{}, {}", t_meta.id, t.tree_name);
+        hash_meta.insert(t_meta.id.to_plain_str(), Arc::try_unwrap(t_meta).unwrap());
+    }
+}
+
+// MySQL sea_orm batch insert
+async fn batch_save_model<E, A>(
+    conn: &DatabaseConnection,
+    save_models: Vec<A>,
+) -> Result<(), anyhow::Error>
+where
+    E: EntityTrait,
+    A: ActiveModelTrait<Entity = E> + From<<E as EntityTrait>::Model> + Send,
+{
+    let mut futures = Vec::new();
+
+    // note that sqlx does not support packets larger than 16MB for now, so insert in chunks;
+    // each chunk is inserted sequentially and the collected results are currently not inspected
+    for chunk in save_models.chunks(100) {
+        let save_result = E::insert_many(chunk.iter().cloned()).exec(conn).await;
+        futures.push(save_result);
+    }
+    // futures::future::join_all(futures).await;
+    Ok(())
+}
diff --git a/gust_integrate_lfs/src/gust/driver/fs/file_system.rs b/gust_integrate_lfs/src/gust/driver/fs/file_system.rs
new file mode 100644
index 00000000..e5319d7b
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/fs/file_system.rs
@@ -0,0 +1,67 @@
+use std::{collections::HashMap, path::PathBuf};
+
+use crate::{
+    git::pack::Pack,
+    gust::driver::{ObjectStorage, ZERO_ID},
+};
+use async_trait::async_trait;
+
+#[derive(Debug, Default, Clone)]
+pub struct FileSystem {}
+
+#[async_trait]
+impl ObjectStorage for FileSystem {
+    async fn get_head_object_id(&self, repo_dir: &PathBuf) -> String {
+        let base_path = repo_dir.join(".git");
+        let content = std::fs::read_to_string(base_path.join("HEAD")).unwrap();
+        let content = content.replace("ref: ", "");
+        let content = content.strip_suffix('\n').unwrap();
+        let object_id = match std::fs::read_to_string(base_path.join(content)) {
+            Ok(object_id) => object_id.strip_suffix('\n').unwrap().to_owned(),
+            _ => ZERO_ID.to_string(),
+        };
+
+        // init repo: if the dir does not exist or is empty
+        // let init_repo = !self.repo_dir.exists();
+        // todo: replace git command
+        // if init_repo {
+        //     Command::new("git")
+        //         .args(["init", "--bare", self.repo_dir.to_str().unwrap()])
+        //         .output()
+        //         .expect("git init failed!");
+        // }
+        object_id
+    }
+
+    async fn save_packfile(
+        &self,
+        decoded_pack: Pack,
+        repo_dir: &PathBuf,
+    ) -> Result<(), anyhow::Error> {
+        todo!()
+    }
+
+    async fn get_full_pack_data(&self, repo_dir: &PathBuf) -> Vec<u8> {
+        let object_root = repo_dir.join(".git/objects");
+        let loose_vec = Pack::find_all_loose(object_root.to_str().unwrap());
+        let (mut _loose_pack, loose_data) =
+            Pack::pack_loose(loose_vec, object_root.to_str().unwrap());
+        loose_data
+    }
+
+    async fn handle_pull_pack_data(&self) -> Vec<u8> {
+        todo!();
+    }
+
+    async fn get_ref_object_id(&self, repo_dir: &PathBuf) -> HashMap<String, String> {
+        let base = String::from(".git/refs/heads/");
+        // TODO: need to read from .git/packed-refs after running git gc; check how the git show-ref command works
+        let path = repo_dir.join(&base);
+        let paths = std::fs::read_dir(&path).unwrap();
+        let mut res = HashMap::new();
+        for ref_file in paths.flatten() {
+            let name = format!("{}{}", base, ref_file.file_name().to_str().unwrap());
+            let object_id = std::fs::read_to_string(ref_file.path()).unwrap();
+            let object_id = object_id.strip_suffix('\n').unwrap();
+            res.insert(object_id.to_owned(), name);
+        }
+        res
+    }
+}
diff --git a/gust_integrate_lfs/src/gust/driver/fs/mod.rs b/gust_integrate_lfs/src/gust/driver/fs/mod.rs
new file mode 100644
index 00000000..716c6ba2
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/fs/mod.rs
@@ -0,0 +1 @@
+// pub mod file_system;
diff --git a/gust_integrate_lfs/src/gust/driver/lfs_content_store/mod.rs b/gust_integrate_lfs/src/gust/driver/lfs_content_store/mod.rs
new file mode 100644
index 00000000..b47e6ce0
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/lfs_content_store/mod.rs
@@ -0,0 +1,87 @@
+use crate::git::lfs::structs::MetaObject;
+use sha256::digest;
+use std::fs;
+use std::io::prelude::*;
+use std::path::PathBuf;
+use std::path;
+
+pub struct ContentStore {
+    base_path: PathBuf,
+}
+
+impl ContentStore {
+    pub async fn new(base: PathBuf) -> ContentStore {
+        fs::create_dir_all(&base).expect("Create directory failed!");
+        ContentStore { base_path: base }
+    }
+
+    pub async fn get(&self, meta: &MetaObject, start: i64) -> fs::File {
+        let path = path::Path::new(&self.base_path).join(transform_key(meta.oid.to_owned()));
+
+        let mut file = fs::File::open(&path).expect("Open file failed!");
+        if start > 0 {
+            file.seek(std::io::SeekFrom::Start(start as u64))
+                .expect("Shift file pointer failed");
+        }
+
+        file
+    }
+
+    pub async fn put(&self, meta: &MetaObject, body_content: &[u8]) -> bool {
+        let path = path::Path::new(&self.base_path).join(transform_key(meta.oid.to_owned()));
+        let dir = path.parent().unwrap();
+        fs::create_dir_all(dir).expect("Create directory failed!");
+
+        let mut file = fs::File::create(&path).expect("Create file failed");
+        let length_written = file.write(body_content).expect("Write file failed");
+        if length_written as i64 != meta.size {
+            return false;
+        }
+
+        let hash = digest(body_content);
+        if hash != meta.oid {
+            return false;
+        }
+        true
+    }
+
+    pub async fn exist(&self, meta: &MetaObject) -> bool {
+        let path = path::Path::new(&self.base_path).join(transform_key(meta.oid.to_owned()));
+
+        path::Path::exists(&path)
+    }
+}
+
+fn transform_key(key: String) -> String {
+    if key.len() < 5 {
+        key
+    } else {
+        path::Path::new(&key[0..2])
+            .join(&key[2..4])
+            .join(&key[4..key.len()])
+            .into_os_string()
+            .into_string()
+            .unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_content_store() {
+        let meta = MetaObject {
+            oid: "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72".to_owned(),
+            size: 12,
+            exist: false,
+        };
+
+        let content = "test content".as_bytes();
+
+        let content_store = ContentStore::new(PathBuf::from("content-store")).await;
+        assert!(content_store.put(&meta, content).await);
+
+        assert!(content_store.exist(&meta).await);
+    }
+}
diff --git a/gust_integrate_lfs/src/gust/driver/mod.rs b/gust_integrate_lfs/src/gust/driver/mod.rs
new file mode 100644
index 00000000..9f5ad81f
--- /dev/null
+++ b/gust_integrate_lfs/src/gust/driver/mod.rs
@@ -0,0 +1,85 @@
+//!
+//!
+//!
+
+use std::{
+    collections::{HashMap, HashSet},
+    path::Path,
+};
+
+use async_trait::async_trait;
+use hyper::Request;
+
+use crate::git::lfs::structs::*;
+use crate::git::{
+    errors::{GitError, GitLFSError},
+    object::metadata::MetaData,
+    pack::Pack,
+    protocol::RefCommand,
+};
+
+pub mod database;
+pub mod fs;
+pub mod lfs_content_store;
+pub mod structure;
+pub mod utils;
+
+pub const ZERO_ID: &'static str = match std::str::from_utf8(&[b'0'; 40]) {
+    Ok(s) => s,
+    Err(_) => panic!("can't get ZERO_ID"),
+};
+
+#[async_trait]
+pub trait ObjectStorage: Clone + Send + Sync + std::fmt::Debug {
+    async fn get_head_object_id(&self, path: &Path) -> String;
+
+    async fn get_ref_object_id(&self, path: &Path) -> HashMap<String, String>;
+
+    async fn handle_refs(&self, command: &RefCommand, path: &Path);
+
+    async fn save_packfile(
+        &self,
+        decoded_pack: Pack,
+        repo_path: &Path,
+    ) -> Result<(), anyhow::Error>;
+
+    async fn get_full_pack_data(&self, repo_path: &Path) -> Result<Vec<u8>, GitError>;
+
+    async fn get_incremental_pack_data(
+        &self,
+        repo_path: &Path,
+        want: &HashSet<String>,
+        have: &HashSet<String>,
+    ) -> Result<Vec<u8>, GitError>;
+
+    async fn get_commit_by_hash(&self, hash: &str) -> Result<MetaData, GitError>;
+
+    // get a hash object from the db if it misses the cache during unpack; this object must be a tree or blob
+    async fn get_hash_object(&self, hash: &str) -> Result<MetaData, GitError>;
+
+    async fn lfs_get_meta(&self, v: &RequestVars) -> Result<MetaObject, GitLFSError>;
+
+    async fn lfs_put_meta(&self, v: &RequestVars) -> Result<MetaObject, GitLFSError>;
+
+    async fn lfs_delete_meta(&self, v: &RequestVars) -> Result<(), GitLFSError>;
+
+    async fn lfs_get_locks(&self, refspec: &str) -> Result<Vec<Lock>, GitLFSError>;
+
+    async fn lfs_get_filtered_locks(
+        &self,
+        refspec: &str,
+        path: &str,
+        cursor: &str,
+        limit: &str,
+    ) -> Result<(Vec<Lock>, String), GitLFSError>;
+
+    async fn lfs_add_lock(&self, refspec: &str, locks: Vec<Lock>) -> Result<(), GitLFSError>;
+
+    async fn lfs_delete_lock(
&self, + refspec: &str, + user: Option, + id: &str, + force: bool, + ) -> Result; +} diff --git a/gust_integrate_lfs/src/gust/driver/structure/mod.rs b/gust_integrate_lfs/src/gust/driver/structure/mod.rs new file mode 100644 index 00000000..b53e9526 --- /dev/null +++ b/gust_integrate_lfs/src/gust/driver/structure/mod.rs @@ -0,0 +1,175 @@ +use sea_orm::{ActiveValue::NotSet, Set}; + +use crate::git::{ + hash::Hash, + object::{ + base::{ + blob::Blob, + commit::Commit, + tree::{Tree, TreeItem}, + }, + metadata::MetaData, + types::ObjectType, + }, +}; +use std::{ + path::{Path, PathBuf}, + str::FromStr, + sync::Arc, +}; + +use self::nodes::{FileNode, Node, TreeNode}; +use super::utils::id_generator::{self, generate_id}; +use entity::{commit, node}; + +pub mod nodes; + +/// only blob and tree should implement this trait +pub trait GitNodeObject { + fn convert_to_node(&self, item: Option<&TreeItem>) -> Box; + + fn convert_from_model(model: &node::Model) -> Self + where + Self: Sized, + { + todo!() + } + + fn generate_id(&self) -> i64 { + id_generator::generate_id() + } +} + +impl GitNodeObject for Blob { + fn convert_to_node(&self, item: Option<&TreeItem>) -> Box { + Box::new(FileNode { + nid: self.generate_id(), + pid: "".to_owned(), + git_id: self.meta.id, + path: PathBuf::new(), + mode: if let Some(item) = item { + item.mode.clone() + } else { + Vec::new() + }, + name: if let Some(item) = item { + item.filename.clone() + } else { + "".to_owned() + }, + data: self.meta.data.clone(), + }) + } + // pub fn convert_to_model(&self, node_id: i64) -> node::ActiveModel { + // node::ActiveModel { + // id: NotSet, + // node_id: Set(node_id), + // git_id: Set(self.meta.id.to_plain_str()), + // data: Set(self.meta.data.clone()), + // content_sha: NotSet, + // mode: Set(Vec::new()), + // name: Set(), + // node_type: Set("blob".to_owned()), + // created_at: Set(chrono::Utc::now().naive_utc()), + // updated_at: Set(chrono::Utc::now().naive_utc()), + // } + // } +} + +impl Commit { + pub fn build_from_model_and_root(model: &commit::Model, root: node::Model) -> Commit { + let mut c = Commit::new(Arc::new(MetaData::new(ObjectType::Commit, &model.meta))); + c.tree_id = Hash::from_str(&root.git_id).unwrap(); + c.parent_tree_ids.clear(); + c.meta = Arc::new(c.encode_metadata().unwrap()); + c + } + + pub fn convert_to_model(&self, repo_path: &Path) -> commit::ActiveModel { + commit::ActiveModel { + id: NotSet, + git_id: Set(self.meta.id.to_plain_str()), + tree: Set(self.tree_id.to_plain_str()), + pid: NotSet, + meta: Set(self.meta.data.clone()), + repo_path: Set(repo_path.to_str().unwrap().to_owned()), + author: NotSet, + committer: NotSet, + content: NotSet, + created_at: Set(chrono::Utc::now().naive_utc()), + updated_at: Set(chrono::Utc::now().naive_utc()), + } + } +} + +impl GitNodeObject for Tree { + // pub fn convert_from_model(model: &node::Model, tree_items: Vec) -> Tree { + // Tree { + // meta: MetaData::new(ObjectType::Tree, &Vec::new()), + // tree_items, + // tree_name: model.name.clone(), + // } + // } + + fn convert_to_node(&self, item: Option<&TreeItem>) -> Box { + Box::new(TreeNode { + nid: generate_id(), + pid: "".to_owned(), + git_id: self.meta.id, + name: if let Some(item) = item { + item.filename.clone() + } else { + "".to_owned() + }, + path: PathBuf::new(), + mode: if let Some(item) = item { + item.mode.clone() + } else { + Vec::new() + }, + children: Vec::new(), + data: self.meta.data.clone(), + }) + } +} + +// impl TreeItem { +// pub fn convert_from_model(model: node::Model) -> TreeItem { +// 
let item_type = if model.node_type == "tree" { +// TreeItemType::Tree +// } else { +// TreeItemType::Blob +// }; +// TreeItem { +// mode: model.mode, +// item_type, +// id: Hash::from_bytes(model.git_id.as_bytes()).unwrap(), +// filename: model.name, +// } +// } +// } + +// impl GitNodeObject for TreeItem { +// fn convert_to_node(&self) -> Box { +// match self.item_type { +// TreeItemType::Blob => Box::new(FileNode { +// nid: self.generate_id(), +// pid: "".to_owned(), +// git_id: self.id, +// path: PathBuf::new(), +// mode: self.mode.clone(), +// name: self.filename.clone(), +// }), +// TreeItemType::Tree => Box::new(TreeNode { +// nid: self.generate_id(), +// pid: "".to_owned(), +// git_id: self.id, +// name: self.filename.clone(), +// path: PathBuf::new(), +// mode: self.mode.clone(), +// children: Vec::new(), +// }), +// _ => panic!("not supported type"), +// } +// } +// } diff --git a/gust_integrate_lfs/src/gust/driver/structure/nodes.rs b/gust_integrate_lfs/src/gust/driver/structure/nodes.rs new file mode 100644 index 00000000..7d618a51 --- /dev/null +++ b/gust_integrate_lfs/src/gust/driver/structure/nodes.rs @@ -0,0 +1,451 @@ +use std::{ + any::Any, + collections::{HashMap, HashSet}, + path::{Path, PathBuf}, +}; + +use entity::node; +use sea_orm::{ActiveValue::NotSet, Set}; + +use crate::{ + git::{ + hash::Hash, + object::base::{ + blob::Blob, + tree::{Tree, TreeItemType}, + }, + pack::decode::ObjDecodedMap, + }, + gust::driver::utils::id_generator::{self, generate_id}, +}; + +use super::GitNodeObject; + +pub struct Repo { + // pub repo_root: Box, + pub tree_map: HashMap, + pub blob_map: HashMap, + pub tree_build_cache: HashSet, + // todo: limit the size of the cache + // pub cache: LruCache, +} + +pub struct TreeNode { + pub nid: i64, + pub pid: String, + pub git_id: Hash, + pub name: String, + pub path: PathBuf, + pub mode: Vec, + pub children: Vec>, + pub data: Vec, +} + +#[derive(Debug, Clone)] +pub struct FileNode { + pub nid: i64, + pub pid: String, + pub git_id: Hash, + pub name: String, + pub path: PathBuf, + pub mode: Vec, + pub data: Vec, +} + +/// define the node common behaviour +pub trait Node { + fn get_id(&self) -> i64; + + fn get_pid(&self) -> &str; + + fn get_git_id(&self) -> Hash; + + fn get_name(&self) -> &str; + + fn get_mode(&self) -> Vec; + + fn get_children(&self) -> &Vec>; + + fn generate_id(&self) -> i64 { + id_generator::generate_id() + } + + fn new(name: String, pid: String) -> Self + where + Self: Sized; + + fn find_child(&mut self, name: &str) -> Option<&mut Box>; + + fn add_child(&mut self, child: Box); + + fn is_a_directory(&self) -> bool; + + fn as_any(&self) -> &dyn Any; + + // since we use lazy load, need manually fetch data, and might need to use a LRU cache to store the data? 
+ fn read_data(&self) -> String { + "".to_string() + } + + fn convert_to_model(&self) -> node::ActiveModel; + + // fn convert_from_model(node: node::Model, children: Vec>) -> Box + // where + // Self: Sized; +} + +impl Node for TreeNode { + fn get_id(&self) -> i64 { + self.nid + } + fn get_pid(&self) -> &str { + &self.pid + } + + fn get_git_id(&self) -> Hash { + self.git_id + } + fn get_name(&self) -> &str { + &self.name + } + + fn get_mode(&self) -> Vec { + self.mode.clone() + } + + fn get_children(&self) -> &Vec> { + &self.children + } + + fn new(name: String, pid: String) -> TreeNode { + TreeNode { + nid: generate_id(), + pid, + name, + path: PathBuf::new(), + mode: Vec::new(), + git_id: Hash::default(), + children: Vec::new(), + data: Vec::new(), + } + } + + /// convert children relations to data vec + fn convert_to_model(&self) -> node::ActiveModel { + // tracing::info!("tree {}", Arc::strong_count(&self.data)); + // tracing::info!("tree {}", Arc::strong_count(&Arc::clone(&self.data))); + node::ActiveModel { + id: NotSet, + node_id: Set(self.nid), + git_id: Set(self.git_id.to_plain_str()), + node_type: Set("tree".to_owned()), + name: Set(self.name.to_string()), + mode: Set(self.mode.clone()), + content_sha: NotSet, + data: Set(self.data.clone()), + created_at: Set(chrono::Utc::now().naive_utc()), + updated_at: Set(chrono::Utc::now().naive_utc()), + } + } + + fn find_child(&mut self, name: &str) -> Option<&mut Box> { + self.children.iter_mut().find(|c| c.get_name() == name) + } + + fn add_child(&mut self, content: Box) { + self.children.push(content); + } + + fn is_a_directory(&self) -> bool { + true + } + + fn as_any(&self) -> &dyn Any { + self + } + + // fn convert_from_model(node: node::Model, children: Vec>) -> Box { + // Box::new(TreeNode { + // nid: node.node_id, + // pid: node.pid, + // git_id: Hash::from_bytes(node.git_id.as_bytes()).unwrap(), + // name: node.name, + // path: PathBuf::new(), + // mode: node.mode, + // children, + // data: Vec::new(), + // }) + // } +} + +impl Node for FileNode { + fn get_id(&self) -> i64 { + self.nid + } + + fn get_pid(&self) -> &str { + &self.pid + } + + fn get_git_id(&self) -> Hash { + self.git_id + } + fn get_name(&self) -> &str { + &self.name + } + + fn get_mode(&self) -> Vec { + self.mode.clone() + } + + fn get_children(&self) -> &Vec> { + panic!("not supported") + } + + fn new(name: String, pid: String) -> FileNode { + FileNode { + nid: generate_id(), + pid, + path: PathBuf::new(), + name, + git_id: Hash::default(), + mode: Vec::new(), + data: Vec::new(), + } + } + + fn convert_to_model(&self) -> node::ActiveModel { + node::ActiveModel { + id: NotSet, + node_id: Set(self.nid), + git_id: Set(self.git_id.to_plain_str()), + node_type: Set("blob".to_owned()), + name: Set(self.name.to_string()), + mode: Set(self.mode.clone()), + content_sha: NotSet, + data: Set(self.data.clone()), + created_at: Set(chrono::Utc::now().naive_utc()), + updated_at: Set(chrono::Utc::now().naive_utc()), + } + } + + fn find_child(&mut self, _: &str) -> Option<&mut Box> { + panic!("not supported") + } + + fn add_child(&mut self, _: Box) { + panic!("not supported") + } + + fn is_a_directory(&self) -> bool { + false + } + + fn as_any(&self) -> &dyn Any { + self + } + + // fn convert_from_model(node: node::Model, _: Vec>) -> Box { + // Box::new(FileNode { + // nid: node.node_id, + // pid: node.pid, + // git_id: Hash::from_bytes(node.git_id.as_bytes()).unwrap(), + // name: node.name, + // path: PathBuf::new(), + // mode: node.mode, + // }) + // } +} + +impl TreeNode { + 
// since the root tree doesn't have a name, we can only use the node id to build it.
+    pub fn get_root_from_nid(nid: i64) -> Box<dyn Node> {
+        Box::new(TreeNode {
+            nid,
+            pid: "".to_owned(),
+            git_id: Hash::default(),
+            name: "".to_owned(),
+            path: PathBuf::from("/"),
+            mode: Vec::new(),
+            children: Vec::new(),
+            data: Vec::new(),
+        })
+    }
+}
+
+/// this method is used to build the node tree and persist node data to the database. Conversion order:
+/// 1. Git TreeItem => Struct Node => DB Model
+/// 2. Git Blob => DB Model
+/// current: protocol => storage => structure
+/// expected: protocol => structure => storage
+pub async fn build_node_tree(
+    result: &ObjDecodedMap,
+    _: &Path,
+) -> Result<Vec<node::ActiveModel>, anyhow::Error> {
+    let tree_map: HashMap<Hash, Tree> = result
+        .trees
+        .clone()
+        .into_iter()
+        .map(|tree| (tree.meta.id, tree))
+        .collect();
+
+    let blob_map: HashMap<Hash, Blob> = result
+        .blobs
+        .clone()
+        .into_iter()
+        .map(|b| (b.meta.id, b))
+        .collect();
+
+    let mut repo = Repo {
+        tree_map,
+        blob_map,
+        tree_build_cache: HashSet::new(),
+    };
+
+    let mut nodes = Vec::new();
+
+    for commit in &result.commits {
+        let commit_tree_id = commit.tree_id;
+        let tree = &repo.tree_map.get(&commit_tree_id).unwrap().clone();
+        let mut root_node = tree.convert_to_node(None);
+        repo.build_node_tree(tree, &mut root_node);
+        nodes.extend(repo.convert_node_to_model(root_node.as_ref(), 0));
+        print!("--------------------------------");
+    }
+    Ok(nodes)
+}
+
+impl Repo {
+    /// convert Git TreeItem => Struct Node and build the node tree
+    pub fn build_node_tree(&mut self, tree: &Tree, node: &mut Box<dyn Node>) {
+        for item in &tree.tree_items {
+            if self.tree_build_cache.contains(&item.id) {
+                continue;
+            }
+            if item.item_type == TreeItemType::Tree {
+                // repo_path.push(item.filename.clone());
+                let tree = self.tree_map.get(&item.id).unwrap();
+                node.add_child(tree.convert_to_node(Some(item)));
+                let child_node = match node.find_child(&item.filename) {
+                    Some(child) => child,
+                    None => panic!("Something wrong!:{}", &item.filename),
+                };
+                let item = self.tree_map.get(&item.id);
+                if let Some(item) = item {
+                    self.build_node_tree(&item.clone(), child_node);
+                }
+                // repo_path.pop();
+            } else {
+                let blob = self.blob_map.get(&item.id).unwrap();
+                node.add_child(blob.convert_to_node(Some(item)));
+            }
+            self.tree_build_cache.insert(item.id);
+        }
+    }
+
+    /// convert a Node to a db entity for later persistence
+    pub fn convert_node_to_model(&self, node: &dyn Node, depth: u32) -> Vec<node::ActiveModel> {
+        print_node(node, depth);
+        let mut nodes: Vec<node::ActiveModel> = Vec::new();
+        nodes.push(node.convert_to_model());
+        if node.is_a_directory() {
+            for child in node.get_children() {
+                nodes.extend(self.convert_node_to_model(child.as_ref(), depth + 1));
+            }
+        }
+        nodes
+    }
+}
+
+// Model => Node => Tree ?
+// pub fn model_to_node(nodes_model: &Vec<node::Model>, pid: &str) -> Vec<Box<dyn Node>> {
+//     let mut nodes: Vec<Box<dyn Node>> = Vec::new();
+//     for model in nodes_model {
+//         if model.pid == pid {
+//             if model.node_type == "blob" {
+//                 nodes.push(FileNode::convert_from_model(model.clone(), Vec::new()));
+//             } else {
+//                 let childs = model_to_node(nodes_model, &model.pid);
+//                 nodes.push(TreeNode::convert_from_model(model.clone(), childs));
+//             }
+//         }
+//     }
+//     nodes
+// }
+
+/// Print a node with format.
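+///
+/// For a root node with a single child, the output looks like this (illustrative):
+///
+/// ```text
+/// root
+/// └── child1 42
+/// ```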
+pub fn print_node(node: &dyn Node, depth: u32) { + if depth == 0 { + println!("{}", node.get_name()); + } else { + println!( + "{:indent$}└── {} {}", + "", + node.get_name(), + node.get_id(), + indent = ((depth as usize) - 1) * 4 + ); + } +} + +#[cfg(test)] +mod test { + use crate::gust::driver::{ + structure::nodes::{Node, TreeNode}, + utils::id_generator, + }; + use std::path::PathBuf; + + use super::FileNode; + + #[test] + pub fn main() { + // Form our INPUT: a list of paths. + let paths = vec![ + PathBuf::from("child1/grandchild1.txt"), + PathBuf::from("child1/grandchild2.txt"), + PathBuf::from("child2/grandchild3.txt"), + PathBuf::from("child3"), + ]; + println!("Input Paths:\n{:#?}\n", paths); + id_generator::set_up_options().unwrap(); + // let mut root = init_root(); + // for path in paths.iter() { + // build_tree(&mut root, path, 0) + // } + + // let mut save_models: Vec = Vec::new(); + + // traverse_node(root.as_ref(), 0, &mut save_models); + } + + fn build_tree(node: &mut Box, path: &PathBuf, depth: usize) { + let parts: Vec<&str> = path.to_str().unwrap().split("/").collect(); + + if depth < parts.len() { + let child_name = parts[depth]; + + let child = match node.find_child(&child_name) { + Some(child) => child, + None => { + if path.is_file() { + node.add_child(Box::new(FileNode::new( + child_name.to_owned(), + "".to_owned(), + ))); + } else { + node.add_child(Box::new(TreeNode::new( + child_name.to_owned(), + "".to_owned(), + ))); + }; + match node.find_child(&child_name) { + Some(child) => child, + None => panic!("Something wrong!:{}, {}", &child_name, depth), + } + } + }; + build_tree(child, path, depth + 1); + } + } +} diff --git a/gust_integrate_lfs/src/gust/driver/utils/id_generator.rs b/gust_integrate_lfs/src/gust/driver/utils/id_generator.rs new file mode 100644 index 00000000..73f73dc4 --- /dev/null +++ b/gust_integrate_lfs/src/gust/driver/utils/id_generator.rs @@ -0,0 +1,33 @@ +use idgenerator::*; +use std::time::Instant; + +pub fn set_up_options() -> Result<(), OptionError> { + // Setup the option for the id generator instance. + let options = IdGeneratorOptions::new().worker_id(1).worker_id_bit_len(6); + + // Initialize the id generator instance with the option. + // Other options not set will be given the default value. + let _ = IdInstance::init(options)?; + + // Get the option from the id generator instance. + let options = IdInstance::get_options(); + println!("First setting: {:?}", options); + Ok(()) +} + +pub fn generate_id() -> i64 { + let mut new_id: i64 = 0; + let mut times = 100; + let start = Instant::now(); + while times > 0 { + // Call `next_id` to generate a new unique id. + new_id = IdInstance::next_id(); + times -= 1; + } + let duration = start.elapsed().as_millis(); + // tracing::info!( + // "Program finished after {} millis seconds! Last id {}", + // duration, new_id + // ); + new_id +} diff --git a/gust_integrate_lfs/src/gust/driver/utils/mod.rs b/gust_integrate_lfs/src/gust/driver/utils/mod.rs new file mode 100644 index 00000000..e6e877b9 --- /dev/null +++ b/gust_integrate_lfs/src/gust/driver/utils/mod.rs @@ -0,0 +1 @@ +pub mod id_generator; diff --git a/gust_integrate_lfs/src/gust/mod.rs b/gust_integrate_lfs/src/gust/mod.rs new file mode 100644 index 00000000..42e92035 --- /dev/null +++ b/gust_integrate_lfs/src/gust/mod.rs @@ -0,0 +1,4 @@ +//! +//! +//! 
+pub mod driver;
diff --git a/gust_integrate_lfs/src/main.rs b/gust_integrate_lfs/src/main.rs
new file mode 100644
index 00000000..a767dd42
--- /dev/null
+++ b/gust_integrate_lfs/src/main.rs
@@ -0,0 +1,118 @@
+//!
+//! # Gust - A Monorepo Platform for Git
+//!
+//! Google has a monorepo system, __Piper__, with more than 100 TB of data. It is built on
+//! top of Google's infrastructure. Gust's purpose is to imitate Piper's architecture and
+//! implement a monorepo platform that is compatible with Git and the trunk-based development
+//! flow, for collaboration, open source compliance, supply chain management, and DevSecOps.
+//!
+//! ## Git Compatible
+//!
+//! Git is a content-addressable file system. It is also a distributed collaboration system. All of
+//! the files in a single repository are persisted on the machine's hard drive. This has many
+//! advantages for performance and maintenance. But it also presents challenges for a monorepo. It
+//! isn't easy to manage a large code repository, such as a 20TB repo, which is typical in a
+//! medium-sized company.
+//!
+//! Git is the world's most widely used version control system, and Gust aims to build a bridge
+//! between Git and Monorepo. Git can 'clone' or 'pull' any folder from Monorepo into the local
+//! development environment as a Git repository and 'push' it back. Gust hosts a monorepo codebase
+//! in distributed databases such as SQL, NoSQL, and graph databases.
+//!
+//! ## Trunk-based Development
+//!
+//! ## References
+//!
+//! 1. [What is monorepo? (and should you use it?)](https://semaphoreci.com/blog/what-is-monorepo)
+//! 2. [Monorepo: A single repository for all your code](https://medium.com/@mattklein123/monorepo-a-single-repository-for-all-your-code-86a852bff054)
+//! 3. [Why Google Stores Billions of Lines of Code in a Single Repository](https://cacm.acm.org/magazines/2016/7/204032-why-google-stores-billions-of-lines-of-code-in-a-single-repository)
+//! 4. [Trunk Based Development](https://trunkbaseddevelopment.com)
+//! 5. [Branching strategies: Git-flow vs trunk-based development](https://www.devbridge.com/articles/branching-strategies-git-flow-vs-trunk-based-development/)
+//! 6. [Monorepo.tools](https://monorepo.tools)
+//! 7. [Google Open Source Third Party](https://opensource.google/documentation/reference/thirdparty)
+
+pub mod errors;
+pub mod gateway;
+pub mod git;
+pub mod gust;
+pub mod utils;
+
+use std::env;
+use std::path::PathBuf;
+
+use anyhow::Result;
+use clap::{command, Args, Parser, Subcommand};
+use gateway::api::lib;
+use gust::driver::utils::id_generator;
+
+#[derive(Parser)]
+#[command(author = "Open Rust Initiative")]
+#[command(
+    about = "Mega is building a monorepo engine to enable Git and trunk-based development at scale"
+)]
+#[command(version, long_about = None)]
+struct Cli {
+    /// custom configuration file
+    #[arg(short, long, value_name = "FILE")]
+    config: Option<PathBuf>,
+
+    /// custom log file
+    #[arg(short, long, value_name = "FILE")]
+    log_path: Option<PathBuf>,
+
+    /// subcommand serve
+    #[command(subcommand)]
+    serve_command: ServeCommand,
+}
+
+/// The main entry of the application.
+///
+/// ### TODO
+/// 1. Add `clap` to parse command line arguments; don't start the gateway service directly in the main function.
+/// 2. Add a `log` function and initialization to log the application's running status.
+/// 3. Add a `config` function to load the configuration file while the application is running.
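+///
+/// ### Example (illustrative)
+///
+/// Start the HTTP gateway with the default host and port:
+///
+/// ```text
+/// cargo run -- http --host 127.0.0.1 --port 8000
+/// ```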
+#[tokio::main] +pub async fn main() -> Result<()> { + env::set_var("RUST_LOG", "debug"); + tracing_subscriber::fmt::init(); + id_generator::set_up_options().unwrap(); + dotenvy::dotenv().ok(); + + let cli = Cli::parse(); + + match &cli.serve_command { + ServeCommand::Http(config) => { + lib::http_server(config).await.unwrap(); + } + ServeCommand::Ssh(config) => { + gateway::ssh_server::server(config).await.unwrap(); + } + } + Ok(()) +} + +#[derive(Subcommand)] +pub enum ServeCommand { + /// start http server + Http(ServeConfig), + /// start ssh server + Ssh(ServeConfig), +} + +#[derive(Args, Clone)] +pub struct ServeConfig { + #[arg(long, default_value_t = String::from("127.0.0.1"))] + host: String, + + #[arg(short, long, default_value_t = 8000)] + port: u16, + + #[arg(short, long, value_name = "FILE")] + key_path: Option, + + #[arg(short, long, value_name = "FILE")] + cert_path: Option, + + #[arg(short, long, default_value_os_t = PathBuf::from("lfs_content"))] + lfs_content_path: PathBuf, +} diff --git a/gust_integrate_lfs/src/utils/mod.rs b/gust_integrate_lfs/src/utils/mod.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/gust_integrate_lfs/src/utils/mod.rs @@ -0,0 +1 @@ + -- Gitee