From aa437bb2628082e819277f067c790678a22367bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=94=A1=E5=A7=AC?=
Date: Sat, 11 May 2024 08:57:53 +0000
Subject: [PATCH 1/3] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20index-rs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 index-rs/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 index-rs/.keep

diff --git a/index-rs/.keep b/index-rs/.keep
new file mode 100644
index 00000000..e69de29b
-- 
Gitee


From fb26a7fdfbe60c28e0d79684b11bf0dd55e37589 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=94=A1=E5=A7=AC?=
Date: Sat, 11 May 2024 09:00:18 +0000
Subject: [PATCH 2/3] add index.rs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 蔡姬
---
Reviewer note: the payload was revised during review and the hunk size updated to
match; the blob id on the `index` line below still names the originally submitted file.

 index-rs/index.rs | 613 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 613 insertions(+)
 create mode 100644 index-rs/index.rs

diff --git a/index-rs/index.rs b/index-rs/index.rs
new file mode 100644
index 00000000..ea0736d1
--- /dev/null
+++ b/index-rs/index.rs
@@ -0,0 +1,613 @@
+//! Reading and writing of the Git index file, format version 2.
+//!
+//! See [index-format](https://git-scm.com/docs/index-format) for the on-disk layout.
+
+use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use sha1::{Digest, Sha1};
+use std::collections::BTreeMap;
+use std::fmt::{Display, Formatter};
+use std::fs::{self, File};
+use std::io;
+use std::io::{BufReader, Read, Write};
+#[cfg(unix)]
+use std::os::unix::fs::MetadataExt;
+use std::path::{Path, PathBuf};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use venus::errors::GitError;
+use venus::hash::SHA1;
+use mercury::internal::pack::wrapper::Wrapper;
+use crate::utils::{path, util};
+
+/// Timestamp of an index entry: whole seconds since the Unix epoch plus nanoseconds.
+#[derive(PartialEq, Eq, Debug, Clone)]
+pub struct Time {
+    seconds: u32,
+    nanos: u32,
+}
+impl Time {
+    /// Reads the seconds and then the nanoseconds, each as a big-endian `u32`.
+    pub fn from_stream(stream: &mut impl Read) -> Result<Self, GitError> {
+        let seconds = stream.read_u32::<BigEndian>()?;
+        let nanos = stream.read_u32::<BigEndian>()?;
+        Ok(Time { seconds, nanos })
+    }
+
+    #[allow(dead_code)] // only exercised by the tests in this file
+    fn to_system_time(&self) -> SystemTime {
+        UNIX_EPOCH + std::time::Duration::new(self.seconds.into(), self.nanos)
+    }
+
+    /// Converts `system_time` without panicking: the seconds field is only 32 bits wide,
+    /// so instants before the epoch become zero and far-future ones saturate.
+    fn from_system_time(system_time: SystemTime) -> Self {
+        match system_time.duration_since(UNIX_EPOCH) {
+            Ok(duration) => Time {
+                seconds: u32::try_from(duration.as_secs()).unwrap_or(u32::MAX),
+                nanos: duration.subsec_nanos(),
+            },
+            Err(_) => Time { seconds: 0, nanos: 0 },
+        }
+    }
+}
+impl Display for Time {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}:{}", self.seconds, self.nanos)
+    }
+}
+
+/// Returns `(ctime, mtime)` of `meta` in index representation.
+///
+/// A creation time is not available on every platform or filesystem, so this falls back
+/// to the modification time (and to the epoch if that is missing too) instead of panicking.
+fn stat_times(meta: &fs::Metadata) -> (Time, Time) {
+    // NOTE(review): git itself records `st_ctime` (last metadata change) as ctime - confirm
+    // that using the creation time is intended.
+    let epoch = Time { seconds: 0, nanos: 0 };
+    let mtime = meta.modified().map_or(epoch, Time::from_system_time);
+    let ctime = meta
+        .created()
+        .map_or_else(|_| mtime.clone(), Time::from_system_time);
+    (ctime, mtime)
+}
+
+/// 16 bits
+#[derive(Debug)]
+pub struct Flags {
+    pub assume_valid: bool,
+    pub extended: bool,   // must be 0 in v2
+    pub stage: u8,        // 2-bit during merge
+    pub name_length: u16, // 12-bit
+}
+// TODO From Trait
+impl Flags {
+    /// Largest value of the 12-bit length field; the format stores it for longer names too.
+    const MAX_NAME_LENGTH: u16 = 0xFFF;
+
+    /// Flags of a stage-0 entry whose name is `name_len` bytes long (saturated to 12 bits).
+    pub fn new(name_len: u16) -> Self {
+        Flags {
+            assume_valid: true,
+            extended: false,
+            stage: 0,
+            name_length: name_len.min(Self::MAX_NAME_LENGTH),
+        }
+    }
+
+    /// Like [`Flags::new`], but takes the name so that no length can be truncated by a cast.
+    fn for_name(name: &str) -> Self {
+        Flags::new(u16::try_from(name.len()).unwrap_or(u16::MAX))
+    }
+
+    /// Decodes the on-disk representation.
+    pub fn from_u16(flags: u16) -> Self {
+        Flags {
+            assume_valid: flags & 0x8000 != 0,
+            extended: flags & 0x4000 != 0,
+            stage: ((flags & 0x3000) >> 12) as u8,
+            name_length: flags & 0xFFF,
+        }
+    }
+
+    /// Encodes into the on-disk representation.
+    pub fn to_u16(&self) -> u16 {
+        let mut flags = 0u16;
+        if self.assume_valid {
+            flags |= 0x8000;
+        }
+        if self.extended {
+            flags |= 0x4000;
+        }
+        // Confine both fields to their bits so a stray value cannot corrupt its neighbours.
+        flags |= (u16::from(self.stage) & 0x3) << 12;
+        flags |= self.name_length.min(Self::MAX_NAME_LENGTH);
+        flags
+    }
+}
+
+/// One entry of the index.
+pub struct IndexEntry {
+    pub ctime: Time,
+    pub mtime: Time,
+    pub dev: u32,  // 0 for windows
+    pub ino: u32,  // 0 for windows
+    pub mode: u32, // 0o100644 // 4-bit object type + 3-bit unused + 9-bit unix permission
+    pub uid: u32,  // 0 for windows
+    pub gid: u32,  // 0 for windows
+    pub size: u32,
+    pub hash: SHA1,
+    pub flags: Flags,
+    pub name: String,
+}
+impl Display for IndexEntry {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "IndexEntry {{ ctime: {}, mtime: {}, dev: {}, ino: {}, mode: {:o}, uid: {}, gid: {}, size: {}, hash: {}, flags: {:?}, name: {} }}",
+               self.ctime, self.mtime, self.dev, self.ino, self.mode, self.uid, self.gid, self.size, self.hash, self.flags, self.name)
+    }
+}
+
+impl IndexEntry {
+    /** Metadata must be got by [fs::symlink_metadata] to avoid following symlink */
+    pub fn new(meta: &fs::Metadata, hash: SHA1, name: String) -> Self {
+        let (ctime, mtime) = stat_times(meta);
+        let mut entry = IndexEntry {
+            ctime,
+            mtime,
+            dev: 0,
+            ino: 0,
+            uid: 0,
+            gid: 0,
+            size: meta.len() as u32, // the on-disk field is 32 bits wide
+            hash,
+            flags: Flags::for_name(&name),
+            name,
+            mode: 0o100644,
+        };
+        #[cfg(unix)]
+        {
+            entry.dev = meta.dev() as u32;
+            entry.ino = meta.ino() as u32;
+            entry.uid = meta.uid();
+            entry.gid = meta.gid();
+
+            entry.mode = match meta.mode() & 0o170000/* file mode */ {
+                0o100000 => {
+                    match meta.mode() & 0o111 {
+                        0 => 0o100644, // no execute permission
+                        _ => 0o100755, // with execute permission
+                    }
+                }
+                0o120000 => 0o120000, // symlink
+                _ => entry.mode, // keep the original mode
+            };
+        }
+        #[cfg(windows)]
+        {
+            if meta.is_symlink() {
+                entry.mode = 0o120000;
+            }
+        }
+        entry
+    }
+
+    /// absolute or relative path (to current dir)
+    ///
+    /// # Errors
+    /// Fails if `file` cannot be stat'ed or its workdir-relative path is not valid UTF-8.
+    pub fn new_from_file(file: &Path, hash: SHA1) -> io::Result<Self> {
+        let meta = fs::symlink_metadata(file)?; // without following symlink
+        let name = util::to_workdir_path(file)
+            .into_os_string()
+            .into_string()
+            .map_err(|raw| {
+                let msg = format!("path is not valid UTF-8: {:?}", raw);
+                io::Error::new(io::ErrorKind::InvalidData, msg)
+            })?;
+        Ok(IndexEntry::new(&meta, hash, name))
+    }
+
+    /// Entry built from a blob's name, hash and size; every stat field is zero.
+    pub fn new_from_blob(name: String, hash: SHA1, size: u32) -> Self {
+        IndexEntry {
+            ctime: Time { seconds: 0, nanos: 0 },
+            mtime: Time { seconds: 0, nanos: 0 },
+            dev: 0,
+            ino: 0,
+            mode: 0o100644,
+            uid: 0,
+            gid: 0,
+            size,
+            hash,
+            flags: Flags::for_name(&name),
+            name,
+        }
+    }
+}
+
+/// see [index-format](https://git-scm.com/docs/index-format)
+///
+/// Entries are keyed by `(name, stage)`; names are paths relative to the working directory.
+pub struct Index {
+    entries: BTreeMap<(String, u8), IndexEntry>,
+}
+
+impl Default for Index {
+    fn default() -> Self {
+        Index::new()
+    }
+}
+
+impl Index {
+    /// Validates magic and version; returns the number of entries announced by the header.
+    fn check_header(file: &mut impl Read) -> Result<u32, GitError> {
+        let mut magic = [0; 4];
+        file.read_exact(&mut magic)?;
+        if magic != *b"DIRC" {
+            return Err(GitError::InvalidIndexHeader(
+                String::from_utf8_lossy(&magic).to_string(),
+            ));
+        }
+
+        let version = file.read_u32::<BigEndian>()?;
+        // only support v2 now
+        if version != 2 {
+            return Err(GitError::InvalidIndexHeader(version.to_string()));
+        }
+
+        let entries = file.read_u32::<BigEndian>()?;
+        Ok(entries)
+    }
+
+    pub fn new() -> Self {
+        Index {
+            entries: BTreeMap::new(),
+        }
+    }
+
+    pub fn size(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// Parses the index file at `path`, verifying its trailing SHA-1 checksum.
+    ///
+    /// # Errors
+    /// Fails on I/O errors, an unsupported header, a mandatory extension, a checksum
+    /// mismatch, or entries that share the same name and stage.
+    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, GitError> {
+        let file = File::open(path.as_ref())?; // read-only
+        let total_size = file.metadata()?.len();
+        let file = &mut Wrapper::new(BufReader::new(file)); // TODO move Wrapper & utils to a common module
+
+        let num = Index::check_header(file)?;
+        let mut index = Index::new();
+
+        for _ in 0..num {
+            let mut entry = IndexEntry {
+                ctime: Time::from_stream(file)?,
+                mtime: Time::from_stream(file)?,
+                dev: file.read_u32::<BigEndian>()?, //utils::read_u32_be(file)?,
+                ino: file.read_u32::<BigEndian>()?,
+                mode: file.read_u32::<BigEndian>()?,
+                uid: file.read_u32::<BigEndian>()?,
+                gid: file.read_u32::<BigEndian>()?,
+                size: file.read_u32::<BigEndian>()?,
+                hash: utils::read_sha1(file)?,
+                flags: Flags::from_u16(file.read_u16::<BigEndian>()?),
+                name: String::new(),
+            };
+            // A saturated length field means the name is longer and only NUL-terminated.
+            let long_name = entry.flags.name_length == Flags::MAX_NAME_LENGTH;
+            let name = if long_name {
+                utils::read_until_nul(file)?
+            } else {
+                utils::read_bytes(file, entry.flags.name_length as usize)?
+            };
+            let name_len = name.len();
+            // The exact encoding is undefined, but the '.' and '/' characters are encoded in 7-bit ASCII
+            entry.name = String::from_utf8(name)?; // TODO check the encoding
+            index.entries.insert((entry.name.clone(), entry.flags.stage), entry);
+
+            // 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
+            // while keeping the name NUL-terminated, so there is at least 1 nul byte.
+            let padding = 8 - ((22 + name_len) % 8); // 22 = sha1 + flags, others are 40 % 8 == 0
+            // `read_until_nul` has already consumed the first of those bytes.
+            utils::read_bytes(file, padding - usize::from(long_name))?;
+        }
+
+        // Extensions
+        while file.bytes_read() + utils::SHA1_SIZE < total_size as usize {
+            // The remaining 20 bytes must be checksum
+            let sign = utils::read_bytes(file, 4)?;
+            // If the first byte is 'A'...'Z' the extension is optional and can be ignored.
+            if sign[0].is_ascii_uppercase() {
+                // Optional extension
+                let size = file.read_u32::<BigEndian>()?;
+                utils::read_bytes(file, size as usize)?; // Ignore the extension
+            } else {
+                // 'link' or 'sdir' extension
+                return Err(GitError::InvalidIndexFile(
+                    "Unsupported extension".to_string(),
+                ));
+            }
+        }
+
+        // check sum
+        let file_hash = file.final_hash();
+        let check_sum = utils::read_sha1(file)?;
+        if file_hash != check_sum {
+            return Err(GitError::InvalidIndexFile("Check sum failed".to_string()));
+        }
+        // Duplicate (name, stage) pairs overwrite each other above; report them, do not panic.
+        if index.size() != num as usize {
+            return Err(GitError::InvalidIndexFile(format!(
+                "Header announces {} entries but {} distinct ones were read",
+                num,
+                index.size()
+            )));
+        }
+        Ok(index)
+    }
+
+    /// Writes the index to `path` as a version-2 index file without extensions.
+    pub fn to_file(&self, path: impl AsRef<Path>) -> Result<(), GitError> {
+        let mut file = File::create(path)?;
+        let mut hash = Sha1::new();
+
+        let mut header = Vec::new();
+        header.write_all(b"DIRC")?;
+        header.write_u32::<BigEndian>(2u32)?; // version 2
+        header.write_u32::<BigEndian>(self.entries.len() as u32)?;
+        file.write_all(&header)?;
+        hash.update(&header);
+
+        for entry in self.entries.values() {
+            let mut entry_bytes = Vec::new();
+            entry_bytes.write_u32::<BigEndian>(entry.ctime.seconds)?;
+            entry_bytes.write_u32::<BigEndian>(entry.ctime.nanos)?;
+            entry_bytes.write_u32::<BigEndian>(entry.mtime.seconds)?;
+            entry_bytes.write_u32::<BigEndian>(entry.mtime.nanos)?;
+            entry_bytes.write_u32::<BigEndian>(entry.dev)?;
+            entry_bytes.write_u32::<BigEndian>(entry.ino)?;
+            entry_bytes.write_u32::<BigEndian>(entry.mode)?;
+            entry_bytes.write_u32::<BigEndian>(entry.uid)?;
+            entry_bytes.write_u32::<BigEndian>(entry.gid)?;
+            entry_bytes.write_u32::<BigEndian>(entry.size)?;
+            entry_bytes.write_all(&entry.hash.0)?;
+            entry_bytes.write_u16::<BigEndian>(entry.flags.to_u16())?;
+            entry_bytes.write_all(entry.name.as_bytes())?;
+            let padding = 8 - ((22 + entry.name.len()) % 8);
+            entry_bytes.write_all(&[0u8; 8][..padding])?;
+
+            file.write_all(&entry_bytes)?;
+            hash.update(&entry_bytes);
+        }
+
+        // Extensions
+
+        // check sum
+        let file_hash: [u8; 20] = hash.finalize().into();
+        file.write_all(&file_hash)?;
+        Ok(())
+    }
+}
+
+impl Index {
+    /// Load index, if not exist, return an empty index
+    pub fn load() -> Result<Self, GitError> {
+        let path = path::index();
+        if !path.exists() {
+            return Ok(Index::new());
+        }
+        Index::from_file(path)
+    }
+
+    /// Same as [`Index::add`]: an existing entry with the same name and stage is replaced.
+    pub fn update(&mut self, entry: IndexEntry) {
+        self.add(entry)
+    }
+
+    /// Inserts `entry`, replacing any entry with the same name and stage.
+    pub fn add(&mut self, entry: IndexEntry) {
+        self.entries.insert((entry.name.clone(), entry.flags.stage), entry);
+    }
+
+    /// Removes and returns the entry for `(name, stage)`, if present.
+    pub fn remove(&mut self, name: &str, stage: u8) -> Option<IndexEntry> {
+        self.entries.remove(&(name.to_string(), stage))
+    }
+
+    pub fn get(&self, name: &str, stage: u8) -> Option<&IndexEntry> {
+        self.entries.get(&(name.to_string(), stage))
+    }
+
+    pub fn tracked(&self, name: &str, stage: u8) -> bool {
+        self.entries.contains_key(&(name.to_string(), stage))
+    }
+
+    pub fn get_hash(&self, file: &str, stage: u8) -> Option<SHA1> {
+        self.get(file, stage).map(|entry| entry.hash)
+    }
+
+    /// `true` only if `(file, stage)` is in the index and its hash equals `hash`.
+    pub fn verify_hash(&self, file: &str, stage: u8, hash: &SHA1) -> bool {
+        self.get(file, stage).map_or(false, |entry| &entry.hash == hash)
+    }
+
+    /// is file modified after last `add` (need hash to confirm content change)
+    ///
+    /// A file that can no longer be stat'ed (for example a deleted one) counts as modified.
+    ///
+    /// # Panics
+    /// Panics if `(file, stage)` is not in the index.
+    pub fn is_modified(&self, file: &str, stage: u8) -> bool {
+        let entry = match self.get(file, stage) {
+            Some(entry) => entry,
+            None => panic!("File not found in index"),
+        };
+        let path_abs = util::workdir_to_absolute(Path::new(file));
+        let meta = match path_abs.symlink_metadata() {
+            Ok(meta) => meta,
+            Err(_) => return true,
+        };
+        // TODO more fields
+        let (ctime, mtime) = stat_times(&meta);
+        entry.ctime != ctime || entry.mtime != mtime || entry.size != meta.len() as u32
+    }
+
+    /// Get all entries with the same stage
+    pub fn tracked_entries(&self, stage: u8) -> Vec<&IndexEntry> {
+        // ? should use stage or not
+        self.entries
+            .values()
+            .filter(|entry| entry.flags.stage == stage)
+            .collect()
+    }
+
+    /// Get all tracked files(stage = 0)
+    pub fn tracked_files(&self) -> Vec<PathBuf> {
+        self.tracked_entries(0).iter().map(|entry| PathBuf::from(&entry.name)).collect()
+    }
+
+    /// Judge if the file(s) of `dir` is in the index
+    /// - false if `dir` is a file
+    pub fn contains_dir_file(&self, dir: &str) -> bool {
+        let dir = Path::new(dir);
+        self.entries.keys().any(|(name, _)| {
+            let path = Path::new(name);
+            path.starts_with(dir) && path != dir // TODO change to is_sub_path!
+        })
+    }
+
+    /// remove all files in `dir` from index
+    /// - do nothing if `dir` is a file
+    pub fn remove_dir_files(&mut self, dir: &str) -> Vec<String> {
+        let dir = Path::new(dir);
+        let mut removed = Vec::new();
+        self.entries.retain(|(name, _), _| {
+            let path = Path::new(name);
+            if path.starts_with(dir) && path != dir {
+                removed.push(name.clone());
+                false
+            } else {
+                true
+            }
+        });
+        removed
+    }
+
+    /// saved to index file
+    pub fn save(&self) -> Result<(), GitError> {
+        self.to_file(path::index())
+    }
+}
+
+mod utils {
+    use std::io;
+    use std::io::Read;
+    use venus::hash::SHA1;
+
+    pub const SHA1_SIZE: usize = 20;
+
+    /// Reads exactly `len` bytes.
+    pub fn read_bytes(file: &mut impl Read, len: usize) -> io::Result<Vec<u8>> {
+        let mut buf = vec![0; len];
+        file.read_exact(&mut buf)?;
+        Ok(buf)
+    }
+
+    /// Reads up to and including the next NUL byte; the NUL itself is not returned.
+    pub fn read_until_nul(file: &mut impl Read) -> io::Result<Vec<u8>> {
+        let mut buf = Vec::new();
+        let mut byte = [0u8; 1];
+        loop {
+            file.read_exact(&mut byte)?;
+            if byte[0] == 0 {
+                return Ok(buf);
+            }
+            buf.push(byte[0]);
+        }
+    }
+
+    /// Reads a raw 20-byte SHA-1.
+    pub fn read_sha1(file: &mut impl Read) -> io::Result<SHA1> {
+        let mut buf = [0; SHA1_SIZE];
+        file.read_exact(&mut buf)?;
+        Ok(SHA1::from_bytes(&buf))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use path_abs::PathOps;
+    use super::*;
+    use crate::utils::test;
+
+    #[test]
+    fn test_time() {
+        let time = Time {
+            seconds: 0,
+            nanos: 0,
+        };
+        let system_time = time.to_system_time();
+        let new_time = Time::from_system_time(system_time);
+        assert_eq!(time, new_time);
+    }
+
+    #[test]
+    fn test_flags_saturate() {
+        assert_eq!(Flags::new(0x1234).name_length, 0xFFF);
+        let flags = Flags {
+            assume_valid: false,
+            extended: false,
+            stage: 2,
+            name_length: 0xFFFF,
+        };
+        assert_eq!(flags.to_u16(), 0x2FFF);
+        assert_eq!(Flags::from_u16(0x2FFF).stage, 2);
+    }
+
+    #[test]
+    fn test_check_header() {
+        let file = File::open("../tests/data/index/index-2").unwrap();
+        let entries = Index::check_header(&mut BufReader::new(file)).unwrap();
+        assert_eq!(entries, 2);
+    }
+
+    #[test]
+    fn test_index() {
+        let index = Index::from_file("../tests/data/index/index-760").unwrap();
+        assert_eq!(index.size(), 760);
+        for (_, entry) in index.entries.iter() {
+            println!("{}", entry);
+        }
+    }
+
+    #[test]
+    fn test_libra_index() {
+        let index = Index::from_file(PathBuf::from(test::TEST_DIR).join(".libra/index")).unwrap();
+        for (_, entry) in index.entries.iter() {
+            println!("{}", entry);
+        }
+    }
+
+    #[test]
+    fn test_index_to_file() {
+        let index = Index::from_file("../tests/data/index/index-760").unwrap();
+        // Use the platform temp dir instead of a hard-coded `/tmp`.
+        let copy = std::env::temp_dir().join("index-760");
+        index.to_file(&copy).unwrap();
+        let new_index = Index::from_file(&copy).unwrap();
+        assert_eq!(index.size(), new_index.size());
+    }
+
+    #[tokio::test]
+    async fn test_index_entry_create() {
+        // test create index entry from file
+        test::setup_with_new_libra().await;
+        test::ensure_file("src/test.rs", None);
+        let file = Path::new("src/test.rs"); // use as a normal file
+        let hash = SHA1::from_bytes(&[0; 20]);
+        let entry = IndexEntry::new_from_file(file, hash).unwrap();
+        println!("{}", entry);
+    }
+}
-- 
Gitee


From d732d88d9a64e84cfe18d285edc82921c70ad210 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=94=A1=E5=A7=AC?=
Date: Sat, 11 May 2024 09:00:25 +0000
Subject: [PATCH 3/3] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20inde?=
 =?UTF-8?q?x-rs/.keep?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 index-rs/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 index-rs/.keep

diff --git a/index-rs/.keep b/index-rs/.keep
deleted file mode 100644
index e69de29b..00000000
-- 
Gitee