diff --git a/.gitignore b/.gitignore index 600d2d33badf45cc068e01d2e3c837e11c417bc4..4d99d618719020f7460a5e087270deb4d8809f24 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.vscode \ No newline at end of file +.vscode +target \ No newline at end of file diff --git a/utils/rust/tar/Cargo.toml b/utils/rust/tar/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..744c7f161b50cb87e95ace4183c295a1ac6c44ec --- /dev/null +++ b/utils/rust/tar/Cargo.toml @@ -0,0 +1,33 @@ +# Cargo manifest for the tar_rust backup utility; manually maintained. +# (The "@generated by Cargo" notice applies to Cargo.lock, not this file.) +# Copyright (c) 2023 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[package] +name = "tar_rust" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.3.19", features = ["derive"] } +glob = "0.3.1" +lz4_flex = { version = "0.11", default-features = false, features = ["frame"], optional = true } +tar = "0.4.39" +walkdir = "2.3.3" +ylong_runtime = { git = "https://gitee.com/openharmony-sig/commonlibrary_rust_ylong_runtime.git", features = ["full"]} +libc = "0.2.101" + +[features] +compress_lz4_flex = ["dep:lz4_flex"] diff --git a/utils/rust/tar/src/backup.rs b/utils/rust/tar/src/backup.rs new file mode 100644 index 0000000000000000000000000000000000000000..93e25d46d624c86c4ace6a749a660aebbdd835e5 --- /dev/null +++ b/utils/rust/tar/src/backup.rs @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2023 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use glob::{Paths, Pattern}; +#[cfg(feature = "compress_lz4_flex")] +use lz4_flex::frame::FrameEncoder; +use std::error::Error; +use std::fs::File; +use std::io; +use std::path::{Path, PathBuf}; +use tar::Builder; +use ylong_runtime::sync::mpsc::bounded::{BoundedReceiver, BoundedSender}; + +#[derive(Clone, Copy)] +#[non_exhaustive] +pub enum CompressAlgorithm { + #[allow(dead_code)] + None, + + #[cfg(feature = "compress_lz4_flex")] + #[allow(dead_code)] + Lz4Flex, +} + +pub struct Options { + /// The working directory to store the files, default is "backup". + pub stash_dir: PathBuf, + + /// The threshold to determine if a file is huge in byte. A huge file will be transferred directly without + /// archiving. Currently the default value is `usize::MAX`, which means no file is huge. + pub threshold_huge_file: usize, + + /// The threshold to split the tar in byte. A new tar will be created if the size of the current tar exceeds this + /// threshold. Currently the default value is `usize::MAX`, which means no tar will be split. + pub threshold_split_tar: usize, + + /// The compress algorithm to use, default is `CompressAlgorithm::None`. 
+ pub compress_algorithm: CompressAlgorithm, +} + +#[derive(Debug)] +pub struct Archive { + #[allow(dead_code)] + /// The path of the archive file + path: PathBuf, + + #[allow(dead_code)] + /// The type of the archive file + archive_type: ArchiveType, + + #[allow(dead_code)] + /// The size of the file's content in byte + content_size: usize, +} + +#[derive(Debug)] +pub enum ArchiveType { + /// Files are backup-ed directly + Plain, + + /// Files are backup-ed in a tar + Tar, +} + +struct BackupContext { + option: Options, + + cur_tar: Option, + cur_tar_size: usize, + tar_cnt: usize, + + inputs: BoundedReceiver, + outputs: BoundedSender, +} + +enum Output { + Uncompressed(Builder), + + #[cfg(feature = "compress_lz4_flex")] + CompressedLz4Flex(Builder>), +} + +pub async fn scan_files( + includes: Vec, + excludes: Vec, + sender: BoundedSender, +) -> Result<(), Box> +where + S: AsRef, +{ + let exclude_patterns = build_exclude_patterns(excludes)?; + traverse_and_send(includes, exclude_patterns, sender).await +} + +pub async fn backup_files( + option: Options, + paths_receiver: BoundedReceiver, + archives_sender: BoundedSender, +) -> Result<(), Box> { + BackupContext::new(option, paths_receiver, archives_sender) + .archive() + .await?; + + Ok(()) +} + +fn build_exclude_patterns(excludes: Vec) -> Result, Box> +where + S: AsRef, +{ + for exclude in &excludes { + if !exclude.as_ref().starts_with("/") { + let err = io::Error::new( + io::ErrorKind::InvalidInput, + "exclude pattern must be absolute", + ); + return Err(Box::new(err)); + } + } + + let excludes = excludes.iter().map(|exclude| { + if let Some(stripped) = exclude.as_ref().strip_suffix("/") { + stripped + } else { + exclude.as_ref() + } + }); + + let exclude_patterns = excludes + .into_iter() + .map(|p| glob::Pattern::new(p.as_ref())) + .collect::, _>>()?; + + Ok(exclude_patterns) +} + +async fn traverse_and_send( + includes: Vec, + excludes: Vec, + sender: BoundedSender, +) -> Result<(), Box> +where + S: AsRef, +{ + 
let mut option = glob::MatchOptions::new(); + option.require_literal_separator = true; + + let includes = includes + .iter() + .map(|p| glob::glob_with(p.as_ref(), option)) + .collect::, _>>()?; + + for path in includes.into_iter().flatten() { + let path = path?; + if excludes.iter().any(|p| p.matches_path(&path)) { + continue; + } + + let is_path_exclude = + |path: &Path| -> bool { excludes.iter().any(|p| p.matches_path(path)) }; + + let metadata = path.metadata()?; + if metadata.is_file() { + if !is_path_exclude(&path.as_path()) { + sender.send(path).await?; + } + } else if metadata.is_dir() { + let walker = walkdir::WalkDir::new(path); + for entry in walker + .into_iter() + .filter_entry(|e| !is_path_exclude(e.path())) + { + let entry = entry?; + if entry.file_type().is_file() { + sender.send(entry.path().to_path_buf()).await?; + } + } + } + } + + Ok(()) +} + +impl Default for Options { + fn default() -> Self { + Self { + stash_dir: PathBuf::from("backup"), + threshold_huge_file: usize::MAX, // TODO: fix me + threshold_split_tar: usize::MAX, // TODO: fix me + compress_algorithm: CompressAlgorithm::None, + } + } +} + +impl Output { + fn new(path: PathBuf, compress_algorithm: CompressAlgorithm) -> Result { + let prefix = path + .parent() + .expect(format!("failed to get parent of {:?}", path).as_str()); + if !prefix.exists() { + std::fs::create_dir_all(prefix) + .expect(format!("failed to create {:?}", prefix).as_str()); + } + + match compress_algorithm { + CompressAlgorithm::None => Ok(Self::Uncompressed(Builder::new(File::create(path)?))), + #[cfg(feature = "compress_lz4_flex")] + CompressAlgorithm::Lz4Flex => Ok(Self::CompressedLz4Flex(Builder::new( + FrameEncoder::new(File::create(path)?), + ))), + } + } + + fn append_file(&mut self, achievable_path: &PathBuf) -> Result { + #[cfg(feature = "ohos")] + assert_eq!(std::env::current_dir(), r#"/"#); + + let abs_path = achievable_path + .canonicalize() + .expect("File to append is not achievable"); + let 
relative_path = abs_path + .strip_prefix(std::env::current_dir().expect("Cannot get current dir")) + .expect("File to append is not in current dir"); + + match self { + Self::Uncompressed(builder) => { + builder.append_path_with_name(&achievable_path, relative_path)? + } + + #[cfg(feature = "compress_lz4_flex")] + Self::CompressedLz4Flex(builder) => { + builder.append_path_with_name(&achievable_path, relative_path)? + } + }; + Ok(std::fs::metadata(achievable_path)?.len() as usize) + } + + fn finish(self) -> Result<(), io::Error> { + match self { + Self::Uncompressed(mut builder) => { + builder.finish()?; + } + #[cfg(feature = "compress_lz4_flex")] + Self::CompressedLz4Flex(builder) => { + let encoder = builder.into_inner().expect("failed to get encoder"); + encoder.finish()?; + } + } + Ok(()) + } +} + +impl BackupContext { + fn new( + option: Options, + inputs: BoundedReceiver, + outputs: BoundedSender, + ) -> Self { + Self { + option, + cur_tar: None, + cur_tar_size: 0, + tar_cnt: 0, + inputs, + outputs, + } + } + + async fn archive(&mut self) -> Result<(), Box> { + while let Ok(path) = self.inputs.recv().await { + assert!(path.exists()); + assert!(path.metadata()?.is_file()); + + if let Some(archive) = self.archive_single_file(path).await? { + self.outputs.send(archive).await?; + } + } + + if let Some(archive) = self.retrieve_cur_tar()? 
{ + self.outputs.send(archive).await?; + } + + Ok(()) + } + + async fn archive_single_file(&mut self, path: PathBuf) -> Result, io::Error> { + let file_size = path.metadata()?.len() as usize; + + if file_size > self.option.threshold_huge_file { + return Ok(Some(Archive { + path, + archive_type: ArchiveType::Plain, + content_size: file_size, + })); + } + + if self.cur_tar.is_none() { + self.cur_tar = Some(Output::new( + self.next_tar_path(), + self.option.compress_algorithm, + )?); + } + + let cur_tar = self.cur_tar.as_mut().unwrap(); + self.cur_tar_size += cur_tar.append_file(&path)?; + if self.cur_tar_size > self.option.threshold_split_tar { + return self.retrieve_cur_tar(); + } + + Ok(None) + } + + fn cur_tar_path(&self) -> PathBuf { + let path = self.option.stash_dir.join(self.tar_cnt.to_string()); + match self.option.compress_algorithm { + CompressAlgorithm::None => path.with_extension("tar"), + + #[cfg(feature = "compress_lz4_flex")] + CompressAlgorithm::Lz4Flex => path.with_extension("tar.lz4"), + } + } + + fn next_tar_path(&mut self) -> PathBuf { + self.tar_cnt += 1; + self.cur_tar_path() + } + + fn retrieve_cur_tar(&mut self) -> Result, io::Error> { + if let None = self.cur_tar { + return Ok(None); + } + let last_tar = self + .cur_tar + .take() + .expect("last_tar is guaranteed to be Some"); + last_tar.finish()?; + let archive = Archive { + path: self.cur_tar_path(), + archive_type: ArchiveType::Tar, + content_size: self.cur_tar_size, + }; + Ok(Some(archive)) + } +} diff --git a/utils/rust/tar/src/main.rs b/utils/rust/tar/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..de414226f0eb58e98bb4bafb7be7faaa1a3405bf --- /dev/null +++ b/utils/rust/tar/src/main.rs @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2023 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use clap::Parser; +use std::path::PathBuf; +use ylong_runtime::{block_on, sync::mpsc::bounded::bounded_channel}; + +mod backup; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Tar file saving path + #[clap(short, long)] + stash_dir: String, + + /// Files and directories that you want to back up + #[clap(short, long)] + includes: Vec, + + /// includes exceptions that do not need to be backed up + #[clap(short, long)] + excludes: Vec, +} + +fn backup_main() { + let mut args = Args::parse(); + args.excludes.push(args.stash_dir.clone()); + println!("{:#?}", args); + + let (paths_to_backed_tx, paths_to_backed_rx) = bounded_channel(100); + let (outputs_tx, mut outputs_rx) = bounded_channel(100); + + let handles = vec![ + ylong_runtime::spawn(async move { + let _ = backup::scan_files(args.includes, args.excludes, paths_to_backed_tx) + .await + .unwrap(); + }), + ylong_runtime::spawn(async move { + let option = backup::Options { + stash_dir: PathBuf::from(args.stash_dir), + ..Default::default() + }; + let _ = backup::backup_files(option, paths_to_backed_rx, outputs_tx) + .await + .unwrap(); + }), + ylong_runtime::spawn(async move { + while let Ok(archive) = outputs_rx.recv().await { + println!("output: {:?}", archive); + } + }), + ]; + for handle in handles { + block_on(handle).unwrap(); + } +} + +fn main() { + backup_main(); +}