From 606a2e64ce7a908956016ce8bb653303db8b0e89 Mon Sep 17 00:00:00 2001 From: doruche Date: Sat, 20 Sep 2025 12:05:59 +0800 Subject: [PATCH 1/3] src modification done. --- driver/ddriver.sh | 2 +- driver/user_ddriver/ddriver.c | 2 +- fs/simplefs/include/types.h | 3 +- fs/simplefs/src/sfs_utils.c | 9 +- fs/template/tests/eval.sh | 3 + fs/template/tests/mtrain/Makefile | 18 ++++ fs/template/tests/mtrain/data_setup.py | 80 ++++++++++++++++++ fs/template/tests/mtrain/engine.py | 110 +++++++++++++++++++++++++ fs/template/tests/mtrain/eval.py | 40 +++++++++ fs/template/tests/mtrain/model.py | 50 +++++++++++ fs/template/tests/mtrain/train.py | 45 ++++++++++ fs/template/tests/mtrain/utils.py | 33 ++++++++ fs/template/tests/train.sh | 3 + 13 files changed, 394 insertions(+), 4 deletions(-) create mode 100644 fs/template/tests/eval.sh create mode 100644 fs/template/tests/mtrain/Makefile create mode 100644 fs/template/tests/mtrain/data_setup.py create mode 100644 fs/template/tests/mtrain/engine.py create mode 100644 fs/template/tests/mtrain/eval.py create mode 100644 fs/template/tests/mtrain/model.py create mode 100644 fs/template/tests/mtrain/train.py create mode 100644 fs/template/tests/mtrain/utils.py create mode 100644 fs/template/tests/train.sh diff --git a/driver/ddriver.sh b/driver/ddriver.sh index fab085e..8dabc89 100755 --- a/driver/ddriver.sh +++ b/driver/ddriver.sh @@ -19,7 +19,7 @@ fi cd "$WORK_DIR" || exit CONFIG_BLOCK_SZ=512 -BLOCK_COUNT=8192 +BLOCK_COUNT=163840 function usage(){ diff --git a/driver/user_ddriver/ddriver.c b/driver/user_ddriver/ddriver.c index 4dc52fc..8a8f681 100644 --- a/driver/user_ddriver/ddriver.c +++ b/driver/user_ddriver/ddriver.c @@ -44,7 +44,7 @@ extern int errno; #define DRIVER_DESC "A Fake disk driver in user space" #define DRIVER_VERSION "0.1.0" -#define CONFIG_DISK_SZ (4 * 1024 * 1024) +#define CONFIG_DISK_SZ (80 * 1024 * 1024) #define CONFIG_BLOCK_SZ (512) /****************************************************************************** * SECTION: Macro Functions diff --git a/fs/simplefs/include/types.h b/fs/simplefs/include/types.h index cb1cd1c..fc12194 100644 --- a/fs/simplefs/include/types.h +++ b/fs/simplefs/include/types.h @@ -39,7 +39,8 @@ typedef enum sfs_file_type { #define SFS_MAX_FILE_NAME 128 #define SFS_INODE_PER_FILE 1 -#define SFS_DATA_PER_FILE 16 +// #define SFS_DATA_PER_FILE 16 +#define SFS_DATA_PER_FILE 1024 #define SFS_DEFAULT_PERM 0777 #define SFS_IOC_MAGIC 'S' diff --git a/fs/simplefs/src/sfs_utils.c b/fs/simplefs/src/sfs_utils.c index d535374..5e91186 100644 --- a/fs/simplefs/src/sfs_utils.c +++ b/fs/simplefs/src/sfs_utils.c @@ -450,7 +450,14 @@ struct sfs_dentry* sfs_lookup(const char * path, boolean* is_find, boolean* is_r int lvl = 0; boolean is_hit; char* fname = NULL; - char* path_cpy = (char*)malloc(sizeof(path)); + + // char* path_cpy = (char*)malloc(sizeof(path)); + char* path_cpy = (char*)malloc(strlen(path) + 1); + if (path_cpy == NULL) { + SFS_DBG("[%s] malloc error\n", __func__); + return NULL; + } + *is_root = FALSE; strcpy(path_cpy, path); diff --git a/fs/template/tests/eval.sh b/fs/template/tests/eval.sh new file mode 100644 index 0000000..6ac8f36 --- /dev/null +++ b/fs/template/tests/eval.sh @@ -0,0 +1,3 @@ +#! /bin/bash +cd mtrain && make eval WORKDIR=../mnt +cd .. diff --git a/fs/template/tests/mtrain/Makefile b/fs/template/tests/mtrain/Makefile new file mode 100644 index 0000000..691063b --- /dev/null +++ b/fs/template/tests/mtrain/Makefile @@ -0,0 +1,18 @@ +WORKDIR="./" + +.PHONY: train clean eval + +train: + @python data_setup.py $(WORKDIR) + @python model.py $(WORKDIR) + @python train.py $(WORKDIR) + +eval: + @python eval.py $(WORKDIR) + +clean: + # Note this only cleans files generated in the current directory. + # Should delete files manually when WORKDIR is not "./" + @rm -rf __pycache__ + @rm -rf $(WORKDIR)trimmed + @rm -rf $(WORKDIR)model \ No newline at end of file diff --git a/fs/template/tests/mtrain/data_setup.py b/fs/template/tests/mtrain/data_setup.py new file mode 100644 index 0000000..5be5675 --- /dev/null +++ b/fs/template/tests/mtrain/data_setup.py @@ -0,0 +1,80 @@ +import numpy as np +import torch +import random +from torchvision.datasets import MNIST +from torchvision import transforms + +from utils import info + +def scale_MNIST(dataset, num_samples_per_class, n): + """ + Trim a dataset to have a specified number of samples per class, + which is further divided equally into n datasets. + """ + + indices = [] + + for class_label in range(10): + class_indices = [ + i for i, label in enumerate(dataset.targets) if label == class_label + ] + sel_indices = torch.randperm(len(class_indices))[:num_samples_per_class] + indices.extend([class_indices[i] for i in sel_indices]) + random.shuffle(indices) + + n_indices = np.split(np.array(indices), n) + + return [torch.utils.data.Subset(dataset, torch.tensor(subset)) for subset in n_indices] + +def extract(subset): + """ + extract images and labels from a subset and return as tensors + """ + images = [] + labels = [] + for img, label in subset: + images.append(img) + labels.append(label) + images = torch.stack(images) + labels = torch.tensor(labels) + return images, labels + +def main(): + t = transforms.Compose([ + transforms.ToTensor(), + ]) + train_set = MNIST( + root='data', + train=True, + download=True, + transform=t, + ) + test_set = MNIST( + root='data', + train=False, + download=True, + transform=t, + ) + + num_samples_per_class_train = 320 + num_samples_per_class_test = 32 + train_subsets = scale_MNIST(train_set, num_samples_per_class_train, 10) + test_subsets = scale_MNIST(test_set, num_samples_per_class_test, 10) + + scaled_train_sets = [extract(subset) for subset in train_subsets] + scaled_test_sets = [extract(subset) for subset in test_subsets] + + # distribute to separate files + import sys + import os + path_prefix = f"{sys.argv[1]}/trimmed" if len(sys.argv) > 1 else "./trimmed" + if not os.path.exists(path_prefix): + os.makedirs(path_prefix) + for i, (train_data, test_data) in enumerate(zip(scaled_train_sets, scaled_test_sets)): + torch.save(train_data, f'{path_prefix}/scaled_mnist_train_{i}.pt') + torch.save(test_data, f'{path_prefix}/scaled_mnist_test_{i}.pt') + + +if __name__ == '__main__': + main() + info("data_setup.py: data setup complete") \ No newline at end of file diff --git a/fs/template/tests/mtrain/engine.py b/fs/template/tests/mtrain/engine.py new file mode 100644 index 0000000..381c0e6 --- /dev/null +++ b/fs/template/tests/mtrain/engine.py @@ -0,0 +1,110 @@ +import torch + +from tqdm.auto import tqdm + +def train_step( + model: torch.nn.Module, + dataloader: torch.utils.data.DataLoader, + loss_fn: torch.nn.Module, + optimizer: torch.optim.Optimizer, + device: torch.device, +): + """ + Train a single epoch of a model. + """ + model.train() + + train_loss = 0. + train_acc = 0. + + for batch, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + + y_pred = model(X) + loss = loss_fn(y_pred, y) + train_loss += loss.item() + acc = (y_pred.argmax(dim=1) == y).sum().item() / len(y) + train_acc += acc + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + train_loss = train_loss / len(dataloader) + train_acc = train_acc / len(dataloader) + return train_loss, train_acc + +def test_step( + model: torch.nn.Module, + dataloader: torch.utils.data.DataLoader, + loss_fn: torch.nn.Module, + device: torch.device, +): + """ + Test a model. + """ + model.eval() + + test_loss = 0. + test_acc = 0. + + with torch.inference_mode(): + for batch, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + + y_pred = model(X) + loss = loss_fn(y_pred, y) + test_loss += loss.item() + acc = (y_pred.argmax(dim=1) == y).sum().item() / len(y) + test_acc += acc + + test_loss = test_loss / len(dataloader) + test_acc = test_acc / len(dataloader) + return test_loss, test_acc + +def train( + model: torch.nn.Module, + train_dataloader: torch.utils.data.DataLoader, + test_dataloader: torch.utils.data.DataLoader, + loss_fn: torch.nn.Module, + optimizer: torch.optim.Optimizer, + device: torch.device, + epochs: int = 5, +): + """ + Train and test a model for a number of epochs. + """ + results = { + "train_loss": [], + "train_acc": [], + "test_loss": [], + "test_acc": [], + } + + for epoch in tqdm(range(epochs)): + train_loss, train_acc = train_step( + model=model, + dataloader=train_dataloader, + loss_fn=loss_fn, + optimizer=optimizer, + device=device, + ) + test_loss, test_acc = test_step( + model=model, + dataloader=test_dataloader, + loss_fn=loss_fn, + device=device, + ) + + results["train_loss"].append(train_loss) + results["train_acc"].append(train_acc) + results["test_loss"].append(test_loss) + results["test_acc"].append(test_acc) + + print( + f"Epoch: {epoch+1} | " + f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " + f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}" + ) + + return results \ No newline at end of file diff --git a/fs/template/tests/mtrain/eval.py b/fs/template/tests/mtrain/eval.py new file mode 100644 index 0000000..f9ca511 --- /dev/null +++ b/fs/template/tests/mtrain/eval.py @@ -0,0 +1,40 @@ +# Evaluate a model already trained by train.py +import engine +import utils +import model as m +import torch + +EPOCHS = 5 + +path_prefix = "./" + +def main(): + model_path = f"{path_prefix}model/model.pth" + + train_set, test_set = utils.merge_subsets(10, f"{path_prefix}trimmed") + train_loader = utils.load(train_set) + test_loader = utils.load(test_set) + + utils.info("train.py: data loaded") + + device = "cuda" if torch.cuda.is_available() else "cpu" + model = m.TinyVGG() + model.load_state_dict(torch.load(model_path)) + model.to(device) + utils.info(f"train.py: using device: {device}") + + test_loss, test_acc = engine.test_step( + model, + test_loader, + torch.nn.CrossEntropyLoss(), + device=device, + ) + utils.info(f"test loss: {test_loss:.4f}, test acc: {test_acc:.4f}") + + utils.info("train.py: training complete") + +if __name__ == "__main__": + import sys, os + path_prefix = f"{sys.argv[1]}/" if len(sys.argv) > 1 else "./" + + main() \ No newline at end of file diff --git a/fs/template/tests/mtrain/model.py b/fs/template/tests/mtrain/model.py new file mode 100644 index 0000000..1edb9e8 --- /dev/null +++ b/fs/template/tests/mtrain/model.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn + +from utils import info + +class TinyVGG(nn.Module): + """ + TinyVGG model architecture. + """ + def __init__( + self, + in_channels=1, + hidden_units=8, + out_features=10, + ): + super().__init__() + self.layer_stack = nn.Sequential( + nn.Conv2d(in_channels=in_channels, out_channels=hidden_units, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2, stride=2), + + nn.Flatten(), + nn.Linear(in_features=hidden_units * 7 * 7, out_features=out_features), + ) + + def forward(self, x): + return self.layer_stack(x) + +def main(): + """ + Build a new model and save it. + """ + model = TinyVGG() + + import sys, os + path_prefix = f"{sys.argv[1]}/model" if len(sys.argv) > 1 else "./model" + if not os.path.exists(path_prefix): + os.makedirs(path_prefix) + torch.save(model.state_dict(), f"{path_prefix}/model.pth") + +if __name__ == "__main__": + main() + info("model.py: model built and saved successfully.") \ No newline at end of file diff --git a/fs/template/tests/mtrain/train.py b/fs/template/tests/mtrain/train.py new file mode 100644 index 0000000..2af60c6 --- /dev/null +++ b/fs/template/tests/mtrain/train.py @@ -0,0 +1,45 @@ +# Train a new model with data already created by data_setup.py and save it. +import engine +import utils +import model as m +import torch + +LR = 0.1 +EPOCHS = 5 + +path_prefix = "./" + +def main(): + model_path = f"{path_prefix}model/model.pth" + + train_set, test_set = utils.merge_subsets(10, f"{path_prefix}trimmed") + train_loader = utils.load(train_set) + test_loader = utils.load(test_set) + + utils.info("train.py: data loaded") + + device = "cuda" if torch.cuda.is_available() else "cpu" + model = m.TinyVGG() + model.load_state_dict(torch.load(model_path)) + model.to(device) + utils.info(f"train.py: using device: {device}") + + engine.train( + model, + train_loader, + test_loader, + torch.nn.CrossEntropyLoss(), + torch.optim.SGD(model.parameters(), lr=LR), + device=device, + epochs=EPOCHS, + ) + + utils.info("train.py: training complete") + + torch.save(model.state_dict(), model_path) + +if __name__ == "__main__": + import sys, os + path_prefix = f"{sys.argv[1]}/" if len(sys.argv) > 1 else "./" + + main() \ No newline at end of file diff --git a/fs/template/tests/mtrain/utils.py b/fs/template/tests/mtrain/utils.py new file mode 100644 index 0000000..c7a1c4a --- /dev/null +++ b/fs/template/tests/mtrain/utils.py @@ -0,0 +1,33 @@ +import torch + +def info(msg): + BLUE = '\033[94m' + RST = '\033[0m' + print(f"{BLUE}{msg}{RST}") + +def merge_subsets(n_subsets, path): + train_prefix = f"{path}/scaled_mnist_train_" + test_prefix = f"{path}/scaled_mnist_test_" + train_sets = [torch.load(f"{train_prefix}{i}.pt") for i in range(n_subsets)] + test_sets = [torch.load(f"{test_prefix}{i}.pt") for i in range(n_subsets)] + train_set = ( + torch.cat([train_sets[i][0] for i in range(n_subsets)], dim=0), # images + torch.cat([train_sets[i][1] for i in range(n_subsets)], dim=0) # labels + ) + test_set = ( + torch.cat([test_sets[i][0] for i in range(n_subsets)], dim=0), + torch.cat([test_sets[i][1] for i in range(n_subsets)], dim=0) + ) + return train_set, test_set + + +def load( + data, # ([samples], [labels]) + batch_size=32, +): + data_loader = torch.utils.data.DataLoader( + dataset=torch.utils.data.TensorDataset(*data), + batch_size=batch_size, + shuffle=True, + ) + return data_loader \ No newline at end of file diff --git a/fs/template/tests/train.sh b/fs/template/tests/train.sh new file mode 100644 index 0000000..2623354 --- /dev/null +++ b/fs/template/tests/train.sh @@ -0,0 +1,3 @@ +#! /bin/bash +cd mtrain && make train WORKDIR=../mnt +cd .. \ No newline at end of file -- Gitee From 185babb15c5834a59103f97268f719887b060280 Mon Sep 17 00:00:00 2001 From: doruche Date: Sat, 20 Sep 2025 12:24:18 +0800 Subject: [PATCH 2/3] perm changed --- fs/template/tests/eval.sh | 0 fs/template/tests/train.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 fs/template/tests/eval.sh mode change 100644 => 100755 fs/template/tests/train.sh diff --git a/fs/template/tests/eval.sh b/fs/template/tests/eval.sh old mode 100644 new mode 100755 diff --git a/fs/template/tests/train.sh b/fs/template/tests/train.sh old mode 100644 new mode 100755 -- Gitee From 24676432bf41a2499ec419b4080ad29446c818c7 Mon Sep 17 00:00:00 2001 From: doruche Date: Sat, 20 Sep 2025 06:14:43 +0000 Subject: [PATCH 3/3] update fs/simplefs/include/types.h. Signed-off-by: doruche --- fs/simplefs/include/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/simplefs/include/types.h b/fs/simplefs/include/types.h index fc12194..3687ab7 100644 --- a/fs/simplefs/include/types.h +++ b/fs/simplefs/include/types.h @@ -39,8 +39,8 @@ typedef enum sfs_file_type { #define SFS_MAX_FILE_NAME 128 #define SFS_INODE_PER_FILE 1 -// #define SFS_DATA_PER_FILE 16 -#define SFS_DATA_PER_FILE 1024 +#define SFS_DATA_PER_FILE 16 +// #define SFS_DATA_PER_FILE 1024 #define SFS_DEFAULT_PERM 0777 #define SFS_IOC_MAGIC 'S' -- Gitee