From 7f8360eab5e98b44ecd7276573f51bb0b6f8ff98 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 26 Nov 2024 05:45:39 +0000 Subject: [PATCH] 11/26/2024 05:45:39 sync PyTorch file --- cv/detection/yolov5/pytorch/Dockerfile | 17 +- .../yolov5/pytorch/data/Argoverse.yaml | 67 ++ cv/detection/yolov5/pytorch/data/coco128.yaml | 35 +- .../pytorch/data/hyps/hyp.scratch-high.yaml | 34 + .../pytorch/data/hyps/hyp.scratch-low.yaml | 34 + .../yolov5/pytorch/data/hyps/hyp.scratch.yaml | 4 +- .../pytorch/data/scripts/download_weights.sh | 11 +- .../yolov5/pytorch/data/scripts/get_coco.sh | 18 +- .../pytorch/data/scripts/get_coco128.sh | 20 +- cv/detection/yolov5/pytorch/detect.py | 191 +++-- cv/detection/yolov5/pytorch/export.py | 383 +++++++--- cv/detection/yolov5/pytorch/hubconf.py | 27 +- cv/detection/yolov5/pytorch/init.sh | 57 +- .../yolov5/pytorch/loggers/__init__.py | 149 ++++ .../yolov5/pytorch/loggers/wandb/README.md | 147 ++++ .../yolov5/pytorch/loggers/wandb/__init__.py | 0 .../pytorch/loggers/wandb/log_dataset.py | 23 + .../yolov5/pytorch/loggers/wandb/sweep.py | 36 + .../yolov5/pytorch/loggers/wandb/sweep.yaml | 143 ++++ .../pytorch/loggers/wandb/wandb_utils.py | 528 +++++++++++++ cv/detection/yolov5/pytorch/models/common.py | 229 ++++-- .../yolov5/pytorch/models/experimental.py | 64 +- .../pytorch/models/hub/yolov5-bifpn.yaml | 48 ++ .../yolov5/pytorch/models/hub/yolov5-p2.yaml | 78 +- .../yolov5/pytorch/models/hub/yolov5l6.yaml | 94 +-- .../yolov5/pytorch/models/hub/yolov5m6.yaml | 94 +-- .../yolov5/pytorch/models/hub/yolov5n6.yaml | 60 ++ .../pytorch/models/hub/yolov5s-ghost.yaml | 48 ++ .../yolov5/pytorch/models/hub/yolov5s6.yaml | 94 +-- .../yolov5/pytorch/models/hub/yolov5x6.yaml | 94 +-- cv/detection/yolov5/pytorch/models/tf.py | 450 +++++++++++ cv/detection/yolov5/pytorch/models/yolo.py | 197 ++--- .../yolov5/pytorch/models/yolov5l.yaml | 14 +- .../yolov5/pytorch/models/yolov5m.yaml | 14 +- .../yolov5/pytorch/models/yolov5n.yaml | 48 ++ .../yolov5/pytorch/models/yolov5s.yaml | 14 +- .../yolov5/pytorch/models/yolov5x.yaml | 14 +- cv/detection/yolov5/pytorch/requirements.txt | 37 +- cv/detection/yolov5/pytorch/run.sh | 44 +- cv/detection/yolov5/pytorch/train.py | 716 +++++++----------- .../yolov5/pytorch/utils/activations.py | 5 +- .../yolov5/pytorch/utils/augmentations.py | 88 ++- .../yolov5/pytorch/utils/autoanchor.py | 31 +- .../yolov5/pytorch/utils/aws/resume.py | 9 +- .../yolov5/pytorch/utils/aws/userdata.sh | 2 +- .../yolov5/pytorch/utils/callbacks.py | 76 ++ cv/detection/yolov5/pytorch/utils/datasets.py | 263 ++++--- .../yolov5/pytorch/utils/downloads.py | 150 ++++ cv/detection/yolov5/pytorch/utils/general.py | 362 ++++++--- cv/detection/yolov5/pytorch/utils/loss.py | 16 +- cv/detection/yolov5/pytorch/utils/metrics.py | 14 +- cv/detection/yolov5/pytorch/utils/plots.py | 377 +++++---- .../yolov5/pytorch/utils/torch_utils.py | 130 ++-- cv/detection/yolov5/pytorch/val.py | 364 +++++++++ 54 files changed, 4565 insertions(+), 1697 deletions(-) create mode 100755 cv/detection/yolov5/pytorch/data/Argoverse.yaml create mode 100755 cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-high.yaml create mode 100755 cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-low.yaml create mode 100755 cv/detection/yolov5/pytorch/loggers/__init__.py create mode 100755 cv/detection/yolov5/pytorch/loggers/wandb/README.md create mode 100755 cv/detection/yolov5/pytorch/loggers/wandb/__init__.py create mode 100755 cv/detection/yolov5/pytorch/loggers/wandb/log_dataset.py create mode 100755 
cv/detection/yolov5/pytorch/loggers/wandb/sweep.py
 create mode 100755 cv/detection/yolov5/pytorch/loggers/wandb/sweep.yaml
 create mode 100755 cv/detection/yolov5/pytorch/loggers/wandb/wandb_utils.py
 create mode 100755 cv/detection/yolov5/pytorch/models/hub/yolov5-bifpn.yaml
 create mode 100755 cv/detection/yolov5/pytorch/models/hub/yolov5n6.yaml
 create mode 100755 cv/detection/yolov5/pytorch/models/hub/yolov5s-ghost.yaml
 create mode 100755 cv/detection/yolov5/pytorch/models/tf.py
 create mode 100755 cv/detection/yolov5/pytorch/models/yolov5n.yaml
 create mode 100755 cv/detection/yolov5/pytorch/utils/callbacks.py
 create mode 100755 cv/detection/yolov5/pytorch/utils/downloads.py
 create mode 100755 cv/detection/yolov5/pytorch/val.py

diff --git a/cv/detection/yolov5/pytorch/Dockerfile b/cv/detection/yolov5/pytorch/Dockerfile
index e22c110..0ee89b4 100644
--- a/cv/detection/yolov5/pytorch/Dockerfile
+++ b/cv/detection/yolov5/pytorch/Dockerfile
@@ -1,3 +1,5 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+
 # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
 FROM nvcr.io/nvidia/pytorch:21.05-py3

@@ -8,9 +10,9 @@ RUN apt update && apt install -y zip htop screen libgl1-mesa-glx
 COPY requirements.txt .
 RUN python -m pip install --upgrade pip
 RUN pip uninstall -y nvidia-tensorboard nvidia-tensorboard-plugin-dlprof
-RUN pip install --no-cache -r requirements.txt coremltools onnx gsutil notebook
+RUN pip install --no-cache -r requirements.txt coremltools onnx gsutil notebook 'wandb>=0.12.2'
 RUN pip install --no-cache -U torch torchvision numpy
-# RUN pip install --no-cache torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+# RUN pip install --no-cache torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html

 # Create working directory
 RUN mkdir -p /usr/src/app
@@ -19,8 +21,11 @@ WORKDIR /usr/src/app
 # Copy contents
 COPY . /usr/src/app

+# Downloads to user config dir
+ADD https://ultralytics.com/assets/Arial.ttf /root/.config/Ultralytics/
+
 # Set environment variables
-ENV HOME=/usr/src/app
+# ENV HOME=/usr/src/app


 # Usage Examples -------------------------------------------------------------------------------------------------------
@@ -48,3 +53,9 @@ ENV HOME=/usr/src/app

 # Clean up
 # docker system prune -a --volumes
+
+# Update Ubuntu drivers
+# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/
+
+# DDP test
+# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3
diff --git a/cv/detection/yolov5/pytorch/data/Argoverse.yaml b/cv/detection/yolov5/pytorch/data/Argoverse.yaml
new file mode 100755
index 0000000..1625dd1
--- /dev/null
+++ b/cv/detection/yolov5/pytorch/data/Argoverse.yaml
@@ -0,0 +1,67 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
+# Example usage: python train.py --data Argoverse.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── Argoverse ← downloads here
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Argoverse  # dataset root dir
+train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
+val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
+test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
+
+# Classes
+nc: 8  # number of classes
+names: ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign']  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import json
+
+  from tqdm import tqdm
+  from utils.general import download, Path
+
+
+  def argoverse2yolo(set):
+      labels = {}
+      a = json.load(open(set, "rb"))
+      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
+          img_id = annot['image_id']
+          img_name = a['images'][img_id]['name']
+          img_label_name = img_name[:-3] + "txt"
+
+          cls = annot['category_id']  # instance class id
+          x_center, y_center, width, height = annot['bbox']
+          x_center = (x_center + width / 2) / 1920.0  # offset and scale
+          y_center = (y_center + height / 2) / 1200.0  # offset and scale
+          width /= 1920.0  # scale
+          height /= 1200.0  # scale
+
+          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          if not img_dir.exists():
+              img_dir.mkdir(parents=True, exist_ok=True)
+
+          k = str(img_dir / img_label_name)
+          if k not in labels:
+              labels[k] = []
+          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
+
+      for k in labels:
+          with open(k, "w") as f:
+              f.writelines(labels[k])
+
+
+  # Download
+  dir = Path('../datasets/Argoverse')  # dataset root dir
+  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
+  download(urls, dir=dir, delete=False)
+
+  # Convert
+  annotations_dir = 'Argoverse-HD/annotations/'
+  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
+  for d in "train.json", "val.json":
+      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
diff --git a/cv/detection/yolov5/pytorch/data/coco128.yaml b/cv/detection/yolov5/pytorch/data/coco128.yaml
index e75628d..70cf52c 100644
--- a/cv/detection/yolov5/pytorch/data/coco128.yaml
+++ b/cv/detection/yolov5/pytorch/data/coco128.yaml
@@ -1,29 +1,30 @@
-# COCO 2017 dataset http://cocodataset.org - first 128 training images
-# Train command: python train.py --data coco128.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent
-#     /datasets/coco128
-#     /yolov5
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
+# Example usage: python train.py --data coco128.yaml
+# parent
+# ├── yolov5
+# └── datasets
+#     └── coco128 ← downloads here


 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ./datasets/coco128 # dataset root dir +path: ../datasets/coco128 # dataset root dir train: images/train2017 # train images (relative to 'path') 128 images val: images/train2017 # val images (relative to 'path') 128 images test: # test images (optional) # Classes nc: 80 # number of classes -names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', - 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush' ] # class names +names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] # class names # Download script/URL (optional) -download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip +download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip \ No newline at end of file diff --git a/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-high.yaml b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-high.yaml new file mode 100755 index 0000000..862b575 --- /dev/null +++ b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-high.yaml @@ -0,0 +1,34 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +# Hyperparameters for high-augmentation COCO training from scratch +# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 +# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials + +lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) +lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) +momentum: 0.937 # SGD momentum/Adam beta1 +weight_decay: 0.0005 # optimizer weight decay 5e-4 +warmup_epochs: 3.0 # warmup epochs (fractions ok) +warmup_momentum: 0.8 # warmup initial momentum +warmup_bias_lr: 0.1 # warmup initial bias lr +box: 0.05 # box loss gain +cls: 0.5 # cls loss gain +cls_pw: 1.0 # cls BCELoss positive_weight +obj: 1.0 # obj loss gain (scale with pixels) +obj_pw: 1.0 # obj BCELoss positive_weight +iou_t: 0.20 # IoU training threshold +anchor_t: 4.0 # anchor-multiple threshold +# anchors: 3 # 
anchors per output layer (0 to ignore) +fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) +hsv_h: 0.015 # image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # image HSV-Value augmentation (fraction) +degrees: 0.0 # image rotation (+/- deg) +translate: 0.1 # image translation (+/- fraction) +scale: 0.9 # image scale (+/- gain) +shear: 0.0 # image shear (+/- deg) +perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # image flip up-down (probability) +fliplr: 0.5 # image flip left-right (probability) +mosaic: 1.0 # image mosaic (probability) +mixup: 0.1 # image mixup (probability) +copy_paste: 0.1 # segment copy-paste (probability) \ No newline at end of file diff --git a/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-low.yaml b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-low.yaml new file mode 100755 index 0000000..b093a95 --- /dev/null +++ b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch-low.yaml @@ -0,0 +1,34 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +# Hyperparameters for low-augmentation COCO training from scratch +# python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear +# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials + +lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) +lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) +momentum: 0.937 # SGD momentum/Adam beta1 +weight_decay: 0.0005 # optimizer weight decay 5e-4 +warmup_epochs: 3.0 # warmup epochs (fractions ok) +warmup_momentum: 0.8 # warmup initial momentum +warmup_bias_lr: 0.1 # warmup initial bias lr +box: 0.05 # box loss gain +cls: 0.5 # cls loss gain +cls_pw: 1.0 # cls BCELoss positive_weight +obj: 1.0 # obj loss gain (scale with pixels) +obj_pw: 1.0 # obj BCELoss positive_weight +iou_t: 0.20 # IoU training threshold +anchor_t: 4.0 # anchor-multiple threshold +# anchors: 3 # anchors per output layer (0 to ignore) +fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) +hsv_h: 0.015 # image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # image HSV-Value augmentation (fraction) +degrees: 0.0 # image rotation (+/- deg) +translate: 0.1 # image translation (+/- fraction) +scale: 0.5 # image scale (+/- gain) +shear: 0.0 # image shear (+/- deg) +perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # image flip up-down (probability) +fliplr: 0.5 # image flip left-right (probability) +mosaic: 1.0 # image mosaic (probability) +mixup: 0.0 # image mixup (probability) +copy_paste: 0.0 # segment copy-paste (probability) \ No newline at end of file diff --git a/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch.yaml b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch.yaml index b2cf2e3..31f6d14 100644 --- a/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch.yaml +++ b/cv/detection/yolov5/pytorch/data/hyps/hyp.scratch.yaml @@ -1,10 +1,10 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license # Hyperparameters for COCO training from scratch # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials - lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) -lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) +lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 
 weight_decay: 0.0005  # optimizer weight decay 5e-4
 warmup_epochs: 3.0  # warmup epochs (fractions ok)
diff --git a/cv/detection/yolov5/pytorch/data/scripts/download_weights.sh b/cv/detection/yolov5/pytorch/data/scripts/download_weights.sh
index 6a279f1..b4b0ccd 100644
--- a/cv/detection/yolov5/pytorch/data/scripts/download_weights.sh
+++ b/cv/detection/yolov5/pytorch/data/scripts/download_weights.sh
@@ -1,10 +1,15 @@
 #!/bin/bash
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 # Download latest models from https://github.com/ultralytics/yolov5/releases
-# Usage:
-#    $ bash path/to/download_weights.sh
+# Example usage: bash path/to/download_weights.sh
+# parent
+# └── yolov5
+#     ├── yolov5s.pt  ← downloads here
+#     ├── yolov5m.pt
+#     └── ...

+python - <<EOF
diff --git a/cv/detection/yolov5/pytorch/detect.py b/cv/detection/yolov5/pytorch/detect.py
+            check_requirements(('opencv-python>=4.5.4',))
+            net = cv2.dnn.readNetFromONNX(w)
+        else:
+            check_requirements(('onnx', 'onnxruntime'))
+            import onnxruntime
+            session = onnxruntime.InferenceSession(w, None)
+    else:  # TensorFlow models
+        check_requirements(('tensorflow>=2.4.1',))
+        import tensorflow as tf
+        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+            def wrap_frozen_graph(gd, inputs, outputs):
+                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
+                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
+                               tf.nest.map_structure(x.graph.as_graph_element, outputs))
+
+            graph_def = tf.Graph().as_graph_def()
+            graph_def.ParseFromString(open(w, 'rb').read())
+            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
+        elif saved_model:
+            model = tf.keras.models.load_model(w)
+        elif tflite:
+            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
+            interpreter.allocate_tensors()  # allocate
+            input_details = interpreter.get_input_details()  # inputs
+            output_details = interpreter.get_output_details()  # outputs
+            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
     imgsz = check_img_size(imgsz, s=stride)  # check image size
-    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-    if half:
-        model.half()  # to FP16
-
-    # Second-stage classifier
-    classify = False
-    if classify:
-        modelc = load_classifier(name='resnet50', n=2)  # initialize
-        modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()

     # Dataloader
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs

     # Run inference
-    if device.type != 'cpu':
-        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
-    t0 = time.time()
+    if pt and device.type != 'cpu':
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
+    dt, seen = [0.0, 0.0, 0.0], 0
     for path, img, im0s, vid_cap in dataset:
-        img = torch.from_numpy(img).to(device)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if img.ndimension() == 3:
-            img = img.unsqueeze(0)
+        t1 = time_sync()
+        if onnx:
+            img = img.astype('float32')
+        else:
+            img = torch.from_numpy(img).to(device)
+            img = img.half() if half else
img.float() # uint8 to fp16/32 + img = img / 255.0 # 0 - 255 to 0.0 - 1.0 + if len(img.shape) == 3: + img = img[None] # expand for batch dim + t2 = time_sync() + dt[0] += t2 - t1 # Inference - t1 = time_synchronized() - pred = model(img, - augment=augment, - visualize=increment_path(save_dir / 'features', mkdir=True) if visualize else False)[0] - - # Apply NMS + if pt: + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred = model(img, augment=augment, visualize=visualize)[0] + elif onnx: + if dnn: + net.setInput(img) + pred = torch.tensor(net.forward()) + else: + pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) + else: # tensorflow model (tflite, pb, saved_model) + imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy + if pb: + pred = frozen_func(x=tf.constant(imn)).numpy() + elif saved_model: + pred = model(imn, training=False).numpy() + elif tflite: + if int8: + scale, zero_point = input_details[0]['quantization'] + imn = (imn / scale + zero_point).astype(np.uint8) # de-scale + interpreter.set_tensor(input_details[0]['index'], imn) + interpreter.invoke() + pred = interpreter.get_tensor(output_details[0]['index']) + if int8: + scale, zero_point = output_details[0]['quantization'] + pred = (pred.astype(np.float32) - zero_point) * scale # re-scale + pred[..., 0] *= imgsz[1] # x + pred[..., 1] *= imgsz[0] # y + pred[..., 2] *= imgsz[1] # w + pred[..., 3] *= imgsz[0] # h + pred = torch.tensor(pred) + t3 = time_sync() + dt[1] += t3 - t2 + + # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - t2 = time_synchronized() + dt[2] += time_sync() - t3 - # Apply Classifier + # Second-stage classifier (optional) if classify: pred = apply_classifier(pred, modelc, img, im0s) - # Process detections - for i, det in enumerate(pred): # detections per image + # Process predictions + for i, det in enumerate(pred): # per image + seen += 1 if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count else: @@ -126,6 +201,7 @@ def run(weights='yolov5s.pt', # model.pt path(s) s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop + annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() @@ -146,14 +222,15 @@ def run(weights='yolov5s.pt', # model.pt path(s) if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') - plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness) + annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) - # Print time (inference + NMS) - print(f'{s}Done. ({t2 - t1:.3f}s)') + # Print time (inference-only) + print(f'{s}Done. 
({t3 - t2:.3f}s)') # Stream results + im0 = annotator.result() if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond @@ -177,21 +254,21 @@ def run(weights='yolov5s.pt', # model.pt path(s) vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) + # Print results + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - print(f"Results saved to {save_dir}{s}") - + print(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning) - print(f'Done. ({time.time() - t0:.3f}s)') - def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') - parser.add_argument('--source', type=str, default='data/images', help='file/dir/URL/glob, 0 for webcam') - parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') @@ -201,24 +278,26 @@ def parse_opt(): parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') parser.add_argument('--nosave', action='store_true', help='do not save images/videos') - parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') - parser.add_argument('--project', default='runs/detect', help='save results to project/name') + parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', 
help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(FILE.stem, opt) return opt def main(opt): - print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) check_requirements(exclude=('tensorboard', 'thop')) run(**vars(opt)) diff --git a/cv/detection/yolov5/pytorch/export.py b/cv/detection/yolov5/pytorch/export.py index b7ff074..2aca0f3 100644 --- a/cv/detection/yolov5/pytorch/export.py +++ b/cv/detection/yolov5/pytorch/export.py @@ -1,10 +1,28 @@ -"""Export a YOLOv5 *.pt model to TorchScript, ONNX, CoreML formats +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Export a YOLOv5 PyTorch model to TorchScript, ONNX, CoreML, TensorFlow (saved_model, pb, TFLite, TF.js,) formats +TensorFlow exports authored by https://github.com/zldrobit Usage: - $ python path/to/export.py --weights yolov5s.pt --img 640 --batch 1 + $ python path/to/export.py --weights yolov5s.pt --include torchscript onnx coreml saved_model pb tflite tfjs + +Inference: + $ python path/to/detect.py --weights yolov5s.pt + yolov5s.onnx (must export with --dynamic) + yolov5s_saved_model + yolov5s.pb + yolov5s.tflite + +TensorFlow.js: + $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example + $ npm install + $ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model + $ npm start """ import argparse +import os +import subprocess import sys import time from pathlib import Path @@ -13,19 +31,224 @@ import torch import torch.nn as nn from torch.utils.mobile_optimizer import optimize_for_mobile -FILE = Path(__file__).absolute() -sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import Conv -from models.yolo import Detect from models.experimental import attempt_load -from utils.activations import Hardswish, SiLU -from utils.general import colorstr, check_img_size, check_requirements, file_size, set_logging +from models.yolo import Detect +from utils.activations import SiLU +from utils.datasets import LoadImages +from utils.general import colorstr, check_dataset, check_img_size, check_requirements, file_size, print_args, \ + set_logging, url2file from utils.torch_utils import select_device -def run(weights='./yolov5s.pt', # weights path - img_size=(640, 640), # image (height, width) +def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')): + # YOLOv5 TorchScript model export + try: + print(f'\n{prefix} starting export with torch {torch.__version__}...') + f = file.with_suffix('.torchscript.pt') + + ts = torch.jit.trace(model, im, strict=False) + (optimize_for_mobile(ts) if optimize else ts).save(f) + + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + except Exception as e: + print(f'{prefix} export failure: {e}') + + +def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')): + # YOLOv5 ONNX export + try: + check_requirements(('onnx',)) + import onnx + + print(f'\n{prefix} starting export with onnx {onnx.__version__}...') + f = file.with_suffix('.onnx') + + torch.onnx.export(model, im, f, verbose=False, opset_version=opset, + training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, + do_constant_folding=not train, + 
input_names=['images'], + output_names=['output'], + dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # shape(1,3,640,640) + 'output': {0: 'batch', 1: 'anchors'} # shape(1,25200,85) + } if dynamic else None) + + # Checks + model_onnx = onnx.load(f) # load onnx model + onnx.checker.check_model(model_onnx) # check onnx model + # print(onnx.helper.printable_graph(model_onnx.graph)) # print + + # Simplify + if simplify: + try: + check_requirements(('onnx-simplifier',)) + import onnxsim + + print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') + model_onnx, check = onnxsim.simplify( + model_onnx, + dynamic_input_shape=dynamic, + input_shapes={'images': list(im.shape)} if dynamic else None) + assert check, 'assert check failed' + onnx.save(model_onnx, f) + except Exception as e: + print(f'{prefix} simplifier failure: {e}') + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + print(f"{prefix} run --dynamic ONNX model inference with: 'python detect.py --weights {f}'") + except Exception as e: + print(f'{prefix} export failure: {e}') + + +def export_coreml(model, im, file, prefix=colorstr('CoreML:')): + # YOLOv5 CoreML export + ct_model = None + try: + check_requirements(('coremltools',)) + import coremltools as ct + + print(f'\n{prefix} starting export with coremltools {ct.__version__}...') + f = file.with_suffix('.mlmodel') + + model.train() # CoreML exports should be placed in model.train() mode + ts = torch.jit.trace(model, im, strict=False) # TorchScript model + ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])]) + ct_model.save(f) + + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + except Exception as e: + print(f'\n{prefix} export failure: {e}') + + return ct_model + + +def export_saved_model(model, im, file, dynamic, + tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, + conf_thres=0.25, prefix=colorstr('TensorFlow saved_model:')): + # YOLOv5 TensorFlow saved_model export + keras_model = None + try: + import tensorflow as tf + from tensorflow import keras + from models.tf import TFModel, TFDetect + + print(f'\n{prefix} starting export with tensorflow {tf.__version__}...') + f = str(file).replace('.pt', '_saved_model') + batch_size, ch, *imgsz = list(im.shape) # BCHW + + tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) + im = tf.zeros((batch_size, *imgsz, 3)) # BHWC order for TensorFlow + y = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) + inputs = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) + outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) + keras_model = keras.Model(inputs=inputs, outputs=outputs) + keras_model.trainable = False + keras_model.summary() + keras_model.save(f, save_format='tf') + + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + except Exception as e: + print(f'\n{prefix} export failure: {e}') + + return keras_model + + +def export_pb(keras_model, im, file, prefix=colorstr('TensorFlow GraphDef:')): + # YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow + try: + import tensorflow as tf + from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + + print(f'\n{prefix} starting export with tensorflow {tf.__version__}...') + f = file.with_suffix('.pb') 
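+        # Note: the steps below wrap the Keras model in a tf.function, trace it to a
+        # ConcreteFunction, and fold its variables into constants, so the graph can be
+        # serialized by tf.io.write_graph as a single standalone *.pb GraphDef.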
+ + m = tf.function(lambda x: keras_model(x)) # full model + m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) + frozen_func = convert_variables_to_constants_v2(m) + frozen_func.graph.as_graph_def() + tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False) + + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + except Exception as e: + print(f'\n{prefix} export failure: {e}') + + +def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('TensorFlow Lite:')): + # YOLOv5 TensorFlow Lite export + try: + import tensorflow as tf + from models.tf import representative_dataset_gen + + print(f'\n{prefix} starting export with tensorflow {tf.__version__}...') + batch_size, ch, *imgsz = list(im.shape) # BCHW + f = str(file).replace('.pt', '-fp16.tflite') + + converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] + converter.target_spec.supported_types = [tf.float16] + converter.optimizations = [tf.lite.Optimize.DEFAULT] + if int8: + dataset = LoadImages(check_dataset(data)['train'], img_size=imgsz, auto=False) # representative data + converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib) + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.target_spec.supported_types = [] + converter.inference_input_type = tf.uint8 # or tf.int8 + converter.inference_output_type = tf.uint8 # or tf.int8 + converter.experimental_new_quantizer = False + f = str(file).replace('.pt', '-int8.tflite') + + tflite_model = converter.convert() + open(f, "wb").write(tflite_model) + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + + except Exception as e: + print(f'\n{prefix} export failure: {e}') + + +def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')): + # YOLOv5 TensorFlow.js export + try: + check_requirements(('tensorflowjs',)) + import re + import tensorflowjs as tfjs + + print(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...') + f = str(file).replace('.pt', '_web_model') # js dir + f_pb = file.with_suffix('.pb') # *.pb path + f_json = f + '/model.json' # *.json path + + cmd = f"tensorflowjs_converter --input_format=tf_frozen_model " \ + f"--output_node_names='Identity,Identity_1,Identity_2,Identity_3' {f_pb} {f}" + subprocess.run(cmd, shell=True) + + json = open(f_json).read() + with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order + subst = re.sub( + r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, ' + r'"Identity.?.?": {"name": "Identity.?.?"}, ' + r'"Identity.?.?": {"name": "Identity.?.?"}, ' + r'"Identity.?.?": {"name": "Identity.?.?"}}}', + r'{"outputs": {"Identity": {"name": "Identity"}, ' + r'"Identity_1": {"name": "Identity_1"}, ' + r'"Identity_2": {"name": "Identity_2"}, ' + r'"Identity_3": {"name": "Identity_3"}}}', + json) + j.write(subst) + + print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') + except Exception as e: + print(f'\n{prefix} export failure: {e}') + + +@torch.no_grad() +def run(data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path' + weights=ROOT / 'yolov5s.pt', # weights path + imgsz=(640, 640), # image (height, width) batch_size=1, # batch size device='cpu', # cuda device, i.e. 
0 or 0,1,2,3 or cpu include=('torchscript', 'onnx', 'coreml'), # include formats @@ -33,35 +256,39 @@ def run(weights='./yolov5s.pt', # weights path inplace=False, # set YOLOv5 Detect() inplace=True train=False, # model.train() mode optimize=False, # TorchScript: optimize for mobile - dynamic=False, # ONNX: dynamic axes + int8=False, # CoreML/TF INT8 quantization + dynamic=False, # ONNX/TF: dynamic axes simplify=False, # ONNX: simplify model - opset_version=12, # ONNX: opset version + opset=12, # ONNX: opset version + topk_per_class=100, # TF.js NMS: topk per class to keep + topk_all=100, # TF.js NMS: topk for all classes to keep + iou_thres=0.45, # TF.js NMS: IoU threshold + conf_thres=0.25 # TF.js NMS: confidence threshold ): t = time.time() include = [x.lower() for x in include] - img_size *= 2 if len(img_size) == 1 else 1 # expand + tf_exports = list(x in include for x in ('saved_model', 'pb', 'tflite', 'tfjs')) # TensorFlow exports + imgsz *= 2 if len(imgsz) == 1 else 1 # expand + file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights) # Load PyTorch model device = select_device(device) assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0' - model = attempt_load(weights, map_location=device) # load FP32 model - labels = model.names + model = attempt_load(weights, map_location=device, inplace=True, fuse=True) # load FP32 model + nc, names = model.nc, model.names # number of classes, class names # Input gs = int(max(model.stride)) # grid size (max stride) - img_size = [check_img_size(x, gs) for x in img_size] # verify img_size are gs-multiples - img = torch.zeros(batch_size, 3, *img_size).to(device) # image size(1,3,320,192) iDetection + imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples + im = torch.zeros(batch_size, 3, *imgsz).to(device) # image size(1,3,320,192) BCHW iDetection # Update model if half: - img, model = img.half(), model.half() # to FP16 + im, model = im.half(), model.half() # to FP16 model.train() if train else model.eval() # training mode = no Detect() layer grid construction for k, m in model.named_modules(): - m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility if isinstance(m, Conv): # assign export-friendly activations - if isinstance(m.act, nn.Hardswish): - m.act = Hardswish() - elif isinstance(m.act, nn.SiLU): + if isinstance(m.act, nn.SiLU): m.act = SiLU() elif isinstance(m, Detect): m.inplace = inplace @@ -69,102 +296,66 @@ def run(weights='./yolov5s.pt', # weights path # m.forward = m.forward_export # assign forward (optional) for _ in range(2): - y = model(img) # dry runs - print(f"\n{colorstr('PyTorch:')} starting from {weights} ({file_size(weights):.1f} MB)") - - # TorchScript export ----------------------------------------------------------------------------------------------- - if 'torchscript' in include or 'coreml' in include: - prefix = colorstr('TorchScript:') - try: - print(f'\n{prefix} starting export with torch {torch.__version__}...') - f = weights.replace('.pt', '.torchscript.pt') # filename - ts = torch.jit.trace(model, img, strict=False) - (optimize_for_mobile(ts) if optimize else ts).save(f) - print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') - except Exception as e: - print(f'{prefix} export failure: {e}') - - # ONNX export ------------------------------------------------------------------------------------------------------ + y = model(im) # dry runs + print(f"\n{colorstr('PyTorch:')} 
starting from {file} ({file_size(file):.1f} MB)") + + # Exports + if 'torchscript' in include: + export_torchscript(model, im, file, optimize) if 'onnx' in include: - prefix = colorstr('ONNX:') - try: - import onnx - - print(f'{prefix} starting export with onnx {onnx.__version__}...') - f = weights.replace('.pt', '.onnx') # filename - torch.onnx.export(model, img, f, verbose=False, opset_version=opset_version, - training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL, - do_constant_folding=not train, - input_names=['images'], - output_names=['output'], - dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # shape(1,3,640,640) - 'output': {0: 'batch', 1: 'anchors'} # shape(1,25200,85) - } if dynamic else None) - - # Checks - model_onnx = onnx.load(f) # load onnx model - onnx.checker.check_model(model_onnx) # check onnx model - # print(onnx.helper.printable_graph(model_onnx.graph)) # print - - # Simplify - if simplify: - try: - check_requirements(['onnx-simplifier']) - import onnxsim - - print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') - model_onnx, check = onnxsim.simplify( - model_onnx, - dynamic_input_shape=dynamic, - input_shapes={'images': list(img.shape)} if dynamic else None) - assert check, 'assert check failed' - onnx.save(model_onnx, f) - except Exception as e: - print(f'{prefix} simplifier failure: {e}') - print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') - except Exception as e: - print(f'{prefix} export failure: {e}') - - # CoreML export ---------------------------------------------------------------------------------------------------- + export_onnx(model, im, file, opset, train, dynamic, simplify) if 'coreml' in include: - prefix = colorstr('CoreML:') - try: - import coremltools as ct - - print(f'{prefix} starting export with coremltools {ct.__version__}...') - assert train, 'CoreML exports should be placed in model.train() mode with `python export.py --train`' - model = ct.convert(ts, inputs=[ct.ImageType('image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) - f = weights.replace('.pt', '.mlmodel') # filename - model.save(f) - print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)') - except Exception as e: - print(f'{prefix} export failure: {e}') + export_coreml(model, im, file) + + # TensorFlow Exports + if any(tf_exports): + pb, tflite, tfjs = tf_exports[1:] + assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.' + model = export_saved_model(model, im, file, dynamic, tf_nms=tfjs, agnostic_nms=tfjs, + topk_per_class=topk_per_class, topk_all=topk_all, conf_thres=conf_thres, + iou_thres=iou_thres) # keras model + if pb or tfjs: # pb prerequisite to tfjs + export_pb(model, im, file) + if tflite: + export_tflite(model, im, file, int8=int8, data=data, ncalib=100) + if tfjs: + export_tfjs(model, im, file) # Finish - print(f'\nExport complete ({time.time() - t:.2f}s). 
Visualize with https://github.com/lutzroeder/netron.') + print(f'\nExport complete ({time.time() - t:.2f}s)' + f"\nResults saved to {colorstr('bold', file.parent.resolve())}" + f'\nVisualize with https://netron.app') def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image (height, width)') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)') parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--include', nargs='+', default=['torchscript', 'onnx', 'coreml'], help='include formats') parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True') parser.add_argument('--train', action='store_true', help='model.train() mode') parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile') - parser.add_argument('--dynamic', action='store_true', help='ONNX: dynamic axes') + parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization') + parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes') parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model') - parser.add_argument('--opset-version', type=int, default=12, help='ONNX: opset version') + parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version') + parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep') + parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep') + parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold') + parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold') + parser.add_argument('--include', nargs='+', + default=['torchscript', 'onnx'], + help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)') opt = parser.parse_args() + print_args(FILE.stem, opt) return opt def main(opt): set_logging() - print(colorstr('export: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) run(**vars(opt)) diff --git a/cv/detection/yolov5/pytorch/hubconf.py b/cv/detection/yolov5/pytorch/hubconf.py index 55536c3..a697e03 100644 --- a/cv/detection/yolov5/pytorch/hubconf.py +++ b/cv/detection/yolov5/pytorch/hubconf.py @@ -1,4 +1,6 @@ -"""YOLOv5 PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/ Usage: import torch @@ -25,13 +27,14 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo """ from pathlib import Path - from models.yolo import Model, attempt_load + from models.yolo import Model + from models.experimental import attempt_load from utils.general import check_requirements, set_logging - from utils.google_utils import attempt_download + from utils.downloads import 
attempt_download from utils.torch_utils import select_device - file = Path(__file__).absolute() - check_requirements(requirements=file.parent / 'requirements.txt', exclude=('tensorboard', 'thop', 'opencv-python')) + file = Path(__file__).resolve() + check_requirements(exclude=('tensorboard', 'thop', 'opencv-python')) set_logging(verbose=verbose) save_dir = Path('') if str(name).endswith('.pt') else file.parent @@ -67,6 +70,11 @@ def custom(path='path/to/model.pt', autoshape=True, verbose=True, device=None): return _create(path, autoshape=autoshape, verbose=verbose, device=device) +def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): + # YOLOv5-nano model https://github.com/ultralytics/yolov5 + return _create('yolov5n', pretrained, channels, classes, autoshape, verbose, device) + + def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): # YOLOv5-small model https://github.com/ultralytics/yolov5 return _create('yolov5s', pretrained, channels, classes, autoshape, verbose, device) @@ -87,6 +95,11 @@ def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, verbose=Tru return _create('yolov5x', pretrained, channels, classes, autoshape, verbose, device) +def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): + # YOLOv5-nano-P6 model https://github.com/ultralytics/yolov5 + return _create('yolov5n6', pretrained, channels, classes, autoshape, verbose, device) + + def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): # YOLOv5-small-P6 model https://github.com/ultralytics/yolov5 return _create('yolov5s6', pretrained, channels, classes, autoshape, verbose, device) @@ -115,9 +128,11 @@ if __name__ == '__main__': import cv2 import numpy as np from PIL import Image + from pathlib import Path imgs = ['data/images/zidane.jpg', # filename - 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg', # URI + Path('data/images/zidane.jpg'), # Path + 'https://ultralytics.com/images/zidane.jpg', # URI cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV Image.open('data/images/bus.jpg'), # PIL np.zeros((320, 640, 3))] # numpy diff --git a/cv/detection/yolov5/pytorch/init.sh b/cv/detection/yolov5/pytorch/init.sh index 80e367c..c233520 100644 --- a/cv/detection/yolov5/pytorch/init.sh +++ b/cv/detection/yolov5/pytorch/init.sh @@ -1,42 +1,43 @@ #!/bin/bash -# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2023-2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -if [[ "$(uname)" == "Linux" ]]; then - if command -v apt &> /dev/null; then - apt install -y numactl libgl1-mesa-dev - elif command -v yum &> /dev/null; then - yum install -y numactl mesa-libGL - else - echo "Unsupported package manager" - exit 1 - fi -else - echo "Unsupported operating system" - exit 1 -fi +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + pip3 install -r requirements.txt +mkdir datasets +cd datasets +wget -q http://files.deepspark.org.cn:880/deepspark/data/datasets/coco_yolov5.tar.gz +tar -zxf coco_yolov5.tar.gz +mv coco_yolov5 coco +cd .. + + PY_VERSION=$(python3 -V 2>&1|awk '{print $2}'|awk -F '.' '{print $2}') -if [ "$PY_VERSION" == "10" ]; then +if [ "$PY_VERSION" == "10" ]; +then + + pip3 install -r requirements.txt pip3 install matplotlib==3.8.2 - pip3 install numpy==1.22.4 - pip3 install Pillow==9.5 + pip3 install numpy==1.26.4 + pip3 install pandas==2.0.0 else - echo "only for python3.10" + pip3 install -r requirements.txt fi + + wandb disabled pip3 install pycocotools + diff --git a/cv/detection/yolov5/pytorch/loggers/__init__.py b/cv/detection/yolov5/pytorch/loggers/__init__.py new file mode 100755 index 0000000..f453f44 --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/__init__.py @@ -0,0 +1,149 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Logging utils +""" + +import warnings +from threading import Thread + +import torch +from torch.utils.tensorboard import SummaryWriter + +from utils.general import colorstr, emojis +from utils.loggers.wandb.wandb_utils import WandbLogger +from utils.plots import plot_images, plot_results +from utils.torch_utils import de_parallel + +LOGGERS = ('csv', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases + +try: + import wandb + + assert hasattr(wandb, '__version__') # verify package import not local dir +except (ImportError, AssertionError): + wandb = None + + +class Loggers(): + # YOLOv5 Loggers class + def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): + self.save_dir = save_dir + self.weights = weights + self.opt = opt + self.hyp = hyp + self.logger = logger # for printing results to console + self.include = include + self.keys = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss + 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', # metrics + 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss + 'x/lr0', 'x/lr1', 'x/lr2'] # params + for k in LOGGERS: + setattr(self, k, None) # init empty logger dictionary + self.csv = True # always log to csv + + # Message + if not wandb: + prefix = colorstr('Weights & Biases: ') + s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)" + print(emojis(s)) + + # TensorBoard + s = self.save_dir + if 'tb' in self.include and not self.opt.evolve: + prefix = colorstr('TensorBoard: ') + self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") + self.tb = SummaryWriter(str(s)) + + # W&B + if wandb and 'wandb' in self.include: + wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') + run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and 
not wandb_artifact_resume else None + self.opt.hyp = self.hyp # add hyperparameters + self.wandb = WandbLogger(self.opt, run_id) + else: + self.wandb = None + + def on_pretrain_routine_end(self): + # Callback runs on pre-train routine end + paths = self.save_dir.glob('*labels*.jpg') # training labels + if self.wandb: + self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) + + def on_train_batch_end(self, ni, model, imgs, targets, paths, plots, sync_bn): + # Callback runs on train batch end + if plots: + if ni == 0: + if not sync_bn: # tb.add_graph() --sync known issue https://github.com/ultralytics/yolov5/issues/3754 + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress jit trace warning + self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) + #if ni < 3: + # f = self.save_dir / f'train_batch{ni}.jpg' # filename + # Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + if self.wandb and ni == 10: + files = sorted(self.save_dir.glob('train*.jpg')) + self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) + + def on_train_epoch_end(self, epoch): + # Callback runs on train epoch end + if self.wandb: + self.wandb.current_epoch = epoch + 1 + + def on_val_image_end(self, pred, predn, path, names, im): + # Callback runs on val image end + if self.wandb: + self.wandb.val_one_image(pred, predn, path, names, im) + + def on_val_end(self): + # Callback runs on val end + if self.wandb: + files = sorted(self.save_dir.glob('val*.jpg')) + self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]}) + + def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): + # Callback runs at the end of each fit (train+val) epoch + x = {k: v for k, v in zip(self.keys, vals)} # dict + if self.csv: + file = self.save_dir / 'results.csv' + n = len(x) + 1 # number of cols + s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header + with open(file, 'a') as f: + f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + + if self.wandb: + self.wandb.log(x) + self.wandb.end_epoch(best_result=best_fitness == fi) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + # Callback runs on model save event + if self.wandb: + if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: + self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) + + def on_train_end(self, last, best, plots, epoch): + # Callback runs on training end + if plots: + plot_results(file=self.save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] + files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter + + if self.tb: + import cv2 + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + + if self.wandb: + self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) + # Calling wandb.log. 
TODO: Refactor this into WandbLogger.log_model + if not self.opt.evolve: + wandb.log_artifact(str(best if best.exists() else last), type='model', + name='run_' + self.wandb.wandb_run.id + '_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() + else: + self.wandb.finish_run() + self.wandb = WandbLogger(self.opt) diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/README.md b/cv/detection/yolov5/pytorch/loggers/wandb/README.md new file mode 100755 index 0000000..dd7dc1e --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/wandb/README.md @@ -0,0 +1,147 @@ +📚 This guide explains how to use **Weights & Biases** (W&B) with YOLOv5 🚀. UPDATED 29 September 2021. +* [About Weights & Biases](#about-weights-&-biases) +* [First-Time Setup](#first-time-setup) +* [Viewing runs](#viewing-runs) +* [Advanced Usage: Dataset Versioning and Evaluation](#advanced-usage) +* [Reports: Share your work with the world!](#reports) + +## About Weights & Biases +Think of [W&B](https://wandb.ai/site?utm_campaign=repo_yolo_wandbtutorial) like GitHub for machine learning models. With a few lines of code, save everything you need to debug, compare and reproduce your models — architecture, hyperparameters, git commits, model weights, GPU usage, and even datasets and predictions. + +Used by top researchers including teams at OpenAI, Lyft, Github, and MILA, W&B is part of the new standard of best practices for machine learning. How W&B can help you optimize your machine learning workflows: + + * [Debug](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Free-2) model performance in real time + * [GPU usage](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#System-4) visualized automatically + * [Custom charts](https://wandb.ai/wandb/customizable-charts/reports/Powerful-Custom-Charts-To-Debug-Model-Peformance--VmlldzoyNzY4ODI) for powerful, extensible visualization + * [Share insights](https://wandb.ai/wandb/getting-started/reports/Visualize-Debug-Machine-Learning-Models--VmlldzoyNzY5MDk#Share-8) interactively with collaborators + * [Optimize hyperparameters](https://docs.wandb.com/sweeps) efficiently + * [Track](https://docs.wandb.com/artifacts) datasets, pipelines, and production models + +## First-Time Setup +
+When you first train, W&B will prompt you to create a new account and will generate an **API key** for you. If you are an existing user you can retrieve your key from https://wandb.ai/authorize. This key is used to tell W&B where to log your data. You only need to supply your key once; it is then remembered on the same device.
+
+W&B will create a cloud **project** (default is 'YOLOv5') for your training runs, and each new training run will be given a unique run **name** within that project as project/name. You can also manually set your project and run name with:
+
+ ```shell
+ $ python train.py --project ... --name ...
+ ```
+
+YOLOv5 notebook examples are available on Colab and Kaggle.
+
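+A minimal first-time setup sketch (the run name is illustrative; the interactive key prompt can be skipped by exporting `WANDB_API_KEY` beforehand):
+
+ ```shell
+ # install the client and authenticate once per machine
+ pip install wandb
+ wandb login    # paste the API key from https://wandb.ai/authorize
+
+ # subsequent trainings are then tracked automatically
+ python train.py --project YOLOv5 --name first_run
+ ```
+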
+ +## Viewing Runs +
+Run information streams from your environment to the W&B cloud console as you train. This allows you to monitor and even cancel runs in real time. All important information is logged:
+
+ * Training & Validation losses
+ * Metrics: Precision, Recall, mAP@0.5, mAP@0.5:0.95
+ * Learning Rate over time
+ * A bounding box debugging panel, showing the training progress over time
+ * GPU: Type, **GPU Utilization**, power, temperature, **CUDA memory usage**
+ * System: Disk I/O, CPU utilization, RAM memory usage
+ * Your trained model as a W&B Artifact
+ * Environment: OS and Python types, Git repository and state, **training command**

*Weights & Biases dashboard (screenshot)*

+ + +
+ +## Advanced Usage +You can leverage W&B artifacts and Tables integration to easily visualize and manage your datasets, models and training evaluations. Here are some quick examples to get you started. +
+

1. Visualize and Version Datasets

+ Log, visualize, dynamically query, and understand your data with W&B Tables. You can use the following command to log your dataset as a W&B Table. This will generate a `{dataset}_wandb.yaml` file which can be used to train from the dataset artifact.
+ Usage:
+ `$ python utils/loggers/wandb/log_dataset.py --project ... --name ... --data ..`
+
+ ![Screenshot (64)](https://user-images.githubusercontent.com/15766192/128486078-d8433890-98a3-4d12-8986-b6c0e3fc64b9.png)
+
+ +
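+A concrete sketch of the section-1 command (project and run names are illustrative; the path assumes the upstream `utils/loggers` layout):
+
+ ```shell
+ # log COCO128 train/val splits as W&B dataset artifacts and Tables;
+ # also writes a coco128_wandb.yaml config with artifact links
+ python utils/loggers/wandb/log_dataset.py --project YOLOv5 --name log_coco128 --data data/coco128.yaml
+ ```
+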

2: Train and Log Evaluation simultaneously

+ This is an extension of the previous section; it also starts training after uploading the dataset, and it logs an evaluation Table. The evaluation Table compares your predictions and ground truths across the validation set for each epoch. It uses references to the already-uploaded datasets, so no images are uploaded from your system more than once.
+ Usage:
+ `$ python train.py --data .. --upload_dataset`
+
+![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
+
+ +
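+A sketch of the same workflow end to end (the epoch count is illustrative):
+
+ ```shell
+ # upload the dataset once, then train while logging an evaluation Table each epoch
+ python train.py --data data/coco128.yaml --upload_dataset --epochs 10
+ ```
+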

3: Train using dataset artifact

+ When you upload a dataset as described in the first section, you get a new config file with `_wandb` appended to its name. This file contains the information needed to train a model directly from the dataset artifact, and training this way also logs the evaluation Table.
+ Usage:
+ `$ python train.py --data {data}_wandb.yaml`
+
+![Screenshot (72)](https://user-images.githubusercontent.com/15766192/128979739-4cf63aeb-a76f-483f-8861-1c0100b938a5.png)
+
+ +
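+Continuing the COCO128 sketch from section 1 (the weights file is illustrative):
+
+ ```shell
+ # train straight from the dataset artifact referenced in the generated config
+ python train.py --data coco128_wandb.yaml --weights yolov5s.pt
+ ```
+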

4: Save model checkpoints as artifacts

+ To enable saving and versioning checkpoints of your experiment, pass `--save_period n` with the base command, where `n` is the checkpoint interval in epochs. You can also log both the dataset and model checkpoints simultaneously. If `--save_period` is not passed, only the final model will be logged.
+ Usage:
+ `$ python train.py --save_period 1`
+
+![Screenshot (68)](https://user-images.githubusercontent.com/15766192/128726138-ec6c1f60-639d-437d-b4ee-3acd9de47ef3.png)
+
+ +
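+A sketch combining dataset and checkpoint versioning (the interval of 5 is illustrative):
+
+ ```shell
+ # save a model checkpoint artifact every 5 epochs, alongside the dataset artifact
+ python train.py --data data/coco128.yaml --upload_dataset --save_period 5
+ ```
+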
+ +

5: Resume runs from checkpoint artifacts.

+Any run can be resumed using artifacts if the `--resume` argument starts with the `wandb-artifact://` prefix followed by the run path, i.e. `wandb-artifact://username/project/runid`. This doesn't require the model checkpoint to be present on the local system.
+ Usage:
+ `$ python train.py --resume wandb-artifact://{run_path}`
+
+![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
+
+ +
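+As a sketch (the entity, project and run id are placeholders):
+
+ ```shell
+ # downloads last.pt from the run's model artifact, then resumes training from it
+ python train.py --resume wandb-artifact://my-entity/YOLOv5/1a2b3c4d
+ ```
+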

6: Resume runs from dataset artifact & checkpoint artifacts.

+ Local dataset or model checkpoints are not required. This can be used to resume runs directly on a different device. The syntax is the same as in the previous section, but you'll need to log both the dataset and model checkpoints as artifacts, i.e. either set `--upload_dataset` or train from a `_wandb.yaml` file, and set `--save_period` (see the two-step sketch below).
+ Usage:
+ `$ python train.py --resume wandb-artifact://{run_path}`
+
+![Screenshot (70)](https://user-images.githubusercontent.com/15766192/128728988-4e84b355-6c87-41ae-a591-14aecf45343e.png)
+
+ + + + +
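+A two-step sketch of the full device-independent workflow (names and interval are illustrative):
+
+ ```shell
+ # 1) original run: log both the dataset and periodic checkpoints as artifacts
+ python train.py --data coco128_wandb.yaml --save_period 1
+
+ # 2) later, on any machine: resume with no local dataset or checkpoint files
+ python train.py --resume wandb-artifact://my-entity/YOLOv5/1a2b3c4d
+ ```
+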

## Reports

+W&B Reports can be created from your saved runs for sharing online. Once a report is created you will receive a link you can use to publicly share your results. Here is an example report created from the COCO128 tutorial trainings of all four YOLOv5 models ([link](https://wandb.ai/glenn-jocher/yolov5_tutorial/reports/YOLOv5-COCO128-Tutorial-Results--VmlldzozMDI5OTY)).
+
+*Weights & Biases Reports (screenshot)*
+
+
+## Environments
+
+YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
+
+- **Google Colab and Kaggle** notebooks with free GPU
+- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
+- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
+- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart)
+
+
+## Status
+
+![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
+
+If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), validation ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.
diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/__init__.py b/cv/detection/yolov5/pytorch/loggers/wandb/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/log_dataset.py b/cv/detection/yolov5/pytorch/loggers/wandb/log_dataset.py new file mode 100755 index 0000000..8447272 --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/wandb/log_dataset.py @@ -0,0 +1,23 @@ +import argparse + +from wandb_utils import WandbLogger + +WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' + + +def create_dataset_artifact(opt): + logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') + parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') + parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') + parser.add_argument('--entity', default=None, help='W&B entity') + parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') + + opt = parser.parse_args() + opt.resume = False # Explicitly disallow resume check for dataset upload job + + create_dataset_artifact(opt) diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/sweep.py b/cv/detection/yolov5/pytorch/loggers/wandb/sweep.py new file mode 100755 index 0000000..fdabec4 --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/wandb/sweep.py @@ -0,0 +1,36 @@ +import sys +from pathlib import Path + +import wandb + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from train import train, parse_opt +from utils.general import increment_path +from utils.torch_utils import select_device +from utils.callbacks import Callbacks + + +def sweep(): + wandb.init() + # Get hyp dict from sweep agent + hyp_dict = vars(wandb.config).get("_items") + + # Workaround: get necessary opt args + opt = parse_opt(known=True) + opt.batch_size = hyp_dict.get("batch_size") + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) + opt.epochs = hyp_dict.get("epochs") + opt.nosave = True + opt.data = hyp_dict.get("data") + device = select_device(opt.device, batch_size=opt.batch_size) + + # train + train(hyp_dict, opt, device, callbacks=Callbacks()) + + +if __name__ == "__main__": + sweep() diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/sweep.yaml b/cv/detection/yolov5/pytorch/loggers/wandb/sweep.yaml new file mode 100755 index 0000000..c3727de --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/wandb/sweep.yaml @@ -0,0 +1,143 @@ +# Hyperparameters for training +# To set range- +# Provide min and max values as: +# parameter: +# +# min: scalar +# max: scalar +# OR +# +# Set a specific list of search space- +# parameter: +# values: [scalar1, scalar2, scalar3...] 
+# +# You can use grid, bayesian and hyperopt search strategy +# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration + +program: utils/loggers/wandb/sweep.py +method: random +metric: + name: metrics/mAP_0.5 + goal: maximize + +parameters: + # hyperparameters: set either min, max range or values list + data: + value: "data/coco128.yaml" + batch_size: + values: [64] + epochs: + values: [10] + + lr0: + distribution: uniform + min: 1e-5 + max: 1e-1 + lrf: + distribution: uniform + min: 0.01 + max: 1.0 + momentum: + distribution: uniform + min: 0.6 + max: 0.98 + weight_decay: + distribution: uniform + min: 0.0 + max: 0.001 + warmup_epochs: + distribution: uniform + min: 0.0 + max: 5.0 + warmup_momentum: + distribution: uniform + min: 0.0 + max: 0.95 + warmup_bias_lr: + distribution: uniform + min: 0.0 + max: 0.2 + box: + distribution: uniform + min: 0.02 + max: 0.2 + cls: + distribution: uniform + min: 0.2 + max: 4.0 + cls_pw: + distribution: uniform + min: 0.5 + max: 2.0 + obj: + distribution: uniform + min: 0.2 + max: 4.0 + obj_pw: + distribution: uniform + min: 0.5 + max: 2.0 + iou_t: + distribution: uniform + min: 0.1 + max: 0.7 + anchor_t: + distribution: uniform + min: 2.0 + max: 8.0 + fl_gamma: + distribution: uniform + min: 0.0 + max: 0.1 + hsv_h: + distribution: uniform + min: 0.0 + max: 0.1 + hsv_s: + distribution: uniform + min: 0.0 + max: 0.9 + hsv_v: + distribution: uniform + min: 0.0 + max: 0.9 + degrees: + distribution: uniform + min: 0.0 + max: 45.0 + translate: + distribution: uniform + min: 0.0 + max: 0.9 + scale: + distribution: uniform + min: 0.0 + max: 0.9 + shear: + distribution: uniform + min: 0.0 + max: 10.0 + perspective: + distribution: uniform + min: 0.0 + max: 0.001 + flipud: + distribution: uniform + min: 0.0 + max: 1.0 + fliplr: + distribution: uniform + min: 0.0 + max: 1.0 + mosaic: + distribution: uniform + min: 0.0 + max: 1.0 + mixup: + distribution: uniform + min: 0.0 + max: 1.0 + copy_paste: + distribution: uniform + min: 0.0 + max: 1.0 diff --git a/cv/detection/yolov5/pytorch/loggers/wandb/wandb_utils.py b/cv/detection/yolov5/pytorch/loggers/wandb/wandb_utils.py new file mode 100755 index 0000000..92fdd27 --- /dev/null +++ b/cv/detection/yolov5/pytorch/loggers/wandb/wandb_utils.py @@ -0,0 +1,528 @@ +"""Utilities and tools for tracking runs with Weights & Biases.""" + +import logging +import os +import sys +from contextlib import contextmanager +from pathlib import Path + +import pkg_resources as pkg +import yaml +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from utils.datasets import LoadImagesAndLabels +from utils.datasets import img2label_paths +from utils.general import check_dataset, check_file + +RANK = int(os.getenv('RANK', -1)) + +try: + import wandb + + assert hasattr(wandb, '__version__') # verify package import not local dir + if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in [0, -1]: + wandb.login(timeout=30) +except (ImportError, AssertionError): + wandb = None + +WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' + + +def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX): + return from_string[len(prefix):] + + +def check_wandb_config_file(data_config_file): + wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path + if Path(wandb_config).is_file(): + return wandb_config + return data_config_file + + 
+def check_wandb_dataset(data_file): + is_trainset_wandb_artifact = False + is_valset_wandb_artifact = False + if check_file(data_file) and data_file.endswith('.yaml'): + with open(data_file, errors='ignore') as f: + data_dict = yaml.safe_load(f) + is_trainset_wandb_artifact = (isinstance(data_dict['train'], str) and + data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX)) + is_valset_wandb_artifact = (isinstance(data_dict['val'], str) and + data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX)) + if is_trainset_wandb_artifact or is_valset_wandb_artifact: + return data_dict + else: + return check_dataset(data_file) + + +def get_run_info(run_path): + run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX)) + run_id = run_path.stem + project = run_path.parent.stem + entity = run_path.parent.parent.stem + model_artifact_name = 'run_' + run_id + '_model' + return entity, project, run_id, model_artifact_name + + +def check_wandb_resume(opt): + process_wandb_config_ddp_mode(opt) if RANK not in [-1, 0] else None + if isinstance(opt.resume, str): + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + if RANK not in [-1, 0]: # For resuming DDP runs + entity, project, run_id, model_artifact_name = get_run_info(opt.resume) + api = wandb.Api() + artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest') + modeldir = artifact.download() + opt.weights = str(Path(modeldir) / "last.pt") + return True + return None + + +def process_wandb_config_ddp_mode(opt): + with open(check_file(opt.data), errors='ignore') as f: + data_dict = yaml.safe_load(f) # data dict + train_dir, val_dir = None, None + if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): + api = wandb.Api() + train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) + train_dir = train_artifact.download() + train_path = Path(train_dir) / 'data/images/' + data_dict['train'] = str(train_path) + + if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): + api = wandb.Api() + val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) + val_dir = val_artifact.download() + val_path = Path(val_dir) / 'data/images/' + data_dict['val'] = str(val_path) + if train_dir or val_dir: + ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml') + with open(ddp_data_path, 'w') as f: + yaml.safe_dump(data_dict, f) + opt.data = ddp_data_path + + +class WandbLogger(): + """Log training runs, datasets, models, and predictions to Weights & Biases. + + This logger sends information to W&B at wandb.ai. By default, this information + includes hyperparameters, system configuration and metrics, model metrics, + and basic data metrics and analyses. + + By providing additional command line arguments to train.py, datasets, + models and predictions can also be logged. 
+ + For more on how this logger is used, see the Weights & Biases documentation: + https://docs.wandb.com/guides/integrations/yolov5 + """ + + def __init__(self, opt, run_id=None, job_type='Training'): + """ + - Initialize WandbLogger instance + - Upload dataset if opt.upload_dataset is True + - Setup trainig processes if job_type is 'Training' + + arguments: + opt (namespace) -- Commandline arguments for this run + run_id (str) -- Run ID of W&B run to be resumed + job_type (str) -- To set the job_type for this run + + """ + # Pre-training routine -- + self.job_type = job_type + self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run + self.val_artifact, self.train_artifact = None, None + self.train_artifact_path, self.val_artifact_path = None, None + self.result_artifact = None + self.val_table, self.result_table = None, None + self.bbox_media_panel_images = [] + self.val_table_path_map = None + self.max_imgs_to_log = 16 + self.wandb_artifact_data_dict = None + self.data_dict = None + # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call + if isinstance(opt.resume, str): # checks resume from artifact + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + entity, project, run_id, model_artifact_name = get_run_info(opt.resume) + model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name + assert wandb, 'install wandb to resume wandb runs' + # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config + self.wandb_run = wandb.init(id=run_id, + project=project, + entity=entity, + resume='allow', + allow_val_change=True) + opt.resume = model_artifact_name + elif self.wandb: + self.wandb_run = wandb.init(config=opt, + resume="allow", + project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, + entity=opt.entity, + name=opt.name if opt.name != 'exp' else None, + job_type=job_type, + id=run_id, + allow_val_change=True) if not wandb.run else wandb.run + if self.wandb_run: + if self.job_type == 'Training': + if opt.upload_dataset: + if not opt.resume: + self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt) + + if opt.resume: + # resume from artifact + if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + self.data_dict = dict(self.wandb_run.config.data_dict) + else: # local resume + self.data_dict = check_wandb_dataset(opt.data) + else: + self.data_dict = check_wandb_dataset(opt.data) + self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict + + # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming. + self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, + allow_val_change=True) + self.setup_training(opt) + + if self.job_type == 'Dataset Creation': + self.data_dict = self.check_and_upload_dataset(opt) + + def check_and_upload_dataset(self, opt): + """ + Check if the dataset format is compatible and upload it as W&B artifact + + arguments: + opt (namespace)-- Commandline arguments for current run + + returns: + Updated dataset info dictionary where local dataset paths are replaced by WAND_ARFACT_PREFIX links. 
+ """ + assert wandb, 'Install wandb to upload dataset' + config_path = self.log_dataset_artifact(opt.data, + opt.single_cls, + 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem) + print("Created dataset config file ", config_path) + with open(config_path, errors='ignore') as f: + wandb_data_dict = yaml.safe_load(f) + return wandb_data_dict + + def setup_training(self, opt): + """ + Setup the necessary processes for training YOLO models: + - Attempt to download model checkpoint and dataset artifacts if opt.resume stats with WANDB_ARTIFACT_PREFIX + - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded + - Setup log_dict, initialize bbox_interval + + arguments: + opt (namespace) -- commandline arguments for this run + + """ + self.log_dict, self.current_epoch = {}, 0 + self.bbox_interval = opt.bbox_interval + if isinstance(opt.resume, str): + modeldir, _ = self.download_model_artifact(opt) + if modeldir: + self.weights = Path(modeldir) / "last.pt" + config = self.wandb_run.config + opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str( + self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \ + config.hyp + data_dict = self.data_dict + if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download + self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'), + opt.artifact_alias) + self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'), + opt.artifact_alias) + + if self.train_artifact_path is not None: + train_path = Path(self.train_artifact_path) / 'data/images/' + data_dict['train'] = str(train_path) + if self.val_artifact_path is not None: + val_path = Path(self.val_artifact_path) / 'data/images/' + data_dict['val'] = str(val_path) + + if self.val_artifact is not None: + self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"]) + self.val_table = self.val_artifact.get("val") + if self.val_table_path_map is None: + self.map_val_table_path() + if opt.bbox_interval == -1: + self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1 + train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None + # Update the the data_dict to point to local artifacts dir + if train_from_artifact: + self.data_dict = data_dict + + def download_dataset_artifact(self, path, alias): + """ + download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX + + arguments: + path -- path of the dataset to be used for training + alias (str)-- alias of the artifact to be download/used for training + + returns: + (str, wandb.Artifact) -- path of the downladed dataset and it's corresponding artifact object if dataset + is found otherwise returns (None, None) + """ + if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX): + artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias) + dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/")) + assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'" + datadir = dataset_artifact.download() + return datadir, dataset_artifact + return None, None + + def download_model_artifact(self, opt): + """ + download the model 
checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX + + arguments: + opt (namespace) -- Commandline arguments for this run + """ + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest") + assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist' + modeldir = model_artifact.download() + epochs_trained = model_artifact.metadata.get('epochs_trained') + total_epochs = model_artifact.metadata.get('total_epochs') + is_finished = total_epochs is None + assert not is_finished, 'training is finished, can only resume incomplete runs.' + return modeldir, model_artifact + return None, None + + def log_model(self, path, opt, epoch, fitness_score, best_model=False): + """ + Log the model checkpoint as W&B artifact + + arguments: + path (Path) -- Path of directory containing the checkpoints + opt (namespace) -- Command line arguments for this run + epoch (int) -- Current epoch number + fitness_score (float) -- fitness score for current epoch + best_model (boolean) -- Boolean representing if the current checkpoint is the best yet. + """ + model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={ + 'original_url': str(path), + 'epochs_trained': epoch + 1, + 'save period': opt.save_period, + 'project': opt.project, + 'total_epochs': opt.epochs, + 'fitness_score': fitness_score + }) + model_artifact.add_file(str(path / 'last.pt'), name='last.pt') + wandb.log_artifact(model_artifact, + aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) + print("Saving model artifact on epoch ", epoch + 1) + + def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): + """ + Log the dataset as W&B artifact and return the new data file with W&B links + + arguments: + data_file (str) -- the .yaml file with information about the dataset like - path, classes etc. + single_class (boolean) -- train multi-class data as single-class + project (str) -- project name. Used to construct the artifact path + overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new + file with _wandb postfix. Eg -> data_wandb.yaml + + returns: + the new .yaml file with artifact links. 
it can be used to start training directly from artifacts + """ + self.data_dict = check_dataset(data_file) # parse and check + data = dict(self.data_dict) + nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names']) + names = {k: v for k, v in enumerate(names)} # to index dictionary + self.train_artifact = self.create_dataset_table(LoadImagesAndLabels( + data['train'], rect=True, batch_size=1), names, name='train') if data.get('train') else None + self.val_artifact = self.create_dataset_table(LoadImagesAndLabels( + data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None + if data.get('train'): + data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train') + if data.get('val'): + data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val') + path = Path(data_file).stem + path = (path if overwrite_config else path + '_wandb') + '.yaml' # updated data.yaml path + data.pop('download', None) + data.pop('path', None) + with open(path, 'w') as f: + yaml.safe_dump(data, f) + + if self.job_type == 'Training': # builds correct artifact pipeline graph + self.wandb_run.use_artifact(self.val_artifact) + self.wandb_run.use_artifact(self.train_artifact) + self.val_artifact.wait() + self.val_table = self.val_artifact.get('val') + self.map_val_table_path() + else: + self.wandb_run.log_artifact(self.train_artifact) + self.wandb_run.log_artifact(self.val_artifact) + return path + + def map_val_table_path(self): + """ + Map the validation dataset Table like name of file -> it's id in the W&B Table. + Useful for - referencing artifacts for evaluation. + """ + self.val_table_path_map = {} + print("Mapping dataset") + for i, data in enumerate(tqdm(self.val_table.data)): + self.val_table_path_map[data[3]] = data[0] + + def create_dataset_table(self, dataset, class_to_id, name='dataset'): + """ + Create and return W&B artifact containing W&B Table of the dataset. 
+ + arguments: + dataset (LoadImagesAndLabels) -- instance of LoadImagesAndLabels class used to iterate over the data to build Table + class_to_id (dict(int, str)) -- hash map that maps class ids to labels + name (str) -- name of the artifact + + returns: + dataset artifact to be logged or used + """ + # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging + artifact = wandb.Artifact(name=name, type="dataset") + img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None + img_files = tqdm(dataset.img_files) if not img_files else img_files + for img_file in img_files: + if Path(img_file).is_dir(): + artifact.add_dir(img_file, name='data/images') + labels_path = 'labels'.join(dataset.path.rsplit('images', 1)) + artifact.add_dir(labels_path, name='data/labels') + else: + artifact.add_file(img_file, name='data/images/' + Path(img_file).name) + label_file = Path(img2label_paths([img_file])[0]) + artifact.add_file(str(label_file), + name='data/labels/' + label_file.name) if label_file.exists() else None + table = wandb.Table(columns=["id", "train_image", "Classes", "name"]) + class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()]) + for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)): + box_data, img_classes = [], {} + for cls, *xywh in labels[:, 1:].tolist(): + cls = int(cls) + box_data.append({"position": {"middle": [xywh[0], xywh[1]], "width": xywh[2], "height": xywh[3]}, + "class_id": cls, + "box_caption": "%s" % (class_to_id[cls])}) + img_classes[cls] = class_to_id[cls] + boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space + table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()), + Path(paths).name) + artifact.add(table, name) + return artifact + + def log_training_progress(self, predn, path, names): + """ + Build evaluation Table. Uses reference from validation dataset table. + + arguments: + predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class] + path (str): local path of the current evaluation image + names (dict(int, str)): hash map that maps class ids to labels + """ + class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()]) + box_data = [] + total_conf = 0 + for *xyxy, conf, cls in predn.tolist(): + if conf >= 0.25: + box_data.append( + {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, + "class_id": int(cls), + "box_caption": "%s %.3f" % (names[cls], conf), + "scores": {"class_score": conf}, + "domain": "pixel"}) + total_conf = total_conf + conf + boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + id = self.val_table_path_map[Path(path).name] + self.result_table.add_data(self.current_epoch, + id, + self.val_table.data[id][1], + wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set), + total_conf / max(1, len(box_data)) + ) + + def val_one_image(self, pred, predn, path, names, im): + """ + Log validation data for one image. 
updates the result Table if validation dataset is uploaded and log bbox media panel + + arguments: + pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] + predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class] + path (str): local path of the current evaluation image + """ + if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact + self.log_training_progress(predn, path, names) + + if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0: + if self.current_epoch % self.bbox_interval == 0: + box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, + "class_id": int(cls), + "box_caption": "%s %.3f" % (names[cls], conf), + "scores": {"class_score": conf}, + "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] + boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name)) + + def log(self, log_dict): + """ + save the metrics to the logging dictionary + + arguments: + log_dict (Dict) -- metrics/media to be logged in current step + """ + if self.wandb_run: + for key, value in log_dict.items(): + self.log_dict[key] = value + + def end_epoch(self, best_result=False): + """ + commit the log_dict, model artifacts and Tables to W&B and flush the log_dict. + + arguments: + best_result (boolean): Boolean representing if the result of this evaluation is best or not + """ + if self.wandb_run: + with all_logging_disabled(): + if self.bbox_media_panel_images: + self.log_dict["Bounding Box Debugger/Images"] = self.bbox_media_panel_images + wandb.log(self.log_dict) + self.log_dict = {} + self.bbox_media_panel_images = [] + if self.result_artifact: + self.result_artifact.add(self.result_table, 'result') + wandb.log_artifact(self.result_artifact, aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), + ('best' if best_result else '')]) + + wandb.log({"evaluation": self.result_table}) + self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"]) + self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + + def finish_run(self): + """ + Log metrics if any and finish the current W&B run + """ + if self.wandb_run: + if self.log_dict: + with all_logging_disabled(): + wandb.log(self.log_dict) + wandb.run.finish() + + +@contextmanager +def all_logging_disabled(highest_level=logging.CRITICAL): + """ source - https://gist.github.com/simon-weber/7853144 + A context manager that will prevent any logging messages triggered during the body from being processed. + :param highest_level: the maximum logging level in use. + This would only need to be changed if a custom level greater than CRITICAL is defined. + """ + previous_level = logging.root.manager.disable + logging.disable(highest_level) + try: + yield + finally: + logging.disable(previous_level) diff --git a/cv/detection/yolov5/pytorch/models/common.py b/cv/detection/yolov5/pytorch/models/common.py index 7603e9e..fd892a2 100644 --- a/cv/detection/yolov5/pytorch/models/common.py +++ b/cv/detection/yolov5/pytorch/models/common.py @@ -1,12 +1,16 @@ -# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2023-2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. 
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Common modules +""" -# YOLOv5 common modules - +import logging +import math +import warnings from copy import copy from pathlib import Path -import math import numpy as np import pandas as pd import requests @@ -16,10 +20,13 @@ from PIL import Image from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box -from utils.plots import colors, plot_one_box -from utils.torch_utils import time_synchronized -from utils.activations import SiLU +from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \ + scale_coords, xyxy2xywh +from utils.plots import Annotator, colors +from utils.torch_utils import time_sync + +LOGGER = logging.getLogger(__name__) + def autopad(k, p=None): # kernel, padding # Pad to 'same' @@ -28,27 +35,27 @@ def autopad(k, p=None): # kernel, padding return p -def DWConv(c1, c2, k=1, s=1, act=True): - # Depthwise convolution - return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) - - class Conv(nn.Module): # Standard convolution def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(Conv, self).__init__() + super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) - #self.act = SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) - def fuseforward(self, x): + def forward_fuse(self, x): return self.act(self.conv(x)) +class DWConv(Conv): + # Depth-wise convolution class + def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act) + + class TransformerLayer(nn.Module): # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) def __init__(self, c, num_heads): @@ -88,7 +95,7 @@ class TransformerBlock(nn.Module): class Bottleneck(nn.Module): # Standard bottleneck def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion - super(Bottleneck, self).__init__() + super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c_, c2, 3, 1, g=g) @@ -101,7 +108,7 @@ class Bottleneck(nn.Module): class BottleneckCSP(nn.Module): # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion - super(BottleneckCSP, self).__init__() + super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) @@ -120,7 +127,7 @@ class BottleneckCSP(nn.Module): class C3(nn.Module): # CSP Bottleneck with 3 convolutions def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion - super(C3, self).__init__() + super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) @@ -140,10 +147,26 @@ class C3TR(C3): self.m = TransformerBlock(c_, c_, 4, n) +class C3SPP(C3): + # C3 module with SPP() + def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): + 
super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) + self.m = SPP(c_, c_, k) + + +class C3Ghost(C3): + # C3 module with GhostBottleneck() + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__(c1, c2, n, shortcut, g, e) + c_ = int(c2 * e) # hidden channels + self.m = nn.Sequential(*[GhostBottleneck(c_, c_) for _ in range(n)]) + + class SPP(nn.Module): - # Spatial pyramid pooling layer used in YOLOv3-SPP + # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 def __init__(self, c1, c2, k=(5, 9, 13)): - super(SPP, self).__init__() + super().__init__() c_ = c1 // 2 # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) @@ -151,13 +174,33 @@ class SPP(nn.Module): def forward(self, x): x = self.cv1(x) - return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class SPPF(nn.Module): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) class Focus(nn.Module): # Focus wh information into c-space def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(Focus, self).__init__() + super().__init__() self.conv = Conv(c1 * 4, c2, k, s, p, g, act) # self.contract = Contract(gain=2) @@ -166,6 +209,34 @@ class Focus(nn.Module): # return self.conv(self.contract(x)) +class GhostConv(nn.Module): + # Ghost Convolution https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + super().__init__() + c_ = c2 // 2 # hidden channels + self.cv1 = Conv(c1, c_, k, s, None, g, act) + self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) + + def forward(self, x): + y = self.cv1(x) + return torch.cat([y, self.cv2(y)], 1) + + +class GhostBottleneck(nn.Module): + # Ghost Bottleneck https://github.com/huawei-noah/ghostnet + def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride + super().__init__() + c_ = c2 // 2 + self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw + DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw + GhostConv(c_, c2, 1, 1, act=False)) # pw-linear + self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), + Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() + + def forward(self, x): + return self.conv(x) + self.shortcut(x) + + class Contract(nn.Module): # Contract width-height into channels, i.e. 
x(1,64,80,80) to x(1,256,40,40) def __init__(self, gain=2): @@ -173,11 +244,11 @@ class Contract(nn.Module): self.gain = gain def forward(self, x): - N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' + b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' s = self.gain - x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) + x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) - return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) + return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) class Expand(nn.Module): @@ -187,64 +258,61 @@ class Expand(nn.Module): self.gain = gain def forward(self, x): - N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' s = self.gain - x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) + x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) - return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) + return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) class Concat(nn.Module): # Concatenate a list of tensors along dimension def __init__(self, dimension=1): - super(Concat, self).__init__() + super().__init__() self.d = dimension def forward(self, x): return torch.cat(x, self.d) -class NMS(nn.Module): - # Non-Maximum Suppression (NMS) module - conf = 0.25 # confidence threshold - iou = 0.45 # IoU threshold - classes = None # (optional list) filter by class - max_det = 1000 # maximum number of detections per image - - def __init__(self): - super(NMS, self).__init__() - - def forward(self, x): - return non_max_suppression(x[0], self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) - - class AutoShape(nn.Module): - # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS conf = 0.25 # NMS confidence threshold iou = 0.45 # NMS IoU threshold classes = None # (optional list) filter by class + multi_label = False # NMS multiple labels per box max_det = 1000 # maximum number of detections per image def __init__(self, model): - super(AutoShape, self).__init__() + super().__init__() self.model = model.eval() def autoshape(self): - print('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() + LOGGER.info('AutoShape already enabled, skipping... ') # model already converted to model.autoshape() + return self + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + m = self.model.model[-1] # Detect() + m.stride = fn(m.stride) + m.grid = list(map(fn, m.grid)) + if isinstance(m.anchor_grid, list): + m.anchor_grid = list(map(fn, m.anchor_grid)) return self @torch.no_grad() def forward(self, imgs, size=640, augment=False, profile=False): # Inference from various sources. 
For height=640, width=1280, RGB images example inputs are: - # filename: imgs = 'data/images/zidane.jpg' - # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' + # file: imgs = 'data/images/zidane.jpg' # str or PosixPath + # URI: = 'https://ultralytics.com/images/zidane.jpg' # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) - # PIL: = Image.open('image.jpg') # HWC x(640,1280,3) + # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) # numpy: = np.zeros((640,1280,3)) # HWC # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images - t = [time_synchronized()] + t = [time_sync()] p = next(self.model.parameters()) # for device and type if isinstance(imgs, torch.Tensor): # torch with amp.autocast(enabled=p.device.type != 'cpu'): @@ -255,8 +323,8 @@ class AutoShape(nn.Module): shape0, shape1, files = [], [], [] # image and inference shapes, filenames for i, im in enumerate(imgs): f = f'image{i}' # filename - if isinstance(im, str): # filename or uri - im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im + if isinstance(im, (str, Path)): # filename or uri + im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im im = np.asarray(exif_transpose(im)) elif isinstance(im, Image.Image): # PIL Image im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f @@ -274,26 +342,27 @@ class AutoShape(nn.Module): x = np.stack(x, 0) if n > 1 else x[0][None] # stack x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 - t.append(time_synchronized()) + t.append(time_sync()) with amp.autocast(enabled=p.device.type != 'cpu'): # Inference y = self.model(x, augment, profile)[0] # forward - t.append(time_synchronized()) + t.append(time_sync()) # Post-process - y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det) # NMS + y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, + multi_label=self.multi_label, max_det=self.max_det) # NMS for i in range(n): scale_coords(shape1, y[i][:, :4], shape0[i]) - t.append(time_synchronized()) + t.append(time_sync()) return Detections(imgs, y, files, t, self.names, x.shape) class Detections: - # detections class for YOLOv5 inference results + # YOLOv5 detections class for inference results def __init__(self, imgs, pred, files, times=None, names=None, shape=None): - super(Detections, self).__init__() + super().__init__() d = pred[0].device # device gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations self.imgs = imgs # list of images as numpy arrays @@ -309,47 +378,59 @@ class Detections: self.s = shape # inference BCHW shape def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')): + crops = [] for i, (im, pred) in enumerate(zip(self.imgs, self.pred)): - str = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' - if pred is not None: + s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string + if pred.shape[0]: for c in pred[:, -1].unique(): n = (pred[:, -1] == c).sum() # detections per class - str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to 
string if show or save or render or crop: + annotator = Annotator(im, example=str(self.names)) for *box, conf, cls in reversed(pred): # xyxy, confidence, class label = f'{self.names[int(cls)]} {conf:.2f}' if crop: - save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i]) + file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None + crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label, + 'im': save_one_box(box, im, file=file, save=save)}) else: # all others - plot_one_box(box, im, label=label, color=colors(cls)) + annotator.box_label(box, label, color=colors(cls)) + im = annotator.im + else: + s += '(no detections)' im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np if pprint: - print(str.rstrip(', ')) + LOGGER.info(s.rstrip(', ')) if show: im.show(self.files[i]) # show if save: f = self.files[i] im.save(save_dir / f) # save - print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n') + if i == self.n - 1: + LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") if render: self.imgs[i] = np.asarray(im) + if crop: + if save: + LOGGER.info(f'Saved results to {save_dir}\n') + return crops def print(self): self.display(pprint=True) # print results - print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % + self.t) def show(self): self.display(show=True) # show results - def save(self, save_dir='runs/hub/exp'): - save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True) # increment save_dir + def save(self, save_dir='runs/detect/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir self.display(save=True, save_dir=save_dir) # save results - def crop(self, save_dir='runs/hub/exp'): - save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp', mkdir=True) # increment save_dir - self.display(crop=True, save_dir=save_dir) # crop results - print(f'Saved results to {save_dir}\n') + def crop(self, save=True, save_dir='runs/detect/exp'): + save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None + return self.display(crop=True, save=save, save_dir=save_dir) # crop results def render(self): self.display(render=True) # render results @@ -380,7 +461,7 @@ class Detections: class Classify(nn.Module): # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups - super(Classify, self).__init__() + super().__init__() self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) self.flat = nn.Flatten() diff --git a/cv/detection/yolov5/pytorch/models/experimental.py b/cv/detection/yolov5/pytorch/models/experimental.py index d316b18..f89f767 100644 --- a/cv/detection/yolov5/pytorch/models/experimental.py +++ b/cv/detection/yolov5/pytorch/models/experimental.py @@ -1,18 +1,21 @@ -# YOLOv5 experimental modules +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Experimental modules +""" import numpy as np import torch import torch.nn as nn -from models.common import Conv, DWConv -from utils.google_utils import attempt_download +from models.common import Conv +from utils.downloads import attempt_download class CrossConv(nn.Module): # Cross Convolution Downsample def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): # ch_in, ch_out, kernel, stride, groups, expansion, shortcut - super(CrossConv, self).__init__() + super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, (1, k), (1, s)) self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) @@ -25,7 +28,7 @@ class CrossConv(nn.Module): class Sum(nn.Module): # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 def __init__(self, n, weight=False): # n: number of inputs - super(Sum, self).__init__() + super().__init__() self.weight = weight # apply weights boolean self.iter = range(n - 1) # iter object if weight: @@ -43,38 +46,10 @@ class Sum(nn.Module): return y -class GhostConv(nn.Module): - # Ghost Convolution https://github.com/huawei-noah/ghostnet - def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups - super(GhostConv, self).__init__() - c_ = c2 // 2 # hidden channels - self.cv1 = Conv(c1, c_, k, s, None, g, act) - self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) - - def forward(self, x): - y = self.cv1(x) - return torch.cat([y, self.cv2(y)], 1) - - -class GhostBottleneck(nn.Module): - # Ghost Bottleneck https://github.com/huawei-noah/ghostnet - def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride - super(GhostBottleneck, self).__init__() - c_ = c2 // 2 - self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw - DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw - GhostConv(c_, c2, 1, 1, act=False)) # pw-linear - self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), - Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() - - def forward(self, x): - return self.conv(x) + self.shortcut(x) - - class MixConv2d(nn.Module): - # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 + # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): - super(MixConv2d, self).__init__() + super().__init__() groups = len(k) if equal_ch: # equal c_ per group i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices @@ -98,31 +73,40 @@ class MixConv2d(nn.Module): class Ensemble(nn.ModuleList): # Ensemble of models def __init__(self): - super(Ensemble, self).__init__() + super().__init__() - def forward(self, x, augment=False): + def forward(self, x, augment=False, profile=False, visualize=False): y = [] for module in self: - y.append(module(x, augment)[0]) + y.append(module(x, augment, profile, visualize)[0]) # y = torch.stack(y).max(0)[0] # max ensemble # 
y = torch.stack(y).mean(0) # mean ensemble y = torch.cat(y, 1) # nms ensemble return y, None # inference, train output -def attempt_load(weights, map_location=None, inplace=True): +def attempt_load(weights, map_location=None, inplace=True, fuse=True): from models.yolo import Detect, Model # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a model = Ensemble() for w in weights if isinstance(weights, list) else [weights]: ckpt = torch.load(attempt_download(w), map_location=map_location) # load - model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model + if fuse: + # model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model + model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # FP32 model + else: + model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().eval()) # without layer fuse + # Compatibility updates for m in model.modules(): if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model]: m.inplace = inplace # pytorch 1.7.0 compatibility + if type(m) is Detect: + if not isinstance(m.anchor_grid, list): # new Detect Layer compatibility + delattr(m, 'anchor_grid') + setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) elif type(m) is Conv: m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5-bifpn.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5-bifpn.yaml new file mode 100755 index 0000000..119aebb --- /dev/null +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5-bifpn.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]] + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 + ] + +# YOLOv5 BiFPN head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14, 6], 1, Concat, [1]], # cat P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5-p2.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5-p2.yaml index 6212236..759e9f9 100644 --- a/cv/detection/yolov5/pytorch/models/hub/yolov5-p2.yaml +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5-p2.yaml @@ -1,3 +1,5 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple @@ -7,46 +9,46 @@ anchors: 3 # YOLOv5 backbone backbone: # [from, number, module, args] - [ [ -1, 1, Focus, 
[ 64, 3 ] ], # 0-P1/2 - [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 - [ -1, 3, C3, [ 128 ] ], - [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 - [ -1, 9, C3, [ 256 ] ], - [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 - [ -1, 9, C3, [ 512 ] ], - [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32 - [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ], - [ -1, 3, C3, [ 1024, False ] ], # 9 + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3, [1024, False]], # 9 ] # YOLOv5 head head: - [ [ -1, 1, Conv, [ 512, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 - [ -1, 3, C3, [ 512, False ] ], # 13 - - [ -1, 1, Conv, [ 256, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 - [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small) - - [ -1, 1, Conv, [ 128, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2 - [ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall) - - [ -1, 1, Conv, [ 128, 3, 2 ] ], - [ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3 - [ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small) - - [ -1, 1, Conv, [ 256, 3, 2 ] ], - [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4 - [ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium) - - [ -1, 1, Conv, [ 512, 3, 2 ] ], - [ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5 - [ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large) - - [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5) + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 2], 1, Concat, [1]], # cat backbone P2 + [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) + + [-1, 1, Conv, [128, 3, 2]], + [[-1, 18], 1, Concat, [1]], # cat head P3 + [-1, 3, C3, [256, False]], # 24 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 27 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 30 (P5/32-large) + + [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5l6.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5l6.yaml index 91c57da..632c2cb 100644 --- a/cv/detection/yolov5/pytorch/models/hub/yolov5l6.yaml +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5l6.yaml @@ -1,58 +1,60 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: - - [ 19,27, 44,40, 38,94 ] # P3/8 - - [ 96,68, 86,152, 180,137 ] # P4/16 - - [ 140,301, 303,264, 238,542 ] # P5/32 - - [ 436,615, 739,380, 925,792 ] # P6/64 + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 86,152, 180,137] # P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, 
number, module, args] - [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 - [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 - [ -1, 3, C3, [ 128 ] ], - [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 - [ -1, 9, C3, [ 256 ] ], - [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 - [ -1, 9, C3, [ 512 ] ], - [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 - [ -1, 3, C3, [ 768 ] ], - [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 - [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], - [ -1, 3, C3, [ 1024, False ] ], # 11 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: - [ [ -1, 1, Conv, [ 768, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 - [ -1, 3, C3, [ 768, False ] ], # 15 - - [ -1, 1, Conv, [ 512, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 - [ -1, 3, C3, [ 512, False ] ], # 19 - - [ -1, 1, Conv, [ 256, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 - [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) - - [ -1, 1, Conv, [ 256, 3, 2 ] ], - [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 - [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) - - [ -1, 1, Conv, [ 512, 3, 2 ] ], - [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 - [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) - - [ -1, 1, Conv, [ 768, 3, 2 ] ], - [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 - [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) - - [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5m6.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5m6.yaml index 4bef2e0..ecc53fd 100644 --- a/cv/detection/yolov5/pytorch/models/hub/yolov5m6.yaml +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5m6.yaml @@ -1,58 +1,60 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 0.67 # model depth multiple width_multiple: 0.75 # layer channel multiple anchors: - - [ 19,27, 44,40, 38,94 ] # P3/8 - - [ 96,68, 86,152, 180,137 ] # P4/16 - - [ 140,301, 303,264, 238,542 ] # P5/32 - - [ 436,615, 739,380, 925,792 ] # P6/64 + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 
86,152, 180,137] # P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 - [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 - [ -1, 3, C3, [ 128 ] ], - [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 - [ -1, 9, C3, [ 256 ] ], - [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 - [ -1, 9, C3, [ 512 ] ], - [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 - [ -1, 3, C3, [ 768 ] ], - [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 - [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], - [ -1, 3, C3, [ 1024, False ] ], # 11 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: - [ [ -1, 1, Conv, [ 768, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 - [ -1, 3, C3, [ 768, False ] ], # 15 - - [ -1, 1, Conv, [ 512, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 - [ -1, 3, C3, [ 512, False ] ], # 19 - - [ -1, 1, Conv, [ 256, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 - [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) - - [ -1, 1, Conv, [ 256, 3, 2 ] ], - [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 - [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) - - [ -1, 1, Conv, [ 512, 3, 2 ] ], - [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 - [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) - - [ -1, 1, Conv, [ 768, 3, 2 ] ], - [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 - [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) - - [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5n6.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5n6.yaml new file mode 100755 index 0000000..0c0c71d --- /dev/null +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5n6.yaml @@ -0,0 +1,60 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.25 # layer channel multiple +anchors: + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 86,152, 180,137] # 
P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) + ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5s-ghost.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5s-ghost.yaml new file mode 100755 index 0000000..dbf2c8e --- /dev/null +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5s-ghost.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.50 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3Ghost, [128]], + [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 + [-1, 9, C3Ghost, [256]], + [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3Ghost, [512]], + [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, C3Ghost, [1024, False]], # 9 + ] + +# YOLOv5 head +head: + [[-1, 1, GhostConv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3Ghost, [512, False]], # 13 + + [-1, 1, GhostConv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) + + [-1, 1, GhostConv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) + + [-1, 1, GhostConv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5s6.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5s6.yaml index ba1025e..a28fb55 100644 --- a/cv/detection/yolov5/pytorch/models/hub/yolov5s6.yaml +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5s6.yaml @@ -1,58 +1,60 @@ +# YOLOv5 🚀 by Ultralytics, 
GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 0.33 # model depth multiple width_multiple: 0.50 # layer channel multiple anchors: - - [ 19,27, 44,40, 38,94 ] # P3/8 - - [ 96,68, 86,152, 180,137 ] # P4/16 - - [ 140,301, 303,264, 238,542 ] # P5/32 - - [ 436,615, 739,380, 925,792 ] # P6/64 + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 86,152, 180,137] # P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 - [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 - [ -1, 3, C3, [ 128 ] ], - [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 - [ -1, 9, C3, [ 256 ] ], - [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 - [ -1, 9, C3, [ 512 ] ], - [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 - [ -1, 3, C3, [ 768 ] ], - [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 - [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], - [ -1, 3, C3, [ 1024, False ] ], # 11 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: - [ [ -1, 1, Conv, [ 768, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 - [ -1, 3, C3, [ 768, False ] ], # 15 - - [ -1, 1, Conv, [ 512, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 - [ -1, 3, C3, [ 512, False ] ], # 19 - - [ -1, 1, Conv, [ 256, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 - [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) - - [ -1, 1, Conv, [ 256, 3, 2 ] ], - [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 - [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) - - [ -1, 1, Conv, [ 512, 3, 2 ] ], - [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 - [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) - - [ -1, 1, Conv, [ 768, 3, 2 ] ], - [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 - [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) - - [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) ] diff --git a/cv/detection/yolov5/pytorch/models/hub/yolov5x6.yaml b/cv/detection/yolov5/pytorch/models/hub/yolov5x6.yaml index 4fc9c9a..ba795c4 
100644 --- a/cv/detection/yolov5/pytorch/models/hub/yolov5x6.yaml +++ b/cv/detection/yolov5/pytorch/models/hub/yolov5x6.yaml @@ -1,58 +1,60 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 1.33 # model depth multiple width_multiple: 1.25 # layer channel multiple anchors: - - [ 19,27, 44,40, 38,94 ] # P3/8 - - [ 96,68, 86,152, 180,137 ] # P4/16 - - [ 140,301, 303,264, 238,542 ] # P5/32 - - [ 436,615, 739,380, 925,792 ] # P6/64 + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 86,152, 180,137] # P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 - [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 - [ -1, 3, C3, [ 128 ] ], - [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 - [ -1, 9, C3, [ 256 ] ], - [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 - [ -1, 9, C3, [ 512 ] ], - [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 - [ -1, 3, C3, [ 768 ] ], - [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 - [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], - [ -1, 3, C3, [ 1024, False ] ], # 11 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 + [-1, 3, C3, [768]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 11 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: - [ [ -1, 1, Conv, [ 768, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 - [ -1, 3, C3, [ 768, False ] ], # 15 - - [ -1, 1, Conv, [ 512, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 - [ -1, 3, C3, [ 512, False ] ], # 19 - - [ -1, 1, Conv, [ 256, 1, 1 ] ], - [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], - [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 - [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) - - [ -1, 1, Conv, [ 256, 3, 2 ] ], - [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 - [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) - - [ -1, 1, Conv, [ 512, 3, 2 ] ], - [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 - [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) - - [ -1, 1, Conv, [ 768, 3, 2 ] ], - [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 - [ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge) - - [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) + [[-1, 1, Conv, [768, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P5 + [-1, 3, C3, [768, False]], # 15 + + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 19 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 23 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 20], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 26 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 16], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [768, False]], # 29 (P5/32-large) + + [-1, 1, Conv, [768, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P6 + [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) + + [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5, P6) ] diff --git a/cv/detection/yolov5/pytorch/models/tf.py b/cv/detection/yolov5/pytorch/models/tf.py new file mode 100755 index 0000000..010fafd --- /dev/null +++ b/cv/detection/yolov5/pytorch/models/tf.py @@ -0,0 +1,450 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +TensorFlow, Keras and TFLite versions of YOLOv5 +Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127 + +Usage: + $ python models/tf.py --weights yolov5s.pt + +Export: + $ python path/to/export.py --weights yolov5s.pt --include saved_model pb tflite tfjs +""" + +import argparse +import logging +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +# ROOT = ROOT.relative_to(Path.cwd()) # relative + +import numpy as np +import tensorflow as tf +import torch +import torch.nn as nn +from tensorflow import keras + +from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, autopad, C3 +from models.experimental import CrossConv, MixConv2d, attempt_load +from models.yolo import Detect +from utils.general import make_divisible, print_args, set_logging +from utils.activations import SiLU + +LOGGER = logging.getLogger(__name__) + + +class TFBN(keras.layers.Layer): + # TensorFlow BatchNormalization wrapper + def __init__(self, w=None): + super(TFBN, self).__init__() + self.bn = keras.layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(w.bias.numpy()), + gamma_initializer=keras.initializers.Constant(w.weight.numpy()), + moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), + moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), + epsilon=w.eps) + + def call(self, inputs): + return self.bn(inputs) + + +class TFPad(keras.layers.Layer): + def __init__(self, pad): + super(TFPad, self).__init__() + self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) + + def call(self, inputs): + return tf.pad(inputs, self.pad, mode='constant', constant_values=0) + + +class TFConv(keras.layers.Layer): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): + # ch_in, ch_out, weights, kernel, stride, padding, groups + super(TFConv, self).__init__() + assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" + assert isinstance(k, int), "Convolution with multiple kernels is not allowed." + # TensorFlow convolution padding is inconsistent with PyTorch (e.g.
k=3 s=2 'SAME' padding) + # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch + + conv = keras.layers.Conv2D( + c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True, + kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), + bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) + self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) + self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity + + # YOLOv5 activations + if isinstance(w.act, nn.LeakyReLU): + self.act = (lambda x: keras.activations.relu(x, alpha=0.1)) if act else tf.identity + elif isinstance(w.act, nn.Hardswish): + self.act = (lambda x: x * tf.nn.relu6(x + 3) * 0.166666667) if act else tf.identity + elif isinstance(w.act, (nn.SiLU, SiLU)): + self.act = (lambda x: keras.activations.swish(x)) if act else tf.identity + else: + raise Exception(f'no matching TensorFlow activation found for {w.act}') + + def call(self, inputs): + return self.act(self.bn(self.conv(inputs))) + + +class TFFocus(keras.layers.Layer): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): + # ch_in, ch_out, kernel, stride, padding, groups + super(TFFocus, self).__init__() + self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) + + def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) + # inputs = inputs / 255. # normalize 0-255 to 0-1 + return self.conv(tf.concat([inputs[:, ::2, ::2, :], + inputs[:, 1::2, ::2, :], + inputs[:, ::2, 1::2, :], + inputs[:, 1::2, 1::2, :]], 3)) + + +class TFBottleneck(keras.layers.Layer): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion + super(TFBottleneck, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) + self.add = shortcut and c1 == c2 + + def call(self, inputs): + return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) + + +class TFConv2d(keras.layers.Layer): + # Substitution for PyTorch nn.Conv2D + def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): + super(TFConv2d, self).__init__() + assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" + self.conv = keras.layers.Conv2D( + c2, k, s, 'VALID', use_bias=bias, + kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()), + bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None, ) + + def call(self, inputs): + return self.conv(inputs) + + +class TFBottleneckCSP(keras.layers.Layer): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): + # ch_in, ch_out, number, shortcut, groups, expansion + super(TFBottleneckCSP, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2) + self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3) + self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4) + self.bn = TFBN(w.bn) + self.act = lambda x: keras.activations.relu(x, alpha=0.1) + self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) + + def call(self, inputs): + y1 = self.cv3(self.m(self.cv1(inputs))) + y2 = self.cv2(inputs) + return 
self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3)))) + + +class TFC3(keras.layers.Layer): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): + # ch_in, ch_out, number, shortcut, groups, expansion + super(TFC3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) + self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) + self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) + + def call(self, inputs): + return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) + + +class TFSPP(keras.layers.Layer): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13), w=None): + super(TFSPP, self).__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2) + self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k] + + def call(self, inputs): + x = self.cv1(inputs) + return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3)) + + +class TFDetect(keras.layers.Layer): + def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer + super(TFDetect, self).__init__() + self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [tf.zeros(1)] * self.nl # init grid + self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32) + self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), + [self.nl, 1, -1, 1, 2]) + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] + self.training = False # set to False after building model + self.imgsz = imgsz + for i in range(self.nl): + ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] + self.grid[i] = self._make_grid(nx, ny) + + def call(self, inputs): + z = [] # inference output + x = [] + for i in range(self.nl): + x.append(self.m[i](inputs[i])) + # x(bs,20,20,255) to x(bs,3,20,20,85) + ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] + x[i] = tf.transpose(tf.reshape(x[i], [-1, ny * nx, self.na, self.no]), [0, 2, 1, 3]) + + if not self.training: # inference + y = tf.sigmoid(x[i]) + xy = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * self.stride[i] # xy + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] + # Normalize xywh to 0-1 to reduce calibration error + xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) + wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) + y = tf.concat([xy, wh, y[..., 4:]], -1) + z.append(tf.reshape(y, [-1, 3 * ny * nx, self.no])) + + return x if self.training else (tf.concat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) + return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) + + +class TFUpsample(keras.layers.Layer): + def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' + super(TFUpsample, self).__init__() + assert scale_factor == 2, "scale_factor must be 2" + self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * 2, x.shape[2] * 2), method=mode) + # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) + # with default arguments: align_corners=False, half_pixel_centers=False + # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x, + # size=(x.shape[1] * 2, x.shape[2] * 2)) + + def call(self, inputs): + return self.upsample(inputs) + + +class TFConcat(keras.layers.Layer): + def __init__(self, dimension=1, w=None): + super(TFConcat, self).__init__() + assert dimension == 1, "convert only NCHW to NHWC concat" + self.d = 3 + + def call(self, inputs): + return tf.concat(inputs, self.d) + + +def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) + LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m_str = m + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + try: + args[j] = eval(a) if isinstance(a, str) else a # eval strings + except NameError: + pass + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: + c1, c2 = ch[f], args[0] + c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 + + args = [c1, c2, *args[1:]] + if m in [BottleneckCSP, C3]: + args.insert(2, n) + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) + elif m is Detect: + args.append([ch[x + 1] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + args.append(imgsz) + else: + c2 = ch[f] + + tf_m = eval('TF' + m_str.replace('nn.', '')) + m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \ + else tf_m(*args, w=model.model[i]) # module + + torch_m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum([x.numel() for x in torch_m_.parameters()]) # number params + m_.i, m_.f, m_.type, m_.np = 
i, f, t, np # attach index, 'from' index, type, number params + LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + ch.append(c2) + return keras.Sequential(layers), sorted(save) + + +class TFModel: + def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes + super(TFModel, self).__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg) as f: + self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict + + # Define model + if nc and nc != self.yaml['nc']: + print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc)) + self.yaml['nc'] = nc # override yaml value + self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz) + + def predict(self, inputs, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, + conf_thres=0.25): + y = [] # outputs + x = inputs + for i, m in enumerate(self.model.layers): + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + + x = m(x) # run + y.append(x if m.i in self.savelist else None) # save output + + # Add TensorFlow NMS + if tf_nms: + boxes = self._xywh2xyxy(x[0][..., :4]) + probs = x[0][:, :, 4:5] + classes = x[0][:, :, 5:] + scores = probs * classes + if agnostic_nms: + nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres) + return nms, x[1] + else: + boxes = tf.expand_dims(boxes, 2) + nms = tf.image.combined_non_max_suppression( + boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False) + return nms, x[1] + + return x[0] # output only first tensor [1,6300,85] = [xywh, conf, class0, class1, ...] + # x = x[0][0] # [x(1,6300,85), ...] 
to x(6300,85) + # xywh = x[..., :4] # x(6300,4) boxes + # conf = x[..., 4:5] # x(6300,1) confidences + # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes + # return tf.concat([conf, cls, xywh], 1) + + @staticmethod + def _xywh2xyxy(xywh): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1) + return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1) + + +class AgnosticNMS(keras.layers.Layer): + # TF Agnostic NMS + def call(self, input, topk_all, iou_thres, conf_thres): + # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450 + return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), input, + fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32), + name='agnostic_nms') + + @staticmethod + def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS + boxes, classes, scores = x + class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) + scores_inp = tf.reduce_max(scores, -1) + selected_inds = tf.image.non_max_suppression( + boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres) + selected_boxes = tf.gather(boxes, selected_inds) + padded_boxes = tf.pad(selected_boxes, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]], + mode="CONSTANT", constant_values=0.0) + selected_scores = tf.gather(scores_inp, selected_inds) + padded_scores = tf.pad(selected_scores, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], + mode="CONSTANT", constant_values=-1.0) + selected_classes = tf.gather(class_inds, selected_inds) + padded_classes = tf.pad(selected_classes, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], + mode="CONSTANT", constant_values=-1.0) + valid_detections = tf.shape(selected_inds)[0] + return padded_boxes, padded_scores, padded_classes, valid_detections + + +def representative_dataset_gen(dataset, ncalib=100): + # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays + for n, (path, img, im0s, vid_cap) in enumerate(dataset): + input = np.transpose(img, [1, 2, 0]) + input = np.expand_dims(input, axis=0).astype(np.float32) + input /= 255.0 + yield [input] + if n >= ncalib: + break + + +def run(weights=ROOT / 'yolov5s.pt', # weights path + imgsz=(640, 640), # inference size h,w + batch_size=1, # batch size + dynamic=False, # dynamic batch size + ): + # PyTorch model + im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image + model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False) + y = model(im) # inference + model.info() + + # TensorFlow model + im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image + tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) + y = tf_model.predict(im) # inference + + # Keras model + im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) + keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im)) + keras_model.summary() + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') + parser.add_argument('--dynamic', 
action='store_true', help='dynamic batch size') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(FILE.stem, opt) + return opt + + +def main(opt): + set_logging() + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/cv/detection/yolov5/pytorch/models/yolo.py b/cv/detection/yolov5/pytorch/models/yolo.py index b114433..2417978 100644 --- a/cv/detection/yolov5/pytorch/models/yolo.py +++ b/cv/detection/yolov5/pytorch/models/yolo.py @@ -1,32 +1,36 @@ -"""YOLOv5-specific modules +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +YOLO-specific modules Usage: $ python path/to/models/yolo.py --cfg yolov5s.yaml """ import argparse -import logging import sys from copy import deepcopy from pathlib import Path -FILE = Path(__file__).absolute() -sys.path.append(FILE.parents[1].as_posix()) # add yolov5/ to path +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +# ROOT = ROOT.relative_to(Path.cwd()) # relative from models.common import * from models.experimental import * from utils.autoanchor import check_anchor_order -from utils.general import make_divisible, check_file, set_logging +from utils.general import check_yaml, make_divisible, print_args, set_logging from utils.plots import feature_visualization -from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ - select_device, copy_attr +from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \ + select_device, time_sync try: import thop # for FLOPs computation except ImportError: thop = None -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) class Detect(nn.Module): @@ -34,29 +38,29 @@ class Detect(nn.Module): onnx_dynamic = False # ONNX export parameter def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer - super(Detect, self).__init__() + super().__init__() self.nc = nc # number of classes self.no = nc + 5 # number of outputs per anchor self.nl = len(anchors) # number of detection layers self.na = len(anchors[0]) // 2 # number of anchors self.grid = [torch.zeros(1)] * self.nl # init grid - a = torch.tensor(anchors).float().view(self.nl, -1, 2) - self.register_buffer('anchors', a) # shape(nl,na,2) - self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) - self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.anchor_grid = [torch.zeros(1)] * self.nl # init anchor grid + self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na + 1, 1) for x in ch) # output conv, padded by one channel (sliced back to na * no in forward) + self.inplace = inplace # use in-place ops (e.g.
slice assignment) def forward(self, x): - # x = x.copy() # for profiling z = [] # inference output for i in range(self.nl): x[i] = self.m[i](x[i]) # conv + x[i] = x[i].to(memory_format=torch.contiguous_format) + x[i] = x[i][:, :255, :, :] bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: - self.grid[i] = self._make_grid(nx, ny).to(x[i].device) + self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) y = x[i].sigmoid() if self.inplace: @@ -64,41 +68,43 @@ class Detect(nn.Module): y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy - wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh + wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh y = torch.cat((xy, wh, y[..., 4:]), -1) z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1), x) - @staticmethod - def _make_grid(nx=20, ny=20): - yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) - return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + def _make_grid(self, nx=20, ny=20, i=0): + d = self.anchors[i].device + yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)]) + grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float() + anchor_grid = (self.anchors[i].clone() * self.stride[i]) \ + .view((1, self.na, 1, 1, 2)).expand((1, self.na, ny, nx, 2)).float() + return grid, anchor_grid class Model(nn.Module): def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes - super(Model, self).__init__() + super().__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name - with open(cfg) as f: + with open(cfg, errors='ignore') as f: self.yaml = yaml.safe_load(f) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: - logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: - logger.info(f'Overriding model.yaml anchors with anchors={anchors}') + LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names self.inplace = self.yaml.get('inplace', True) - # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() @@ -110,55 +116,42 @@ class Model(nn.Module): check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once - # logger.info('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() - logger.info('') + LOGGER.info('') def forward(self, x, augment=False, profile=False, visualize=False): if augment: - return self.forward_augment(x) # augmented inference, None - return self.forward_once(x, profile, visualize) # single-scale inference, train + return self._forward_augment(x) # 
augmented inference, None + return self._forward_once(x, profile, visualize) # single-scale inference, train - def forward_augment(self, x): + def _forward_augment(self, x): img_size = x.shape[-2:] # height, width s = [1, 0.83, 0.67] # scales f = [None, 3, None] # flips (2-ud, 3-lr) y = [] # outputs for si, fi in zip(s, f): xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) - yi = self.forward_once(xi)[0] # forward + yi = self._forward_once(xi)[0] # forward # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save yi = self._descale_pred(yi, fi, si, img_size) y.append(yi) + y = self._clip_augmented(y) # clip augmented tails return torch.cat(y, 1), None # augmented inference, train - def forward_once(self, x, profile=False, visualize=False): + def _forward_once(self, x, profile=False, visualize=False): y, dt = [], [] # outputs for m in self.model: if m.f != -1: # if not from previous layer x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers - if profile: - o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs - t = time_synchronized() - for _ in range(10): - _ = m(x) - dt.append((time_synchronized() - t) * 100) - if m == self.model[0]: - logger.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") - logger.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') - + self._profile_one_layer(m, x, dt) x = m(x) # run y.append(x if m.i in self.save else None) # save output - if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) - - if profile: - logger.info('%.1fms total' % sum(dt)) return x def _descale_pred(self, p, flips, scale, img_size): @@ -178,54 +171,75 @@ class Model(nn.Module): p = torch.cat((x, y, wh, p[..., 4:]), -1) return p + def _clip_augmented(self, y): + # Clip YOLOv5 augmented inference tails + nl = self.model[-1].nl # number of detection layers (P3-P5) + g = sum(4 ** x for x in range(nl)) # grid points + e = 1 # exclude layer count + i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices + y[0] = y[0][:, :-i] # large + i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices + y[-1] = y[-1][:, i:] # small + return y + + def _profile_one_layer(self, m, x, dt): + c = isinstance(m, Detect) # is final layer, copy input as inplace fix + o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs + t = time_sync() + for _ in range(10): + m(x.copy() if c else x) + dt.append((time_sync() - t) * 100) + if m == self.model[0]: + LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} {'module'}") + LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') + if c: + LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") + def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency # https://arxiv.org/abs/1708.02002 section 3.3 # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
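+ # NOTE: this port pads each Detect output conv from na * no channels (255 with the default nc=80) to 256, presumably to keep channel counts aligned for the target hardware: forward() slices the padding channel back off (e.g. conv out 3 * 85 + 1 = 256 -> x[:, :255] -> view(bs, 3, 85, ny, nx)), and the bias init here works on the first 255 entries as upstream does, then re-appends a single zero bias so the padded channel stays inert. + # The commented block below is the upstream reference implementation.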
+ # m = self.model[-1] # Detect() module + # for mi, s in zip(m.m, m.stride): # from + # b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + # b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + # b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + # mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) m = self.model[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from - b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + #b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) + bias_no_pad = mi.bias[:255] + b = bias_no_pad.view(m.na, -1) b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls - mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls + b = b.view(-1) + b_pad = torch.cat([b, torch.Tensor([0])], dim=0) + mi.bias = torch.nn.Parameter(b_pad, requires_grad=True) def _print_biases(self): - m = self.model[-1] # Detect() module - for mi in m.m: # from - b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) - logger.info( - ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + pass + # m = self.model[-1] # Detect() module + # for mi in m.m: # from + # b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) + # LOGGER.info( + # ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) # def _print_weights(self): # for m in self.model.modules(): # if type(m) is Bottleneck: - # logger.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights + # LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers - logger.info('Fusing layers... ') + LOGGER.info('Fusing layers... ') for m in self.model.modules(): - if type(m) is Conv and hasattr(m, 'bn'): + if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv delattr(m, 'bn') # remove batchnorm - m.forward = m.fuseforward # update forward + m.forward = m.forward_fuse # update forward self.info() return self - def nms(self, mode=True): # add or remove NMS module - present = type(self.model[-1]) is NMS # last layer is NMS - if mode and not present: - logger.info('Adding NMS... ') - m = NMS() # module - m.f = -1 # from - m.i = self.model[-1].i + 1 # index - self.model.add_module(name='%s' % m.i, module=m) # add - self.eval() - elif not mode and present: - logger.info('Removing NMS... ') - self.model = self.model[:-1] # remove - return self - def autoshape(self): # add AutoShape module - logger.info('Adding AutoShape... ') + LOGGER.info('Adding AutoShape... 
') m = AutoShape(self) # wrap model copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes return m @@ -233,9 +247,20 @@ class Model(nn.Module): def info(self, verbose=False, img_size=640): # print model information model_info(self, verbose, img_size) + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + m = self.model[-1] # Detect() + if isinstance(m, Detect): + m.stride = fn(m.stride) + m.grid = list(map(fn, m.grid)) + if isinstance(m.anchor_grid, list): + m.anchor_grid = list(map(fn, m.anchor_grid)) + return self + def parse_model(d, ch): # model_dict, input_channels(3) - logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) + LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors no = na * (nc + 5) # number of outputs = anchors * (classes + 5) @@ -246,18 +271,18 @@ def parse_model(d, ch): # model_dict, input_channels(3) for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except NameError: pass - n = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, - C3, C3TR]: + n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]: c1, c2 = ch[f], args[0] if c2 != no: # if not output c2 = make_divisible(c2 * gw, 8) args = [c1, c2, *args[1:]] - if m in [BottleneckCSP, C3, C3TR]: + if m in [BottleneckCSP, C3, C3TR, C3Ghost]: args.insert(2, n) # number of repeats n = 1 elif m is nn.BatchNorm2d: @@ -279,7 +304,7 @@ def parse_model(d, ch): # model_dict, input_channels(3) t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print + LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args)) # print save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist layers.append(m_) if i == 0: @@ -292,8 +317,10 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--profile', action='store_true', help='profile model speed') opt = parser.parse_args() - opt.cfg = check_file(opt.cfg) # check file + opt.cfg = check_yaml(opt.cfg) # check YAML + print_args(FILE.stem, opt) set_logging() device = select_device(opt.device) @@ -302,12 +329,12 @@ if __name__ == '__main__': model.train() # Profile - # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 320, 320).to(device) - # y = model(img, profile=True) + if opt.profile: + img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) + y = model(img, profile=True) # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898) # from torch.utils.tensorboard import SummaryWriter # tb_writer = SummaryWriter('.') - # logger.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/") + # LOGGER.info("Run 'tensorboard --logdir=models' to view tensorboard at http://localhost:6006/") # tb_writer.add_graph(torch.jit.trace(model, img, strict=False), []) # add model graph - # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard diff --git a/cv/detection/yolov5/pytorch/models/yolov5l.yaml b/cv/detection/yolov5/pytorch/models/yolov5l.yaml index 0c130c1..ce8a5de 100644 --- a/cv/detection/yolov5/pytorch/models/yolov5l.yaml +++ b/cv/detection/yolov5/pytorch/models/yolov5l.yaml @@ -1,3 +1,5 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple @@ -7,22 +9,22 @@ anchors: - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [-1, 3, C3, [128]], [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 - [-1, 9, C3, [256]], + [-1, 6, C3, [256]], [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, C3, [512]], [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 - [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 3, C3, [1024, False]], # 9 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: [[-1, 1, Conv, [512, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], diff --git a/cv/detection/yolov5/pytorch/models/yolov5m.yaml b/cv/detection/yolov5/pytorch/models/yolov5m.yaml index e477b34..ad13ab3 100644 --- a/cv/detection/yolov5/pytorch/models/yolov5m.yaml +++ b/cv/detection/yolov5/pytorch/models/yolov5m.yaml @@ -1,3 +1,5 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 0.67 # model depth multiple @@ -7,22 +9,22 @@ anchors: - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [-1, 3, C3, [128]], [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 - [-1, 9, C3, [256]], + [-1, 6, C3, [256]], [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, C3, [512]], [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 - [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 3, C3, [1024, False]], # 9 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: [[-1, 1, Conv, [512, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], diff --git a/cv/detection/yolov5/pytorch/models/yolov5n.yaml b/cv/detection/yolov5/pytorch/models/yolov5n.yaml new file mode 100755 index 
0000000..8a28a40 --- /dev/null +++ b/cv/detection/yolov5/pytorch/models/yolov5n.yaml @@ -0,0 +1,48 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + +# Parameters +nc: 80 # number of classes +depth_multiple: 0.33 # model depth multiple +width_multiple: 0.25 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# YOLOv5 v6.0 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, C3, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + [-1, 6, C3, [256]], + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + [-1, 9, C3, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 + ] + +# YOLOv5 v6.0 head +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, C3, [512, False]], # 13 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + [-1, 3, C3, [256, False]], # 17 (P3/8-small) + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, C3, [512, False]], # 20 (P4/16-medium) + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, C3, [1024, False]], # 23 (P5/32-large) + + [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/cv/detection/yolov5/pytorch/models/yolov5s.yaml b/cv/detection/yolov5/pytorch/models/yolov5s.yaml index e85442d..f35beab 100644 --- a/cv/detection/yolov5/pytorch/models/yolov5s.yaml +++ b/cv/detection/yolov5/pytorch/models/yolov5s.yaml @@ -1,3 +1,5 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 0.33 # model depth multiple @@ -7,22 +9,22 @@ anchors: - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [-1, 3, C3, [128]], [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 - [-1, 9, C3, [256]], + [-1, 6, C3, [256]], [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, C3, [512]], [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 - [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 3, C3, [1024, False]], # 9 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: [[-1, 1, Conv, [512, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], diff --git a/cv/detection/yolov5/pytorch/models/yolov5x.yaml b/cv/detection/yolov5/pytorch/models/yolov5x.yaml index c7ca035..f617a02 100644 --- a/cv/detection/yolov5/pytorch/models/yolov5x.yaml +++ b/cv/detection/yolov5/pytorch/models/yolov5x.yaml @@ -1,3 +1,5 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license + # Parameters nc: 80 # number of classes depth_multiple: 1.33 # model depth multiple @@ -7,22 +9,22 @@ anchors: - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 -# YOLOv5 backbone +# YOLOv5 v6.0 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [-1, 3, C3, [128]], [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 - [-1, 9, C3, [256]], + [-1, 6, C3, [256]], [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, C3, [512]], [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 - [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 3, C3, [1024, 
False]], # 9 + [-1, 3, C3, [1024]], + [-1, 1, SPPF, [1024, 5]], # 9 ] -# YOLOv5 head +# YOLOv5 v6.0 head head: [[-1, 1, Conv, [512, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], diff --git a/cv/detection/yolov5/pytorch/requirements.txt b/cv/detection/yolov5/pytorch/requirements.txt index b1b9e19..76c5fa0 100644 --- a/cv/detection/yolov5/pytorch/requirements.txt +++ b/cv/detection/yolov5/pytorch/requirements.txt @@ -1,27 +1,36 @@ -# pip install -r requirements_bi.txt +# pip install -r requirements.txt -# base ---------------------------------------- +# Base ---------------------------------------- matplotlib>=3.2.2 -numpy>=1.19.5 -Pillow +numpy>=1.18.5 +opencv-python>=4.1.2 +Pillow>=7.1.2 +PyYAML>=5.3.1 +requests>=2.23.0 scipy>=1.4.1 +torch>=1.7.0 +torchvision>=0.8.1 tqdm>=4.41.0 -# logging ------------------------------------- +# Logging ------------------------------------- tensorboard>=2.4.1 # wandb -# plotting ------------------------------------ +# Plotting ------------------------------------ +pandas>=1.1.4 seaborn>=0.11.0 -pandas -# export -------------------------------------- -# coremltools>=4.1 -# onnx>=1.9.0 -# scikit-learn==0.19.2 # for coreml quantization +# Export -------------------------------------- +# coremltools>=4.1 # CoreML export +# onnx>=1.9.0 # ONNX export +# onnx-simplifier>=0.3.6 # ONNX simplifier +# scikit-learn==0.19.2 # CoreML quantization +# tensorflow>=2.4.1 # TFLite export +# tensorflowjs>=3.9.0 # TF.js export -# extras -------------------------------------- +# Extras -------------------------------------- +# albumentations>=1.0.3 # Cython # for pycocotools https://github.com/cocodataset/cocoapi/issues/172 -# pycocotools>=2.0 # COCO mAP -# albumentations>=1.0.0 +pycocotools>=2.0 # COCO mAP +# roboflow thop # FLOPs computation diff --git a/cv/detection/yolov5/pytorch/run.sh b/cv/detection/yolov5/pytorch/run.sh index ed5cda7..bf22c2e 100644 --- a/cv/detection/yolov5/pytorch/run.sh +++ b/cv/detection/yolov5/pytorch/run.sh @@ -1,38 +1,28 @@ #!/bin/bash -# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2023-2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -start_time=$(date +%s) -unset no_proxy use_proxy https_proxy http_proxy +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
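Editor's note on the model yaml hunks above, before the run.sh changes continue: all five variants (yolov5n/s/m/l/x) make the same v6.0 backbone change, replacing the Focus stem with a strided 6x6 Conv and swapping the trailing `SPP [1024, [5, 9, 13]]` + `C3 [1024, False]` pair for `C3 [1024]` + `SPPF [1024, 5]`. A minimal sketch of SPPF, assuming a stand-in `Conv` wrapper (Conv2d + BN + SiLU) rather than the repo's own `models.common.Conv`:

```python
import torch
import torch.nn as nn

class Conv(nn.Module):  # stand-in for models.common.Conv: Conv2d + BatchNorm + SiLU
    def __init__(self, c1, c2, k=1, s=1):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, k // 2, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

class SPPF(nn.Module):  # Spatial Pyramid Pooling - Fast
    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.m(x)   # one k=5 pool  ~ SPP k=5
        y2 = self.m(y1)  # two chained k=5 pools ~ SPP k=9
        return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))  # three ~ SPP k=13
```

Chaining three stride-1, k=5 max-pools reproduces the 5/9/13 receptive fields of the old SPP while sharing work between the pools, so the yaml swap changes speed, not the set of features pooled.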
+export WANDB_DISABLED=true EXIT_STATUS=0 -check_status() { - if ((${PIPESTATUS[0]} != 0)); then - EXIT_STATUS=1 - fi +check_status() +{ + if ((${PIPESTATUS[0]} != 0)); then + EXIT_STATUS=1 + fi } -python3 -m torch.distributed.launch --nproc_per_node=16 \ - train.py --batch-size 32 \ - --data ./data/coco.yaml --weights "" \ - --cfg models/yolov5m.yaml --workers 16 \ - --epochs 3 --linear-lr "$@" -check_status +python3 -m torch.distributed.launch --nproc_per_node=16 train.py --epochs 3 --batch-size 512 --data ./data/coco.yaml --weights "" --cfg models/yolov5m.yaml --workers 16 "$@"; check_status -wait -end_time=$(date +%s) -e2e_time=$(($end_time - $start_time)) -echo "end to end time: $e2e_time" >>total_time.log exit ${EXIT_STATUS} diff --git a/cv/detection/yolov5/pytorch/train.py b/cv/detection/yolov5/pytorch/train.py index f8f0f95..8f6c3e4 100644 --- a/cv/detection/yolov5/pytorch/train.py +++ b/cv/detection/yolov5/pytorch/train.py @@ -1,19 +1,20 @@ -# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2023-2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""Train a YOLOv5 model on a custom dataset +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Train a YOLOv5 model on a custom dataset Usage: $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 @@ -21,147 +22,49 @@ Usage: import argparse import logging +import math import os import random import sys import time -import warnings from copy import deepcopy from pathlib import Path -from threading import Thread -import traceback - -import torch - -try: - from torch.utils.tensorboard import SummaryWriter -except: - class SummaryWriter(object): - def __init__(self, log_dir=None, comment='', purge_step=None, max_queue=10, - flush_secs=120, filename_suffix=''): - if not log_dir: - import socket - from datetime import datetime - current_time = datetime.now().strftime('%b%d_%H-%M-%S') - log_dir = os.path.join( - 'runs', current_time + '_' + socket.gethostname() + comment) - self.log_dir = log_dir - self.purge_step = purge_step - self.max_queue = max_queue - self.flush_secs = flush_secs - self.filename_suffix = filename_suffix - - # Initialize the file writers, but they can be cleared out on close - # and recreated later as needed. 
- self.file_writer = self.all_writers = None - self._get_file_writer() - - # Create default bins for histograms, see generate_testdata.py in tensorflow/tensorboard - v = 1E-12 - buckets = [] - neg_buckets = [] - while v < 1E20: - buckets.append(v) - neg_buckets.append(-v) - v *= 1.1 - self.default_bins = neg_buckets[::-1] + [0] + buckets - - def _check_caffe2_blob(self, item): pass - - def _get_file_writer(self): pass - - def get_logdir(self): - """Returns the directory where event files will be written.""" - return self.log_dir - - def add_hparams(self, hparam_dict, metric_dict, hparam_domain_discrete=None, run_name=None): pass - - def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False): pass - - def add_scalars(self, main_tag, tag_scalar_dict, global_step=None, walltime=None): pass - - def add_histogram(self, tag, values, global_step=None, bins='tensorflow', walltime=None, max_bins=None): pass - - def add_histogram_raw(self, tag, min, max, num, sum, sum_squares, bucket_limits, bucket_counts, global_step=None, walltime=None): pass - - def add_image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW'): pass - - def add_images(self, tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW'): pass - - def add_image_with_boxes(self, tag, img_tensor, box_tensor, global_step=None, walltime=None, rescale=1, dataformats='CHW', labels=None): pass - - def add_figure(self, tag, figure, global_step=None, close=True, walltime=None): pass - - def add_video(self, tag, vid_tensor, global_step=None, fps=4, walltime=None): pass - - def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None): pass - - def add_text(self, tag, text_string, global_step=None, walltime=None): pass - - def add_onnx_graph(self, prototxt): pass - - def add_graph(self, model, input_to_model=None, verbose=False): pass - - @staticmethod - def _encode(rawstr): pass - - def add_embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None): pass - - def add_pr_curve(self, tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None): pass - - def add_pr_curve_raw(self, tag, true_positive_counts, false_positive_counts, true_negative_counts, false_negative_counts, precision, recall, global_step=None, num_thresholds=127, weights=None, walltime=None): pass - - def add_custom_scalars_multilinechart(self, tags, category='default', title='untitled'): pass - - def add_custom_scalars_marginchart(self, tags, category='default', title='untitled'): pass - - def add_custom_scalars(self, layout): pass - - def add_mesh(self, tag, vertices, colors=None, faces=None, config_dict=None, global_step=None, walltime=None): pass - - def flush(self): pass - - def close(self): pass - - def __enter__(self): - return self - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - -import math import numpy as np +import torch import torch.distributed as dist import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch.optim.lr_scheduler as lr_scheduler -import torch.utils.data import yaml from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP +from torch.optim import Adam, SGD, lr_scheduler from tqdm import tqdm -FILE = Path(__file__).absolute() -sys.path.append(FILE.parents[0].as_posix()) # add yolov5/ to path +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in 
sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import test # for end-of-epoch mAP +import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors from utils.datasets import create_dataloader from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ - check_requirements, print_mutation, set_logging, one_cycle, colorstr -from utils.google_utils import attempt_download + strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \ + check_file, check_yaml, check_suffix, print_args, print_mutation, set_logging, one_cycle, colorstr, methods +from utils.downloads import attempt_download from utils.loss import ComputeLoss -from utils.plots import plot_images, plot_labels, plot_results, plot_evolution -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, de_parallel -from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume +from utils.plots import plot_labels, plot_evolve +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \ + torch_distributed_zero_first +from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.metrics import fitness +from utils.loggers import Loggers +from utils.callbacks import Callbacks -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) @@ -170,122 +73,112 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) def train(hyp, # path/to/hyp.yaml or hyp dictionary opt, device, + callbacks ): - save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, notest, nosave, workers, = \ - opt.save_dir, opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ - opt.resume, opt.notest, opt.nosave, opt.workers + save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \ + Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ + opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze # Directories - save_dir = Path(save_dir) - wdir = save_dir / 'weights' - wdir.mkdir(parents=True, exist_ok=True) # make dir - last = wdir / 'last.pt' - best = wdir / 'best.pt' - results_file = save_dir / 'results.txt' + w = save_dir / 'weights' # weights dir + (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir + last, best = w / 'last.pt', w / 'best.pt' # Hyperparameters if isinstance(hyp, str): - with open(hyp) as f: + with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict - logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) # Save run settings with open(save_dir / 'hyp.yaml', 'w') as f: yaml.safe_dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.safe_dump(vars(opt), f, sort_keys=False) - - # Configure - plots = not evolve # create plots - cuda = device.type != 'cpu' - 
init_seeds(1) - with open(data) as f: - data_dict = yaml.safe_load(f) # data dict + data_dict = None # Loggers - loggers = {'wandb': None, 'tb': None} # loggers dict if RANK in [-1, 0]: - # TensorBoard - if not evolve: - prefix = colorstr('tensorboard: ') - logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/") - loggers['tb'] = SummaryWriter(str(save_dir)) - - # W&B - opt.hyp = hyp # add hyperparameters - run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None - run_id = run_id if opt.resume else None # start fresh run if transfer learning - wandb_logger = WandbLogger(opt, save_dir.stem, run_id, data_dict) - loggers['wandb'] = wandb_logger.wandb - if loggers['wandb']: - data_dict = wandb_logger.data_dict - weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp # may update weights, epochs if resuming + loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance + if loggers.wandb: + data_dict = loggers.wandb.data_dict + if resume: + weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp + # Register actions + for k in methods(loggers): + callbacks.register_action(k, callback=getattr(loggers, k)) + + # Config + plots = not evolve # create plots + cuda = device.type != 'cpu' + init_seeds(1 + RANK) + with torch_distributed_zero_first(LOCAL_RANK): + data_dict = data_dict or check_dataset(data) # check if None + train_path, val_path = data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, data) # check + assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset # Model + check_suffix(weights, '.pt') # check weights pretrained = weights.endswith('.pt') if pretrained: - with torch_distributed_zero_first(RANK): + with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys - state_dict = ckpt['model'].float().state_dict() # to FP32 - state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect - model.load_state_dict(state_dict, strict=False) # load - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report + csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 + csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect + model.load_state_dict(csd, strict=False) # load + LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: - model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create - with torch_distributed_zero_first(RANK): - check_dataset(data_dict) # check - train_path = data_dict['train'] - test_path = data_dict['val'] - + model = Model(cfg, ch=4, nc=nc, anchors=hyp.get('anchors')).to(device) # create + # Freeze - freeze = [] # parameter names to freeze (full or partial) + freeze = [f'model.{x}.' 
for x in range(freeze)] # layers to freeze for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): - print('freezing %s' % k) + print(f'freezing {k}') v.requires_grad = False + model = model.to(memory_format=torch.channels_last) + # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay - logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") + LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}") - pg0, pg1, pg2 = [], [], [] # optimizer parameter groups - for k, v in model.named_modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): - pg2.append(v.bias) # biases - if isinstance(v, nn.BatchNorm2d): - pg0.append(v.weight) # no decay - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): - pg1.append(v.weight) # apply decay + g0, g1, g2 = [], [], [] # optimizer parameter groups + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias + g2.append(v.bias) + if isinstance(v, nn.BatchNorm2d): # weight (no decay) + g0.append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g1.append(v.weight) if opt.adam: - optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: - optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay - optimizer.add_param_group({'params': pg2}) # add pg2 (biases) - logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) - del pg0, pg1, pg2 + optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']}) # add g1 with weight_decay + optimizer.add_param_group({'params': g2}) # add g2 (biases) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups " + f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias") + del g0, g1, g2 - # Scheduler https://arxiv.org/pdf/1812.01187.pdf - # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR + # Scheduler if opt.linear_lr: lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear else: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) - # plot_lr_scheduler(optimizer, scheduler, epochs) + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA ema = ModelEMA(model) if RANK in [-1, 0] else None @@ -303,70 +196,62 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) ema.updates = ckpt['updates'] - # Results - if ckpt.get('training_results') is not None: - results_file.write_text(ckpt['training_results']) # write results.txt - # Epochs start_epoch = ckpt['epoch'] + 1 if resume: - assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
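A reference note on the scheduler hunk above: `lr_scheduler.LambdaLR` multiplies the base lr by `lf(epoch)`, so both branches are multipliers decaying from 1.0 to `hyp['lrf']`. A small sketch of the cosine factory (this matches the `one_cycle` helper in `utils/general.py` at the time of this sync; the `epochs`/`lrf` values below are illustrative only):

```python
import math

def one_cycle(y1=0.0, y2=1.0, steps=100):
    # cosine ramp from y1 to y2 over `steps` epochs
    return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

epochs, lrf = 300, 0.1  # illustrative; lrf normally comes from the hyp yaml
cosine = one_cycle(1, lrf, epochs)                             # cosine 1.0 -> lrf
linear = lambda x: (1 - x / (epochs - 1)) * (1.0 - lrf) + lrf  # linear 1.0 -> lrf

# both multipliers start at 1.0 and end at lrf
assert abs(cosine(0) - 1.0) < 1e-9 and abs(cosine(epochs) - lrf) < 1e-9
assert abs(linear(0) - 1.0) < 1e-9 and abs(linear(epochs - 1) - lrf) < 1e-9
```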
if epochs < start_epoch: - logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % - (weights, ckpt['epoch'], epochs)) + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") epochs += ckpt['epoch'] # finetune additional epochs - del ckpt, state_dict + del ckpt, csd # Image sizes gs = max(int(model.stride.max()), 32) # grid size (max stride) nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # DP mode - # if cuda and RANK == -1 and torch.cuda.device_count() > 1: - # logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n' - # 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') - # model = torch.nn.DataParallel(model) + if cuda and RANK == -1 and torch.cuda.device_count() > 1: + logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n' + 'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.') + model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and RANK != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) - logger.info('Using SyncBatchNorm()') + LOGGER.info('Using SyncBatchNorm()') # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, single_cls, - hyp=hyp, augment=False, cache=opt.cache_images, rect=opt.rect, rank=RANK, - workers=workers, - image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: ')) - mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class - nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, data, nc - 1) + train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, + hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK, + workers=workers, image_weights=opt.image_weights, quad=opt.quad, + prefix=colorstr('train: ')) + mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max()) # max label class + nb = len(train_loader) # number of batches + assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' # Process 0 if RANK in [-1, 0]: - testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, single_cls, - hyp=hyp, cache=opt.cache_images and not notest, rect=True, rank=-1, - workers=workers, - pad=0.5, prefix=colorstr('val: '))[0] + val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, + hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1, + workers=workers, pad=0.5, + prefix=colorstr('val: '))[0] if not resume: labels = np.concatenate(dataset.labels, 0) - c = torch.tensor(labels[:, 0]) # classes + # c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. 
# frequency # model._initialize_biases(cf.to(device)) if plots: - plot_labels(labels, names, save_dir, loggers) - if loggers['tb']: - loggers['tb'].add_histogram('classes', c, 0) # TensorBoard + plot_labels(labels, names, save_dir) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) - if opt.amp: - model.half().float() # pre-reduce anchor precision - else: - model.float() + model.half().float() # pre-reduce anchor precision + + callbacks.run('on_pretrain_routine_end') # DDP mode if cuda and RANK != -1: @@ -379,7 +264,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model - model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names @@ -392,50 +276,46 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) + stopper = EarlyStopping(patience=opt.patience) compute_loss = ComputeLoss(model) # init loss class - logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n' - f'Using {dataloader.num_workers} dataloader workers\n' - f'Logging results to {save_dir}\n' + LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' + f'Using {train_loader.num_workers} dataloader workers\n' + f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() - # Update image weights (optional) + # Update image weights (optional, single-GPU only) if opt.image_weights: - # Generate indices - if RANK in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx - # Broadcast if DDP - if RANK != -1: - indices = (torch.tensor(dataset.indices) if RANK == 0 else torch.zeros(dataset.n)).int() - dist.broadcast(indices, 0) - if RANK != 0: - dataset.indices = indices.cpu().numpy() - - # Update mosaic border + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + + # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - mloss = torch.zeros(4, device=device) # mean losses + mloss = torch.zeros(3, device=device) # mean losses if RANK != -1: - dataloader.sampler.set_epoch(epoch) - pbar = enumerate(dataloader) - logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'img_size', "total_fps")) + train_loader.sampler.set_epoch(epoch) + pbar = enumerate(train_loader) + LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size')) if RANK in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _) in pbar: # batch 
------------------------------------------------------------- - step_start_time = time.time() ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 + # imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 + imgs = imgs.to(device, non_blocking=True) + d1, d2, d3 = torch.split(imgs.permute(1, 0, 2, 3), 1, 0) + d4 = torch.zeros(d1.size(), device='cuda') + imgs = torch.cat([d1, d2, d3, d4], dim=0).permute(1, 0, 2, 3) + imgs = imgs.to(device, non_blocking=True, memory_format=torch.channels_last).float() / 255.0 # Warmup if ni <= nw: xi = [0, nw] # x interp - # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 @@ -449,196 +329,127 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward - if opt.amp: - with amp.autocast(enabled=cuda): - pred = model(imgs) # forward - loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size - if RANK != -1: - loss *= WORLD_SIZE # gradient averaged between devices in DDP mode - if opt.quad: - loss *= 4. - else: + with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size + if not math.isfinite(loss): + print("Loss is {}, stopping training".format(loss)) + sys.exit(1) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. - if not math.isfinite(loss[0]): - print("Loss is {}, stopping training".format(loss[0])) - sys.exit(1) - # Backward - if opt.amp: - scaler.scale(loss).backward() - else: - loss.backward() - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.) + scaler.scale(loss).backward() # Optimize if ni - last_opt_step >= accumulate: - if opt.amp: - scaler.unscale_(optimizer) - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.)
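A note on the input-packing hunk above: the permute/split/cat sequence pads each RGB batch with a zero fourth channel so inputs match the `ch=4` stem created earlier, then moves the batch to channels_last memory to pair with `model.to(memory_format=torch.channels_last)`. A minimal equivalent sketch (hypothetical helper name; a channel-dim `cat` with `zeros_like` gives the same N x 4 x H x W result and avoids the hard-coded `device='cuda'`):

```python
import torch

def pack_rgb_to_rgbz(imgs: torch.Tensor) -> torch.Tensor:
    # imgs: uint8 NCHW batch with C=3, already moved to the training device
    zeros = torch.zeros_like(imgs[:, :1])   # N x 1 x H x W zero channel, same device/dtype
    imgs = torch.cat([imgs, zeros], dim=1)  # N x 4 x H x W, matches Model(..., ch=4)
    return imgs.to(memory_format=torch.channels_last).float() / 255.0  # normalize to 0-1
```

Called right after `imgs.to(device, non_blocking=True)`, this produces the same tensor as the diff's split/cat over the permuted batch, since concatenating the three channel slices plus a zero slice along the channel axis is exactly a channel-dim pad.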
- scaler.step(optimizer) # optimizer.step - scaler.update() - else: - optimizer.step() + scaler.step(optimizer) # optimizer.step + scaler.update() optimizer.zero_grad() if ema: ema.update(model) last_opt_step = ni - step_end_time = time.time() - fps = len(imgs) / (step_end_time - step_start_time) - if torch.distributed.is_initialized(): - fps = fps * torch.distributed.get_world_size() - - # Print + # Log if RANK in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - f'{epoch}/{epochs - 1}', mem, *mloss, imgs.shape[-1], fps) - pbar.set_description(s) - - if nb > 1000: - log_freq = 100 - else: - log_freq = 20 - if "USE_DLTEST" in os.environ and i % log_freq == 0: - print(".") - - # Plot - if plots and ni < 3: - f = save_dir / f'train_batch{ni}.jpg' # filename - Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() - if loggers['tb'] and ni == 0: # TensorBoard - with warnings.catch_warnings(): - warnings.simplefilter('ignore') # suppress jit trace warning - loggers['tb'].add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), []) - elif plots and ni == 10 and loggers['wandb']: - wandb_logger.log({'Mosaics': [loggers['wandb'].Image(str(x), caption=x.name) for x in - save_dir.glob('train*.jpg') if x.exists()]}) - + mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) + pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % ( + f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) + callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn) # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for loggers scheduler.step() - # DDP process 0 or single-GPU - stop_train = [False] if RANK in [-1, 0]: # mAP - try: - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) - final_epoch = epoch + 1 == epochs - if not notest or final_epoch: # Calculate mAP - wandb_logger.current_epoch = epoch + 1 - results, maps, _ = test.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz_test, - model=ema.ema, - single_cls=single_cls, - dataloader=testloader, - save_dir=save_dir, - save_json=is_coco and final_epoch, - verbose=nc < 50 and final_epoch, - plots=plots and final_epoch, - wandb_logger=wandb_logger, - compute_loss=compute_loss) - except: - traceback.print_exc() - stop_train[0] = True - - # Write - with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss - - # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params - for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): - if loggers['tb']: - loggers['tb'].add_scalar(tag, x, epoch) # TensorBoard - if loggers['wandb']: - wandb_logger.log({tag: x}) # W&B + callbacks.run('on_train_epoch_end', epoch=epoch) + ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) + final_epoch = (epoch + 1 == epochs) or stopper.possible_stop + if not noval or final_epoch: # Calculate mAP + results, maps, _ = val.run(data_dict, + 
batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=ema.ema, + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + plots=False, + callbacks=callbacks, + compute_loss=compute_loss) # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi - wandb_logger.end_epoch(best_result=best_fitness == fi) + log_vals = list(mloss) + list(results) + lr + callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = {'epoch': epoch, 'best_fitness': best_fitness, - 'training_results': results_file.read_text(), - 'model': deepcopy(de_parallel(model)).half() if opt.amp else deepcopy(de_parallel(model)), - 'ema': deepcopy(ema.ema).half() if opt.amp else deepcopy(ema.ema), + 'model': deepcopy(de_parallel(model)).half(), + 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), - 'wandb_id': wandb_logger.wandb_run.id if loggers['wandb'] else None} + 'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) - if loggers['wandb']: - if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1: - wandb_logger.log_model(last.parent, opt, epoch, fi, best_model=best_fitness == fi) + if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): + torch.save(ckpt, w / f'epoch{epoch}.pt') del ckpt + callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) - # Fix destroy process exception - if dist.is_initialized() and dist.get_world_size() > 1: - dist.broadcast_object_list(stop_train, src=0) - if stop_train[0]: - dist.destroy_process_group() - return + # Stop Single-GPU + if RANK == -1 and stopper(epoch=epoch, fitness=fi): + break + + # Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576 + # stop = stopper(epoch=epoch, fitness=fi) + # if RANK == 0: + # dist.broadcast_object_list([stop], 0) # broadcast 'stop' to all ranks + + # Stop DDP + # with torch_distributed_zero_first(RANK): + # if stop: + # break # must break all DDP ranks # end epoch ---------------------------------------------------------------------------------------------------- # end training ----------------------------------------------------------------------------------------------------- if RANK in [-1, 0]: - logger.info(f'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n') - if plots: - plot_results(save_dir=save_dir) # save as results.png - if loggers['wandb']: - files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] - wandb_logger.log({"Results": [loggers['wandb'].Image(str(save_dir / f), caption=f) for f in files - if (save_dir / f).exists()]}) - - if not evolve: - if is_coco: # COCO dataset - for m in [last, best] if best.exists() else [last]: # speed, mAP tests - results, _, _ = test.run(data_dict, - batch_size=batch_size // WORLD_SIZE * 2, - imgsz=imgsz_test, - conf_thres=0.001, - iou_thres=0.7, - model=attempt_load(m, device).half() if opt.amp else attempt_load(m, device), - single_cls=single_cls, - dataloader=testloader, - save_dir=save_dir, - save_json=True, - plots=False) - - # Strip optimizers - for f in last, best: - if f.exists(): - strip_optimizer(f) # strip optimizers - if loggers['wandb']: # Log the stripped model -
loggers['wandb'].log_artifact(str(best if best.exists() else last), type='model', - name='run_' + wandb_logger.wandb_run.id + '_model', - aliases=['latest', 'best', 'stripped']) - wandb_logger.finish_run() + LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') + for f in last, best: + if f.exists(): + strip_optimizer(f) # strip optimizers + if f is best: + LOGGER.info(f'\nValidating {f}...') + results, _, _ = val.run(data_dict, + batch_size=batch_size // WORLD_SIZE * 2, + imgsz=imgsz, + model=attempt_load(f, device).half(), + iou_thres=0.65 if is_coco else 0.60, # best pycocotools results at 0.65 + single_cls=single_cls, + dataloader=val_loader, + save_dir=save_dir, + save_json=is_coco, + verbose=True, + plots=True, + callbacks=callbacks, + compute_loss=compute_loss) # val best model with plots + + callbacks.run('on_train_end', last, best, plots, epoch) + LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") torch.cuda.empty_cache() return results @@ -646,21 +457,21 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary def parse_opt(known=False): parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path') + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--data', type=str, default='data/coco128.yaml', help='dataset.yaml path') - parser.add_argument('--hyp', type=str, default='data/hyps/hyp.scratch.yaml', help='hyperparameters path') - parser.add_argument('--epochs', type=int, default=10) - parser.add_argument('--batch-size', type=int, default=8, help='total batch size for all GPUs') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-high.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=400) + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') - parser.add_argument('--notest', action='store_true', help='only test final epoch') + parser.add_argument('--noval', action='store_true', help='only validate final epoch') parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') - parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') + parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') @@ -668,78 +479,69 @@ def parse_opt(known=False): parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') - parser.add_argument('--project', default='runs/train', help='save to project/name') - parser.add_argument('--entity', default=None, help='W&B entity') + parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--linear-lr', action='store_true', help='linear LR') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') - parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') - parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') - parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') - parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') - parser.add_argument('--local_rank', '--local-rank', type=int, default=-1, help='DDP parameter, do not modify') - parser.add_argument('--amp', action='store_true', default=False, help='use amp to train and test') + parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') + parser.add_argument('--freeze', type=int, default=0, help='Number of layers to freeze. 
backbone=10, all=24') + parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') + parser.add_argument('--local_rank','--local-rank', type=int, default=-1, help='DDP parameter, do not modify') + + # Weights & Biases arguments + parser.add_argument('--entity', default=None, help='W&B: Entity') + parser.add_argument('--upload_dataset', action='store_true', help='W&B: Upload dataset as artifact table') + parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') + parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') + opt = parser.parse_known_args()[0] if known else parser.parse_args() return opt -def main(opt): +def main(opt, callbacks=Callbacks()): + # Checks set_logging(RANK) if RANK in [-1, 0]: - print(colorstr('train: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) - # check_git_status() + print_args(FILE.stem, opt) + check_git_status() check_requirements(exclude=['thop']) # Resume - wandb_run = check_wandb_resume(opt) - if opt.resume and not wandb_run: # resume an interrupted run + if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml') as f: + with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: opt = argparse.Namespace(**yaml.safe_load(f)) # replace opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate - logger.info('Resuming training from %s' % ckpt) + LOGGER.info(f'Resuming training from {ckpt}') else: - # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ + check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.name = 'evolve' if opt.evolve else opt.name - opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) - - print("Global setting:", LOCAL_RANK, RANK, WORLD_SIZE) - - try: - from dltest import show_training_arguments - show_training_arguments(opt) - except: - pass + if opt.evolve: + opt.project = str(ROOT / 'runs/evolve') + opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: - from datetime import timedelta assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count' + assert not opt.image_weights, '--image-weights argument is not compatible with DDP training' + assert not opt.evolve, '--evolve argument is not compatible with DDP training' torch.cuda.set_device(LOCAL_RANK) device = torch.device('cuda', LOCAL_RANK) - - dist_backend = "nccl" - DIST_BACKEND_ENV = "PT_DIST_BACKEND" - if 
DIST_BACKEND_ENV in os.environ: - print("WARN: Use the distributed backend of the environment.") - dist_backend = os.environ[DIST_BACKEND_ENV] - - dist.init_process_group(backend=dist_backend, rank=RANK, world_size=WORLD_SIZE) - # assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count' - assert not opt.image_weights, '--image-weights argument is not compatible with DDP training' + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") # Train if not opt.evolve: - train(opt.hyp, opt, device) + train(opt.hyp, opt, device, callbacks) if WORLD_SIZE > 1 and RANK == 0: - _ = [print('Destroying process group... ', end=''), dist.destroy_process_group(), print('Done.')] + LOGGER.info('Destroying process group... ') + dist.destroy_process_group() # Evolve hyperparameters (optional) else: @@ -774,22 +576,21 @@ def main(opt): 'mixup': (1, 0.0, 1.0), # image mixup (probability) 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) - with open(opt.hyp) as f: + with open(opt.hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 - assert LOCAL_RANK == -1, 'DDP mode not implemented for --evolve' - opt.notest, opt.nosave = True, True # only test/save final epoch + opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices - yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here + evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' if opt.bucket: - os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists + os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {save_dir}') # download evolve.csv if exists for _ in range(opt.evolve): # generations to evolve - if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate + if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' - x = np.loadtxt('evolve.txt', ndmin=2) + x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) @@ -803,7 +604,7 @@ def main(opt): mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) - g = np.array([x[0] for x in meta.values()]) # gains 0-1 + g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) @@ -818,19 +619,20 @@ def main(opt): hyp[k] = round(hyp[k], 5) # significant digits # Train mutation - results = train(hyp.copy(), opt, device) + results = train(hyp.copy(), opt, device, callbacks) # Write mutation results - print_mutation(hyp.copy(), results, yaml_file, opt.bucket) + print_mutation(results, hyp.copy(), save_dir, opt.bucket) # Plot results - plot_evolution(yaml_file) - print(f'Hyperparameter evolution complete. 
Best results saved as: {yaml_file}\n' - f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') + plot_evolve(evolve_csv) + print(f'Hyperparameter evolution finished\n' + f"Results saved to {colorstr('bold', save_dir)}\n" + f'Use best hyperparameters example: $ python train.py --hyp {evolve_yaml}') def run(**kwargs): - # Usage: import train; train.run(imgsz=320, weights='yolov5m.pt') + # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') opt = parse_opt(True) for k, v in kwargs.items(): setattr(opt, k, v) diff --git a/cv/detection/yolov5/pytorch/utils/activations.py b/cv/detection/yolov5/pytorch/utils/activations.py index 92a3b5e..62eb532 100644 --- a/cv/detection/yolov5/pytorch/utils/activations.py +++ b/cv/detection/yolov5/pytorch/utils/activations.py @@ -1,4 +1,7 @@ -# Activation functions +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Activation functions +""" import torch import torch.nn as nn diff --git a/cv/detection/yolov5/pytorch/utils/augmentations.py b/cv/detection/yolov5/pytorch/utils/augmentations.py index 74ee4de..04192d1 100644 --- a/cv/detection/yolov5/pytorch/utils/augmentations.py +++ b/cv/detection/yolov5/pytorch/utils/augmentations.py @@ -1,10 +1,13 @@ -# YOLOv5 image augmentation functions +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Image augmentation functions +""" import logging +import math import random import cv2 -import math import numpy as np from utils.general import colorstr, segment2box, resample_segments, check_version @@ -17,15 +20,19 @@ class Albumentations: self.transform = None try: import albumentations as A - check_version(A.__version__, '1.0.0') # version requirement + check_version(A.__version__, '1.0.3') # version requirement self.transform = A.Compose([ - A.Blur(p=0.1), - A.MedianBlur(p=0.1), - A.ToGray(p=0.01)], + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) - logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms)) + logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p)) except ImportError: # package not installed, skip pass except Exception as e: @@ -50,12 +57,12 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) - cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed def hist_equalize(im, clahe=True, bgr=False): - # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255 + # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) if clahe: c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) @@ -76,7 +83,7 @@ def replicate(im, labels): bh, bw = y2b - y1b, x2b - x1b yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + im[y1a:y2a, 
x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) return im, labels @@ -90,7 +97,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better test mAP) + if not scaleup: # only scale down, do not scale up (for better val mAP) r = min(r, 1.0) # Compute padding @@ -162,8 +169,8 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() - # ax[0].imshow(img[:, :, ::-1]) # base - # ax[1].imshow(img2[:, :, ::-1]) # warped + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) @@ -204,13 +211,13 @@ def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, sc return im, targets -def copy_paste(im, labels, segments, probability=0.5): +def copy_paste(im, labels, segments, p=0.5): # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) n = len(segments) - if probability and n: + if p and n: h, w, c = im.shape # height, width, channels im_new = np.zeros(im.shape, np.uint8) - for j in random.sample(range(n), k=round(probability * n)): + for j in random.sample(range(n), k=round(p * n)): l, s = labels[j], segments[j] box = w - l[3], l[2], w - l[1], l[4] ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area @@ -223,35 +230,34 @@ def copy_paste(im, labels, segments, probability=0.5): result = cv2.flip(result, 1) # augment segments (flip left-right) i = result > 0 # pixels to replace # i[:, :] = result.max(2).reshape(h, w, 1) # act over ch - im[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug return im, labels, segments -def cutout(im, labels): +def cutout(im, labels, p=0.5): # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 - h, w = im.shape[:2] - - # create random masks - scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction - for s in scales: - mask_h = random.randint(1, int(h * s)) - mask_w = random.randint(1, int(w * s)) - - # box - xmin = max(0, random.randint(0, w) - mask_w // 2) - ymin = max(0, random.randint(0, h) - mask_h // 2) - xmax = min(w, xmin + mask_w) - ymax = min(h, ymin + mask_h) - - # apply random color mask - im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] - - # return unobscured labels - if len(labels) and s > 0.03: - box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - labels = labels[ioa < 0.60] # remove >60% obscured labels + if random.random() < p: + h, w = im.shape[:2] + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) # create random masks + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = 
np.array([xmin, ymin, xmax, ymax], dtype=np.float32) + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels return labels diff --git a/cv/detection/yolov5/pytorch/utils/autoanchor.py b/cv/detection/yolov5/pytorch/utils/autoanchor.py index 87dc394..6b3c661 100644 --- a/cv/detection/yolov5/pytorch/utils/autoanchor.py +++ b/cv/detection/yolov5/pytorch/utils/autoanchor.py @@ -1,4 +1,9 @@ -# Auto-anchor utils +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Auto-anchor utils +""" + +import random import numpy as np import torch @@ -10,13 +15,12 @@ from utils.general import colorstr def check_anchor_order(m): # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary - a = m.anchor_grid.prod(-1).view(-1) # anchor area + a = m.anchors.prod(-1).view(-1) # anchor area da = a[-1] - a[0] # delta a ds = m.stride[-1] - m.stride[0] # delta s if da.sign() != ds.sign(): # same order print('Reversing anchor order') m.anchors[:] = m.anchors.flip(0) - m.anchor_grid[:] = m.anchor_grid.flip(0) def check_anchors(dataset, model, thr=4.0, imgsz=640): @@ -36,12 +40,12 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640): bpr = (best > 1. / thr).float().mean() # best possible recall return bpr, aat - anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors - bpr, aat = metric(anchors) + anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1) # current anchors + bpr, aat = metric(anchors.cpu().view(-1, 2)) print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') if bpr < 0.98: # threshold to recompute print('. Attempting to improve anchors, please wait...') - na = m.anchor_grid.numel() // 2 # number of anchors + na = m.anchors.numel() // 2 # number of anchors try: anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) except Exception as e: @@ -49,7 +53,6 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640): new_bpr = metric(anchors)[0] if new_bpr > bpr: # replace anchors anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) - m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss check_anchor_order(m) print(f'{prefix}New anchors saved to model. 
Update model *.yaml to use these anchors in the future.') @@ -58,11 +61,11 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640): print('') # newline -def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): +def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): """ Creates kmeans-evolved anchors from training dataset Arguments: - path: path to dataset *.yaml, or a loaded dataset + dataset: path to data.yaml, or a loaded dataset n: number of anchors img_size: image size used for training thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 @@ -101,13 +104,11 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10 print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg return k - if isinstance(path, str): # *.yaml file - with open(path) as f: + if isinstance(dataset, str): # *.yaml file + with open(dataset, errors='ignore') as f: data_dict = yaml.safe_load(f) # model dict from utils.datasets import LoadImagesAndLabels dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) - else: - dataset = path # dataset # Get label wh shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) @@ -124,7 +125,7 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10 print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') s = wh.std(0) # sigmas for whitening k, dist = kmeans(wh / s, n, iter=30) # points, mean distance - assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') + assert len(k) == n, f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}' k *= s wh = torch.tensor(wh, dtype=torch.float32) # filtered wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered @@ -149,7 +150,7 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10 for _ in pbar: v = np.ones(sh) while (v == 1).all(): # mutate until a change occurs (prevent duplicates) - v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) kg = (k.copy() * v).clip(min=2.0) fg = anchor_fitness(kg) if fg > f: diff --git a/cv/detection/yolov5/pytorch/utils/aws/resume.py b/cv/detection/yolov5/pytorch/utils/aws/resume.py index 4b0d424..b21731c 100644 --- a/cv/detection/yolov5/pytorch/utils/aws/resume.py +++ b/cv/detection/yolov5/pytorch/utils/aws/resume.py @@ -8,7 +8,10 @@ from pathlib import Path import torch import yaml -sys.path.append('./') # to run '$ python *.py' files in subdirectories +FILE = Path(__file__).resolve() +ROOT = FILE.parents[2] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH port = 0 # --master_port path = Path('').resolve() @@ -18,7 +21,7 @@ for last in path.rglob('*/**/last.pt'): continue # Load opt.yaml - with open(last.parent.parent / 'opt.yaml') as f: + with open(last.parent.parent / 'opt.yaml', errors='ignore') as f: opt = yaml.safe_load(f) # Get device count @@ -28,7 +31,7 @@ for last in path.rglob('*/**/last.pt'): if ddp: # multi-GPU port += 1 - cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}' + cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py 
--resume {last}' else: # single-GPU cmd = f'python train.py --resume {last}' diff --git a/cv/detection/yolov5/pytorch/utils/aws/userdata.sh b/cv/detection/yolov5/pytorch/utils/aws/userdata.sh index 0c28d0a..5fc1332 100644 --- a/cv/detection/yolov5/pytorch/utils/aws/userdata.sh +++ b/cv/detection/yolov5/pytorch/utils/aws/userdata.sh @@ -11,7 +11,7 @@ if [ ! -d yolov5 ]; then cd yolov5 bash data/scripts/get_coco.sh && echo "COCO done." & sudo docker pull ultralytics/yolov5:latest && echo "Docker done." & - python3 -m pip3 install --upgrade pip3 && pip3 install -r requirements.txt && python detect.py && echo "Requirements done." & + python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." & wait && echo "All tasks done." # finish background tasks else echo "Running re-start script." # resume interrupted runs diff --git a/cv/detection/yolov5/pytorch/utils/callbacks.py b/cv/detection/yolov5/pytorch/utils/callbacks.py new file mode 100755 index 0000000..327b863 --- /dev/null +++ b/cv/detection/yolov5/pytorch/utils/callbacks.py @@ -0,0 +1,76 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Callback utils +""" + + +class Callbacks: + """" + Handles all registered callbacks for YOLOv5 Hooks + """ + + # Define the available callbacks + _callbacks = { + 'on_pretrain_routine_start': [], + 'on_pretrain_routine_end': [], + + 'on_train_start': [], + 'on_train_epoch_start': [], + 'on_train_batch_start': [], + 'optimizer_step': [], + 'on_before_zero_grad': [], + 'on_train_batch_end': [], + 'on_train_epoch_end': [], + + 'on_val_start': [], + 'on_val_batch_start': [], + 'on_val_image_end': [], + 'on_val_batch_end': [], + 'on_val_end': [], + + 'on_fit_epoch_end': [], # fit = train + val + 'on_model_save': [], + 'on_train_end': [], + + 'teardown': [], + } + + def register_action(self, hook, name='', callback=None): + """ + Register a new action to a callback hook + + Args: + hook The callback hook name to register the action to + name The name of the action for later reference + callback The callback to fire + """ + assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" + assert callable(callback), f"callback '{callback}' is not callable" + self._callbacks[hook].append({'name': name, 'callback': callback}) + + def get_registered_actions(self, hook=None): + """" + Returns all the registered actions by callback hook + + Args: + hook The name of the hook to check, defaults to all + """ + if hook: + return self._callbacks[hook] + else: + return self._callbacks + + def run(self, hook, *args, **kwargs): + """ + Loop through the registered actions and fire all callbacks + + Args: + hook The name of the hook to check, defaults to all + args Arguments to receive from YOLOv5 + kwargs Keyword Arguments to receive from YOLOv5 + """ + + assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" + + for logger in self._callbacks[hook]: + logger['callback'](*args, **kwargs) diff --git a/cv/detection/yolov5/pytorch/utils/datasets.py b/cv/detection/yolov5/pytorch/utils/datasets.py index 0bcfdcc..8a176b1 100644 --- a/cv/detection/yolov5/pytorch/utils/datasets.py +++ b/cv/detection/yolov5/pytorch/utils/datasets.py @@ -1,4 +1,7 @@ -# YOLOv5 dataset utils and dataloaders +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Dataloaders and dataset utils +""" import glob import hashlib @@ -12,6 +15,7 @@ from itertools import repeat from multiprocessing.pool import ThreadPool, Pool from pathlib import Path 
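# Usage sketch for the Callbacks registry defined in utils/callbacks.py above
# (hypothetical wiring for illustration only; hook names must be keys of _callbacks):
#   from utils.callbacks import Callbacks
#   callbacks = Callbacks()
#   callbacks.register_action('on_train_epoch_end', name='log_epoch',
#                             callback=lambda epoch: print(f'epoch {epoch} done'))
#   callbacks.run('on_train_epoch_end', 0)  # fires every action registered on this hook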
from threading import Thread +from zipfile import ZipFile import cv2 import numpy as np @@ -23,16 +27,15 @@ from torch.utils.data import Dataset from tqdm import tqdm from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective -from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \ - xyn2xy, segments2boxes, clean_str +from utils.general import check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, \ + xywh2xyxy, xywhn2xyxy, xyxy2xywhn, xyn2xy from utils.torch_utils import torch_distributed_zero_first # Parameters -help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' -img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes -vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes -num_threads = min(8, os.cpu_count()) # number of multiprocessing threads -logger = logging.getLogger(__name__) +HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' +IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes +VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes +NUM_THREADS = min(8, os.cpu_count()) # number of multiprocessing threads # Get orientation exif tag for orientation in ExifTags.TAGS.keys(): @@ -152,9 +155,10 @@ class _RepeatSampler(object): yield from iter(self.sampler) -class LoadImages: # for inference - def __init__(self, path, img_size=640, stride=32): - p = str(Path(path).absolute()) # os-agnostic absolute path +class LoadImages: + # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` + def __init__(self, path, img_size=640, stride=32, auto=True): + p = str(Path(path).resolve()) # os-agnostic absolute path if '*' in p: files = sorted(glob.glob(p, recursive=True)) # glob elif os.path.isdir(p): @@ -164,8 +168,8 @@ class LoadImages: # for inference else: raise Exception(f'ERROR: {p} does not exist') - images = [x for x in files if x.split('.')[-1].lower() in img_formats] - videos = [x for x in files if x.split('.')[-1].lower() in vid_formats] + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] ni, nv = len(images), len(videos) self.img_size = img_size @@ -174,12 +178,13 @@ class LoadImages: # for inference self.nf = ni + nv # number of files self.video_flag = [False] * ni + [True] * nv self.mode = 'image' + self.auto = auto if any(videos): self.new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. ' \ - f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}' + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' def __iter__(self): self.count = 0 @@ -215,7 +220,7 @@ class LoadImages: # for inference print(f'image {self.count}/{self.nf} {path}: ', end='') # Padded resize - img = letterbox(img0, self.img_size, stride=self.stride)[0] + img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0] # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB @@ -233,6 +238,7 @@ class LoadImages: # for inference class LoadWebcam: # for inference + # YOLOv5 local webcam dataloader, i.e. 
`python detect.py --source 0` def __init__(self, pipe='0', img_size=640, stride=32): self.img_size = img_size self.stride = stride @@ -273,8 +279,9 @@ class LoadWebcam: # for inference return 0 -class LoadStreams: # multiple IP or RTSP cameras - def __init__(self, sources='streams.txt', img_size=640, stride=32): +class LoadStreams: + # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` + def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True): self.mode = 'stream' self.img_size = img_size self.stride = stride @@ -288,6 +295,7 @@ class LoadStreams: # multiple IP or RTSP cameras n = len(sources) self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n self.sources = [clean_str(x) for x in sources] # clean source names for later + self.auto = auto for i, s in enumerate(sources): # index, source # Start thread to read frames from video stream print(f'{i + 1}/{n}: {s}... ', end='') @@ -304,18 +312,18 @@ class LoadStreams: # multiple IP or RTSP cameras self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback _, self.imgs[i] = cap.read() # guarantee first frame - self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True) + self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)") self.threads[i].start() print('') # newline # check for common shapes - s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes + s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs]) self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal if not self.rect: print('WARNING: Different stream shapes detected. 
For optimal performance supply similarly-shaped streams.') - def update(self, i, cap): + def update(self, i, cap, stream): # Read stream `i` frames in daemon thread n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame while cap.isOpened() and n < f: @@ -324,7 +332,12 @@ class LoadStreams: # multiple IP or RTSP cameras cap.grab() if n % read == 0: success, im = cap.retrieve() - self.imgs[i] = im if success else self.imgs[i] * 0 + if success: + self.imgs[i] = im + else: + print('WARNING: Video stream unresponsive, please check your IP camera connection.') + self.imgs[i] *= 0 + cap.open(stream) # re-open stream if signal was lost time.sleep(1 / self.fps[i]) # wait time def __iter__(self): @@ -339,7 +352,7 @@ class LoadStreams: # multiple IP or RTSP cameras # Letterbox img0 = self.imgs.copy() - img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0] + img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0] # Stack img = np.stack(img, 0) @@ -360,7 +373,10 @@ def img2label_paths(img_paths): return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] -class LoadImagesAndLabels(Dataset): # for training/testing +class LoadImagesAndLabels(Dataset): + # YOLOv5 train_loader/val_loader, loads images and labels for training and validation + cache_version = 0.5 # dataset labels *.cache version + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): self.img_size = img_size @@ -389,20 +405,20 @@ class LoadImagesAndLabels(Dataset): # for training/testing # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f'{prefix}{p} does not exist') - self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS]) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib assert self.img_files, f'{prefix}No images found' except Exception as e: - raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') + raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') # Check cache self.label_files = img2label_paths(self.img_files) # labels - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels - if cache_path.is_file(): - cache, exists = torch.load(cache_path), True # load - if cache.get('version') != 0.3 or cache.get('hash') != get_hash(self.label_files + self.img_files): - cache, exists = self.cache_labels(cache_path, prefix), False # re-cache - else: + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') + try: + cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict + assert cache['version'] == self.cache_version # same version + assert cache['hash'] == get_hash(self.label_files + self.img_files) # same hash + except: cache, exists = self.cache_labels(cache_path, prefix), False # cache # Display cache @@ -412,7 +428,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results if cache['msgs']: logging.info('\n'.join(cache['msgs'])) # display warnings - assert nf > 0 or not augment, f'{prefix}No labels 
in {cache_path}. Can not train without labels. See {help_url}' + assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}' # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items @@ -426,7 +442,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing x[:, 0] = 0 n = len(shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + bi = np.floor(np.arange(n) / batch_size).astype(np.int64) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n @@ -454,19 +470,28 @@ class LoadImagesAndLabels(Dataset): # for training/testing elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int64) * stride # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) - self.imgs = [None] * n + self.imgs, self.img_npy = [None] * n, [None] * n if cache_images: + if cache_images == 'disk': + self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy') + self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files] + self.im_cache_dir.mkdir(parents=True, exist_ok=True) gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * n, [None] * n - results = ThreadPool(num_threads).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) + results = ThreadPool(NUM_THREADS).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) pbar = tqdm(enumerate(results), total=n) for i, x in pbar: - self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) - gb += self.imgs[i].nbytes - pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)' + if cache_images == 'disk': + if not self.img_npy[i].exists(): + np.save(self.img_npy[i].as_posix(), x[0]) + gb += self.img_npy[i].stat().st_size + else: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})' pbar.close() def cache_labels(self, path=Path('./labels.cache'), prefix=''): @@ -474,8 +499,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing x = {} # dict nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." - with Pool(num_threads) as pool: - pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))), + with Pool(NUM_THREADS) as pool: + pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))), desc=desc, total=len(self.img_files)) for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f @@ -492,13 +517,14 @@ class LoadImagesAndLabels(Dataset): # for training/testing if msgs: logging.info('\n'.join(msgs)) if nf == 0: - logging.info(f'{prefix}WARNING: No labels found in {path}. See {help_url}') + logging.info(f'{prefix}WARNING: No labels found in {path}. 
See {HELP_URL}') x['hash'] = get_hash(self.label_files + self.img_files) x['results'] = nf, nm, ne, nc, len(self.img_files) x['msgs'] = msgs # warnings - x['version'] = 0.3 # cache version + x['version'] = self.cache_version # cache version try: - torch.save(x, path) # save cache for next time + np.save(path, x) # save cache for next time + path.with_suffix('.cache.npy').rename(path) # remove .npy suffix logging.info(f'{prefix}New cache created: {path}') except Exception as e: logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable @@ -550,11 +576,12 @@ class LoadImagesAndLabels(Dataset): # for training/testing nl = len(labels) # number of labels if nl: - labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0]) # xyxy to xywh normalized + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) if self.augment: # Albumentations img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations # HSV color-space augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) @@ -572,8 +599,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing labels[:, 1] = 1 - labels[:, 1] # Cutouts - # if random.random() < 0.9: - # labels = cutout(img, labels) + # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: @@ -620,30 +646,34 @@ class LoadImagesAndLabels(Dataset): # for training/testing # Ancillary functions -------------------------------------------------------------------------------------------------- -def load_image(self, index): - # loads 1 image from dataset, returns img, original hw, resized hw - img = self.imgs[index] - if img is None: # not cached - path = self.img_files[index] - img = cv2.imread(path) # BGR - assert img is not None, 'Image Not Found ' + path - h0, w0 = img.shape[:2] # orig hw +def load_image(self, i): + # loads 1 image from dataset index 'i', returns im, original hw, resized hw + im = self.imgs[i] + if im is None: # not cached in ram + npy = self.img_npy[i] + if npy and npy.exists(): # load npy + im = np.load(npy) + else: # read image + path = self.img_files[i] + im = cv2.imread(path) # BGR + assert im is not None, 'Image Not Found ' + path + h0, w0 = im.shape[:2] # orig hw r = self.img_size / max(h0, w0) # ratio if r != 1: # if sizes are not equal - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), - interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR) - return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized + im = cv2.resize(im, (int(w0 * r), int(h0 * r)), + interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR) + return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized else: - return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized + return self.imgs[i], self.img_hw0[i], self.img_hw[i] # im, hw_original, hw_resized def load_mosaic(self, index): - # loads images in a 4-mosaic - + # YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + random.shuffle(indices) for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) @@ -682,7 +712,7 @@ def load_mosaic(self, index): # img4, labels4 = replicate(img4, labels4) # replicate # Augment - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste']) + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) img4, labels4 = random_perspective(img4, labels4, segments4, degrees=self.hyp['degrees'], translate=self.hyp['translate'], @@ -695,11 +725,11 @@ def load_mosaic(self, index): def load_mosaic9(self, index): - # loads images in a 9-mosaic - + # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic labels9, segments9 = [], [] s = self.img_size indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + random.shuffle(indices) for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) @@ -790,7 +820,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; files = list(path.rglob('*.*')) n = len(files) # number of files for im_file in tqdm(files, total=n): - if im_file.suffix[1:] in img_formats: + if im_file.suffix[1:] in IMG_FORMATS: # image im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB h, w = im.shape[:2] @@ -810,7 +840,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; b = x[1:] * [w, h, w, h] # box # b[2:] = b[2:].max() # rectangle to square b[2:] = b[2:] * 1.2 + 3 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int64) b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) @@ -826,7 +856,7 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], []) # image files only + files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in IMG_FORMATS], []) # image files only n = len(files) # number of files random.seed(0) # for reproducibility indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split @@ -844,21 +874,22 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota def verify_image_label(args): # Verify one image-label pair im_file, lb_file, prefix = args - nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, corrupt + nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments try: # verify images im = Image.open(im_file) im.verify() # PIL verify shape = exif_size(im) # image size assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' - assert im.format.lower() in img_formats, f'invalid image format {im.format}' + assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' if im.format.lower() in ('jpg', 'jpeg'): with open(im_file, 'rb') as f: f.seek(-2, 2) - assert f.read() == b'\xff\xd9', 'corrupted JPEG' + if f.read() != b'\xff\xd9': # corrupt JPEG + Image.open(im_file).save(im_file, 
format='JPEG', subsampling=0, quality=100) # re-save image + msg = f'{prefix}WARNING: corrupt JPEG restored and saved {im_file}' # verify labels - segments = [] # instance segments if os.path.isfile(lb_file): nf = 1 # label found with open(lb_file, 'r') as f: @@ -879,41 +910,72 @@ def verify_image_label(args): else: nm = 1 # label missing l = np.zeros((0, 5), dtype=np.float32) - return im_file, l, shape, segments, nm, nf, ne, nc, '' + return im_file, l, shape, segments, nm, nf, ne, nc, msg except Exception as e: nc = 1 msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}' return [None, None, None, None, nm, nf, ne, nc, msg] -def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False): +def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False): """ Return dataset statistics dictionary with images and instances counts per split per class - Usage: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True) + To run in parent directory: export PYTHONPATH="$PWD/yolov5" + Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) + Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') Arguments - path: Path to data.yaml + path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally verbose: Print stats dictionary """ def round_labels(labels): # Update labels to integer class and 6 decimal place floats - return [[int(c), *[round(x, 6) for x in points]] for c, *points in labels] - - with open(check_file(path)) as f: + return [[int(c), *[round(x, 4) for x in points]] for c, *points in labels] + + def unzip(path): + # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' + if str(path).endswith('.zip'): # path is data.zip + assert Path(path).is_file(), f'Error unzipping {path}, file not found' + ZipFile(path).extractall(path=path.parent) # unzip + dir = path.with_suffix('') # dataset directory == zip name + return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path + else: # path is data.yaml + return False, None, path + + def hub_ops(f, max_dim=1920): + # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing + f_new = im_dir / Path(f).name # dataset-hub image filename + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new, quality=75) # save + except Exception as e: # use OpenCV + print(f'WARNING: HUB ops PIL failure {f}: {e}') + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_LINEAR) + cv2.imwrite(str(f_new), im) + + zipped, data_dir, yaml_path = unzip(Path(path)) + with open(check_yaml(yaml_path), errors='ignore') as f: data = yaml.safe_load(f) # data dict + if zipped: + data['path'] = data_dir # TODO: should this be dir.resolve()? 
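+        # rebasing 'path' onto the extracted directory lets check_dataset() below
+        # resolve the train/val/test splits inside the unzipped dataset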
check_dataset(data, autodownload) # download dataset if missing - nc = data['nc'] # number of classes - stats = {'nc': nc, 'names': data['names']} # statistics dictionary + hub_dir = Path(data['path'] + ('-hub' if hub else '')) + stats = {'nc': data['nc'], 'names': data['names']} # statistics dictionary for split in 'train', 'val', 'test': if data.get(split) is None: stats[split] = None # i.e. no test set continue x = [] - dataset = LoadImagesAndLabels(data[split], augment=False, rect=True) # load dataset - if split == 'train': - cache_path = Path(dataset.label_files[0]).parent.with_suffix('.cache') # *.cache path + dataset = LoadImagesAndLabels(data[split]) # load dataset for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics'): - x.append(np.bincount(label[:, 0].astype(int), minlength=nc)) + x.append(np.bincount(label[:, 0].astype(int), minlength=data['nc'])) x = np.array(x) # shape(128x80) stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()}, 'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()), @@ -921,10 +983,37 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False): 'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in zip(dataset.img_files, dataset.labels)]} + if hub: + im_dir = hub_dir / 'images' + im_dir.mkdir(parents=True, exist_ok=True) + for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'): + pass + + # Profile + stats_path = hub_dir / 'stats.json' + if profile: + for _ in range(1): + file = stats_path.with_suffix('.npy') + t1 = time.time() + np.save(file, stats) + t2 = time.time() + x = np.load(file, allow_pickle=True) + print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write') + + file = stats_path.with_suffix('.json') + t1 = time.time() + with open(file, 'w') as f: + json.dump(stats, f) # save stats *.json + t2 = time.time() + with open(file, 'r') as f: + x = json.load(f) # load hyps dict + print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write') + # Save, print and return - with open(cache_path.with_suffix('.json'), 'w') as f: - json.dump(stats, f) # save stats *.json + if hub: + print(f'Saving {stats_path.resolve()}...') + with open(stats_path, 'w') as f: + json.dump(stats, f) # save stats.json if verbose: print(json.dumps(stats, indent=2, sort_keys=False)) - # print(yaml.dump([stats], sort_keys=False, default_flow_style=False)) return stats diff --git a/cv/detection/yolov5/pytorch/utils/downloads.py b/cv/detection/yolov5/pytorch/utils/downloads.py new file mode 100755 index 0000000..eafa3b7 --- /dev/null +++ b/cv/detection/yolov5/pytorch/utils/downloads.py @@ -0,0 +1,150 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Download utils +""" + +import os +import platform +import subprocess +import time +import urllib +from pathlib import Path +from zipfile import ZipFile + +import requests +import torch + + +def gsutil_getsize(url=''): + # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du + s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') + return eval(s.split(' ')[0]) if len(s) else 0 # bytes + + +def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): + # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes + file = Path(file) + assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" + try: # url1 + print(f'Downloading 
{url} to {file}...') + torch.hub.download_url_to_file(url, str(file)) + assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check + except Exception as e: # url2 + file.unlink(missing_ok=True) # remove partial downloads + print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') + os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail + finally: + if not file.exists() or file.stat().st_size < min_bytes: # check + file.unlink(missing_ok=True) # remove partial downloads + print(f"ERROR: {assert_msg}\n{error_msg}") + print('') + + +def attempt_download(file, repo='ultralytics/yolov5'): # from utils.downloads import *; attempt_download() + # Attempt file download if does not exist + file = Path(str(file).strip().replace("'", '')) + + if not file.exists(): + # URL specified + name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. + if str(file).startswith(('http:/', 'https:/')): # download + url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ + name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... + safe_download(file=name, url=url, min_bytes=1E5) + return name + + # GitHub assets + file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) + try: + response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api + assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] + tag = response['tag_name'] # i.e. 'v1.0' + except: # fallback plan + assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', + 'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt'] + try: + tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] + except: + tag = 'v5.0' # current release + + if name in assets: + safe_download(file, + url=f'https://github.com/{repo}/releases/download/{tag}/{name}', + # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}', # backup url (optional) + min_bytes=1E5, + error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/') + + return str(file) + + +def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): + # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() + t = time.time() + file = Path(file) + cookie = Path('cookie') # gdrive cookie + print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') + file.unlink(missing_ok=True) # remove existing file + cookie.unlink(missing_ok=True) # remove existing cookie + + # Attempt file download + out = "NUL" if platform.system() == "Windows" else "/dev/null" + os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') + if os.path.exists('cookie'): # large file + s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' + else: # small file + s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' + r = os.system(s) # execute, capture return + cookie.unlink(missing_ok=True) # remove existing cookie + + # Error check + if r != 0: + file.unlink(missing_ok=True) # remove partial + print('Download error ') # raise Exception('Download error') + return r + + # Unzip if archive + if file.suffix == '.zip': + print('unzipping... 
', end='') + ZipFile(file).extractall(path=file.parent) # unzip + file.unlink() # remove zip + + print(f'Done ({time.time() - t:.1f}s)') + return r + + +def get_token(cookie="./cookie"): + with open(cookie) as f: + for line in f: + if "download" in line: + return line.split()[-1] + return "" + +# Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- +# +# +# def upload_blob(bucket_name, source_file_name, destination_blob_name): +# # Uploads a file to a bucket +# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python +# +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(destination_blob_name) +# +# blob.upload_from_filename(source_file_name) +# +# print('File {} uploaded to {}.'.format( +# source_file_name, +# destination_blob_name)) +# +# +# def download_blob(bucket_name, source_blob_name, destination_file_name): +# # Uploads a blob from a bucket +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(source_blob_name) +# +# blob.download_to_filename(destination_file_name) +# +# print('Blob {} downloaded to {}.'.format( +# source_blob_name, +# destination_file_name)) diff --git a/cv/detection/yolov5/pytorch/utils/general.py b/cv/detection/yolov5/pytorch/utils/general.py index f9b8961..03d995f 100644 --- a/cv/detection/yolov5/pytorch/utils/general.py +++ b/cv/detection/yolov5/pytorch/utils/general.py @@ -1,8 +1,12 @@ -# YOLOv5 general utils +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +General utils +""" import contextlib import glob import logging +import math import os import platform import random @@ -14,9 +18,9 @@ from itertools import repeat from multiprocessing.pool import ThreadPool from pathlib import Path from subprocess import check_output +from zipfile import ZipFile import cv2 -import math import numpy as np import pandas as pd import pkg_resources as pkg @@ -24,9 +28,8 @@ import torch import torchvision import yaml -from utils.google_utils import gsutil_getsize +from utils.downloads import gsutil_getsize from utils.metrics import box_iou, fitness -from utils.torch_utils import init_torch_seeds # Settings torch.set_printoptions(linewidth=320, precision=5, profile='long') @@ -35,9 +38,21 @@ pd.options.display.max_columns = 10 cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLOv5 root directory + + +class Profile(contextlib.ContextDecorator): + # Usage: @Profile() decorator or 'with Profile():' context manager + def __enter__(self): + self.start = time.time() + + def __exit__(self, type, value, traceback): + print(f'Profile results: {time.time() - self.start:.5f}s') -class timeout(contextlib.ContextDecorator): - # Usage: @timeout(seconds) decorator or 'with timeout(seconds):' context manager + +class Timeout(contextlib.ContextDecorator): + # Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True): self.seconds = int(seconds) self.timeout_message = timeout_msg @@ -56,17 +71,41 @@ class timeout(contextlib.ContextDecorator): return True +def try_except(func): + # try-except function. 
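+# Usage sketch for the Timeout context manager above (assumes the signal.SIGALRM
+# implementation, so Unix-only; the seconds and message are illustrative):
+#   with Timeout(30, timeout_msg='operation timed out'):
+#       slow_operation()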
Usage: @try_except decorator + def handler(*args, **kwargs): + try: + func(*args, **kwargs) + except Exception as e: + print(e) + + return handler + + +def methods(instance): + # Get class/instance methods + return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")] + + def set_logging(rank=-1, verbose=True): logging.basicConfig( format="%(message)s", level=logging.INFO if (verbose and rank in [-1, 0]) else logging.WARN) +def print_args(name, opt): + # Print argparser arguments + print(colorstr(f'{name}: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) + + def init_seeds(seed=0): - # Initialize random number generator (RNG) seeds + # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html + # cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible + import torch.backends.cudnn as cudnn random.seed(seed) np.random.seed(seed) - init_torch_seeds(seed) + torch.manual_seed(seed) + cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False) def get_latest_run(search_dir='.'): @@ -75,6 +114,34 @@ def get_latest_run(search_dir='.'): return max(last_list, key=os.path.getctime) if last_list else '' +def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'): + # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required. + env = os.getenv(env_var) + if env: + path = Path(env) # use environment variable + else: + cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs + path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir + path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable + path.mkdir(exist_ok=True) # make if required + return path + + +def is_writeable(dir, test=False): + # Return True if directory has write permissions, test opening a file with write permissions if test=True + if test: # method 1 + file = Path(dir) / 'tmp.txt' + try: + with open(file, 'w'): # open file with write permissions + pass + file.unlink() # remove file + return True + except IOError: + return False + else: # method 2 + return os.access(dir, os.R_OK) # possible issues on Windows + + def is_docker(): # Is environment a Docker container? return Path('/workspace').exists() # or Path('/.dockerenv').exists() @@ -85,13 +152,24 @@ def is_colab(): try: import google.colab return True - except Exception as e: + except ImportError: return False def is_pip(): # Is file in a pip package? - return 'site-packages' in Path(__file__).absolute().parts + return 'site-packages' in Path(__file__).resolve().parts + + +def is_ascii(s=''): + # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7) + s = str(s) # convert list, tuple, None, etc. to str + return len(s.encode().decode('ascii', 'ignore')) == len(s) + + +def is_chinese(s='人工智能'): + # Is string composed of any Chinese characters? 
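+    # (re.search returns a Match object or None, so this is truthy/falsy
+    # rather than a strict bool)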
+ return re.search('[\u4e00-\u9fff]', s) def emojis(str=''): @@ -99,9 +177,15 @@ def emojis(str=''): return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str -def file_size(file): - # Return file size in MB - return Path(file).stat().st_size / 1e6 +def file_size(path): + # Return file/dir size (MB) + path = Path(path) + if path.is_file(): + return path.stat().st_size / 1E6 + elif path.is_dir(): + return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6 + else: + return 0.0 def check_online(): @@ -114,27 +198,25 @@ def check_online(): return False -def check_git_status(err_msg=', for updates see https://github.com/ultralytics/yolov5'): +@try_except +def check_git_status(): # Recommend 'git pull' if code is out of date -# print(colorstr('github: '), end='') -# try: -# assert Path('.git').exists(), 'skipping check (not a git repository)' -# assert not is_docker(), 'skipping check (Docker image)' -# assert check_online(), 'skipping check (offline)' -# -# cmd = 'git fetch && git config --get remote.origin.url' -# url = check_output(cmd, shell=True, timeout=5).decode().strip().rstrip('.git') # git fetch -# branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out -# n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind -# if n > 0: -# s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \ -# f"Use 'git pull' to update or 'git clone {url}' to download latest." -# else: -# s = f'up to date with {url} ✅' -# print(emojis(s)) # emoji-safe -# except Exception as e: -# print(f'{e}{err_msg}') - print("no need to check git status") + msg = ', for updates see https://github.com/ultralytics/yolov5' + print(colorstr('github: '), end='') + assert Path('.git').exists(), 'skipping check (not a git repository)' + msg + assert not is_docker(), 'skipping check (Docker image)' + msg + assert check_online(), 'skipping check (offline)' + msg + + cmd = 'git fetch && git config --get remote.origin.url' + url = check_output(cmd, shell=True, timeout=5).decode().strip().rstrip('.git') # git fetch + branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out + n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind + if n > 0: + s = f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `git pull` or `git clone {url}` to update." + else: + s = f'up to date with {url} ✅' + print(emojis(s)) # emoji-safe + def check_python(minimum='3.6.2'): # Check current python version vs. required python version @@ -148,17 +230,14 @@ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=Fals assert result, f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed' -def check_requirements(requirements='requirements.txt', exclude=()): - # TODO: Remove `return` to auto install packages - return +@try_except +def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True): # Check installed dependencies meet requirements (pass *.txt file or list of packages) prefix = colorstr('red', 'bold', 'requirements:') check_python() # check python version if isinstance(requirements, (str, Path)): # requirements.txt file file = Path(requirements) - if not file.exists(): - print(f"{prefix} {file.resolve()} not found, check failed.") - return + assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." 
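+        # a list/tuple of package specs is also accepted and handled in the else
+        # branch below, e.g. check_requirements(['onnx>=1.9.0'], install=False)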
requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude] else: # list or tuple of packages requirements = [x for x in requirements if x not in exclude] @@ -168,26 +247,33 @@ def check_requirements(requirements='requirements.txt', exclude=()): try: pkg.require(r) except Exception as e: # DistributionNotFound or VersionConflict if requirements not met - print(f"{prefix} {r} not found and is required by YOLOv5, attempting auto-update...") - try: - assert check_online(), f"'pip install {r}' skipped (offline)" - print(check_output(f"pip install '{r}'", shell=True).decode()) - n += 1 - except Exception as e: - print(f'{prefix} {e}') + s = f"{prefix} {r} not found and is required by YOLOv5" + if install: + print(f"{s}, attempting auto-update...") + try: + assert check_online(), f"'pip install {r}' skipped (offline)" + print(check_output(f"pip install '{r}'", shell=True).decode()) + n += 1 + except Exception as e: + print(f'{prefix} {e}') + else: + print(f'{s}. Please install and rerun your command.') if n: # if packages updated source = file.resolve() if 'file' in locals() else requirements s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" - #print(emojis(s)) # emoji-safe + print(emojis(s)) -def check_img_size(img_size, s=32): - # Verify img_size is a multiple of stride s - new_size = make_divisible(img_size, int(s)) # ceil gs-multiple - if new_size != img_size: - print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) +def check_img_size(imgsz, s=32, floor=0): + # Verify image size is a multiple of stride s in each dimension + if isinstance(imgsz, int): # integer i.e. img_size=640 + new_size = max(make_divisible(imgsz, int(s)), floor) + else: # list i.e. 
img_size=[640, 480] + new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] + if new_size != imgsz: + print(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') return new_size @@ -206,62 +292,109 @@ def check_imshow(): return False -def check_file(file): +def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''): + # Check file(s) for acceptable suffixes + if file and suffix: + if isinstance(suffix, str): + suffix = [suffix] + for f in file if isinstance(file, (list, tuple)) else [file]: + assert Path(f).suffix.lower() in suffix, f"{msg}{f} acceptable suffix is {suffix}" + + +def check_yaml(file, suffix=('.yaml', '.yml')): + # Search/download YAML file (if necessary) and return path, checking suffix + return check_file(file, suffix) + + +def check_file(file, suffix=''): # Search/download file (if necessary) and return path + check_suffix(file, suffix) # optional file = str(file) # convert to str() if Path(file).is_file() or file == '': # exists return file elif file.startswith(('http:/', 'https:/')): # download url = str(Path(file)).replace(':/', '://') # Pathlib turns :// -> :/ - file = Path(urllib.parse.unquote(file)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth + file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth print(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, file) assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check return file else: # search - files = glob.glob('./**/' + file, recursive=True) # find file + files = [] + for d in 'data', 'models', 'utils': # search directories + files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file assert len(files), f'File not found: {file}' # assert file was found assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file def check_dataset(data, autodownload=True): - # Download dataset if not found locally - path = Path(data.get('path', '')) # optional 'path' field - if path: - for k in 'train', 'val', 'test': - if data.get(k): # prepend path - data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] - + # Download and/or unzip dataset if not found locally + # Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip + + # Download (optional) + extract_dir = '' + if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip + download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1) + data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml')) + extract_dir, autodownload = data.parent, False + + # Read yaml (optional) + if isinstance(data, (str, Path)): + with open(data, errors='ignore') as f: + data = yaml.safe_load(f) # dictionary + + # Parse yaml + path = extract_dir or Path(data.get('path') or '') # optional 'path' default to '.' + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] + + assert 'nc' in data, "Dataset 'nc' key missing." 
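+    # typical coco128-style data dict assumed at this point (illustrative):
+    #   path: ../datasets/coco128
+    #   train: images/train2017
+    #   val: images/train2017
+    #   nc: 80
+    #   names: [person, bicycle, car, ...]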
+ if 'names' not in data: + data['names'] = [f'class{i}' for i in range(data['nc'])] # assign class names if missing train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')] if val: val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path if not all(x.exists() for x in val): print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) if s and autodownload: # download script + root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename - print(f'Downloading {s} ...') + print(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) - root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' Path(root).mkdir(parents=True, exist_ok=True) # create root - r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip + ZipFile(f).extractall(path=root) # unzip + Path(f).unlink() # remove zip + r = None # success elif s.startswith('bash '): # bash script print(f'Running {s} ...') r = os.system(s) else: # python script r = exec(s, {'yaml': data}) # return None - print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result + print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n") else: raise Exception('Dataset not found.') + return data # dictionary + + +def url2file(url): + # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt + url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ + file = Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth + return file + def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): - # Multi-threaded file download and unzip function + # Multi-threaded file download and unzip function, used in data.yaml for autodownload def download_one(url, dir): # Download 1 file f = dir / Path(url).name # filename - if not f.exists(): + if Path(url).is_file(): # exists in current path + Path(url).rename(f) # move to dir + elif not f.exists(): print(f'Downloading {url} to {f}...') if curl: os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail @@ -270,12 +403,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): if unzip and f.suffix in ('.zip', '.gz'): print(f'Unzipping {f}...') if f.suffix == '.zip': - s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite + ZipFile(f).extractall(path=dir) # unzip elif f.suffix == '.gz': - s = f'tar xfz {f} --directory {f.parent}' # unzip - if delete: # delete zip file after unzip - s += f' && rm {f}' - os.system(s) + os.system(f'tar xfz {f} --directory {f.parent}') # unzip + if delete: + f.unlink() # remove zip dir = Path(dir) dir.mkdir(parents=True, exist_ok=True) # make directory @@ -285,7 +417,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): pool.close() pool.join() else: - for u in tuple(url) if isinstance(url, str) else url: + for u in [url] if isinstance(url, (str, Path)) else url: download_one(u, dir) @@ -300,7 +432,7 @@ def clean_str(s): def one_cycle(y1=0.0, y2=1.0, steps=100): - # lambda function for sinusoidal ramp from y1 to y2 + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 @@ -335,7 +467,7 @@ def 
labels_to_class_weights(labels, nc=80): return torch.Tensor() labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO - classes = labels[:, 0].astype(np.int) # labels = [class xywh] + classes = labels[:, 0].astype(np.int64) # labels = [class xywh] weights = np.bincount(classes, minlength=nc) # occurrences per class # Prepend gridpoint count (for uCE training) @@ -350,7 +482,7 @@ def labels_to_class_weights(labels, nc=80): def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): # Produces image weights based on class_weights and image contents - class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) + class_counts = np.array([np.bincount(x[:, 0].astype(np.int64), minlength=nc) for x in labels]) image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample return image_weights @@ -398,10 +530,10 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): return y -def xyxy2xywhn(x, w=640, h=640, clip=False): +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right if clip: - clip_coords(x, (h, w)) # warning: inplace clip + clip_coords(x, (h - eps, w - eps)) # warning: inplace clip y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center @@ -460,18 +592,16 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): return coords -def clip_coords(boxes, img_shape): +def clip_coords(boxes, shape): # Clip xyxy bounding boxes to image shape (height, width) - if isinstance(boxes, torch.Tensor): - boxes[:, 0].clamp_(0, img_shape[1]) # x1 - boxes[:, 1].clamp_(0, img_shape[0]) # y1 - boxes[:, 2].clamp_(0, img_shape[1]) # x2 - boxes[:, 3].clamp_(0, img_shape[0]) # y2 - else: # np.array - boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1 - boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1 - boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2 - boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2 + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, @@ -584,35 +714,43 @@ def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_op print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") -def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''): - # Print mutation results to evolve.txt (for use with train.py --evolve) - a = '%10s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys - b = '%10.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values - c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) - print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c)) +def print_mutation(results, hyp, save_dir, bucket): + evolve_csv, results_csv, evolve_yaml = save_dir / 'evolve.csv', save_dir / 'results.csv', save_dir / 'hyp_evolve.yaml' + keys = ('metrics/precision',
'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', + 'val/box_loss', 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps] + keys = tuple(x.strip() for x in keys) + vals = results + tuple(hyp.values()) + n = len(keys) + # Download (optional) if bucket: - url = 'gs://%s/evolve.txt' % bucket - if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0): - os.system('gsutil cp %s .' % url) # download evolve.txt if larger than local + url = f'gs://{bucket}/evolve.csv' + if gsutil_getsize(url) > (os.path.getsize(evolve_csv) if os.path.exists(evolve_csv) else 0): + os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local + + # Log to evolve.csv + s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header + with open(evolve_csv, 'a') as f: + f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n') - with open('evolve.txt', 'a') as f: # append result - f.write(c + b + '\n') - x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows - x = x[np.argsort(-fitness(x))] # sort - np.savetxt('evolve.txt', x, '%10.3g') # save sort by fitness + # Print to screen + print(colorstr('evolve: ') + ', '.join(f'{x.strip():>20s}' for x in keys)) + print(colorstr('evolve: ') + ', '.join(f'{x:20.5g}' for x in vals), end='\n\n\n') # Save yaml - for i, k in enumerate(hyp.keys()): - hyp[k] = float(x[0, i + 7]) - with open(yaml_file, 'w') as f: - results = tuple(x[0, :7]) - c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) - f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n') + with open(evolve_yaml, 'w') as f: + data = pd.read_csv(evolve_csv) + data = data.rename(columns=lambda x: x.strip()) # strip keys + i = np.argmax(fitness(data.values[:, :7])) # index of best generation + f.write('# YOLOv5 Hyperparameter Evolution Results\n' + + f'# Best generation: {i}\n' + + f'# Last generation: {len(data)}\n' + + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + '\n' + + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n') yaml.safe_dump(hyp, f, sort_keys=False) if bucket: - os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket)) # upload + os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload def apply_classifier(x, model, img, im0): @@ -637,7 +775,7 @@ def apply_classifier(x, model, img, im0): for j, a in enumerate(d): # per item cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] im = cv2.resize(cutout, (224, 224)) # BGR - # cv2.imwrite('test%i.jpg' % j, cutout) + # cv2.imwrite('example%i.jpg' % j, cutout) im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 diff --git a/cv/detection/yolov5/pytorch/utils/loss.py b/cv/detection/yolov5/pytorch/utils/loss.py index 370f732..21eec3e 100644 --- a/cv/detection/yolov5/pytorch/utils/loss.py +++ b/cv/detection/yolov5/pytorch/utils/loss.py @@ -1,7 +1,9 @@ -# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2023-2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved.
- -# Loss functions +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Loss functions +""" import torch import torch.nn as nn @@ -91,7 +93,6 @@ class QFocalLoss(nn.Module): class ComputeLoss: # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLoss, self).__init__() self.sort_obj_iou = False device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters @@ -111,7 +112,7 @@ class ComputeLoss: det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index - self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance for k in 'na', 'nc', 'nl', 'anchors': setattr(self, k, getattr(det, k)) @@ -165,8 +166,7 @@ class ComputeLoss: lcls *= self.hyp['cls'] bs = tobj.shape[0] # batch size - loss = lbox + lobj + lcls - return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() + return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) @@ -218,7 +218,7 @@ class ComputeLoss: # Append a = t[:, 6].long() # anchor indices # indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices - indices.append((b,a,gj.clamp_(0,shape[2] - 1),gi.clamp_(0,shape[3] - 1))) # image, anchor, grid indices + indices.append((b,a,gj.clamp_(0,shape[2] - 1),gi.clamp_(0,shape[3] - 1))) tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class diff --git a/cv/detection/yolov5/pytorch/utils/metrics.py b/cv/detection/yolov5/pytorch/utils/metrics.py index c94c4a7..4f1b5e2 100644 --- a/cv/detection/yolov5/pytorch/utils/metrics.py +++ b/cv/detection/yolov5/pytorch/utils/metrics.py @@ -1,9 +1,12 @@ -# Model validation metrics +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Model validation metrics +""" +import math import warnings from pathlib import Path -import math import matplotlib.pyplot as plt import numpy as np import torch @@ -88,8 +91,8 @@ def compute_ap(recall, precision): """ # Append sentinel values to beginning and end - mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) - mpre = np.concatenate(([1.], precision, [0.])) + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) # Compute the precision envelope mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) @@ -175,6 +178,7 @@ class ConfusionMatrix: fig.axes[0].set_xlabel('True') fig.axes[0].set_ylabel('Predicted') fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) + plt.close() except Exception as e: print(f'WARNING: ConfusionMatrix plot failure: {e}') @@ -305,6 +309,7 @@ def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") fig.savefig(Path(save_dir), dpi=250) + plt.close() def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'): @@ -325,3 +330,4 @@ def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence' ax.set_ylim(0, 1) plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") fig.savefig(Path(save_dir), dpi=250) + plt.close() diff --git 
a/cv/detection/yolov5/pytorch/utils/plots.py b/cv/detection/yolov5/pytorch/utils/plots.py index 23a4862..00b8f88 100644 --- a/cv/detection/yolov5/pytorch/utils/plots.py +++ b/cv/detection/yolov5/pytorch/utils/plots.py @@ -1,25 +1,28 @@ -# Plotting utils +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Plotting utils +""" -import glob +import math import os from copy import copy from pathlib import Path import cv2 -import math import matplotlib import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sn import torch -import yaml from PIL import Image, ImageDraw, ImageFont -from utils.general import increment_path, xywh2xyxy, xyxy2xywh +from utils.general import user_config_dir, is_ascii, is_chinese, xywh2xyxy, xyxy2xywh from utils.metrics import fitness # Settings +CONFIG_DIR = user_config_dir() # Ultralytics settings dir +RANK = int(os.getenv('RANK', -1)) matplotlib.rc('font', **{'size': 11}) matplotlib.use('Agg') # for writing to files only @@ -45,6 +48,75 @@ class Colors: colors = Colors() # create instance for 'from utils.plots import colors' +def check_font(font='Arial.ttf', size=10): + # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary + font = Path(font) + font = font if font.exists() else (CONFIG_DIR / font.name) + try: + return ImageFont.truetype(str(font) if font.exists() else font.name, size) + except Exception as e: # download if missing + url = "https://ultralytics.com/assets/" + font.name + print(f'Downloading {url} to {font}...') + torch.hub.download_url_to_file(url, str(font), progress=False) + return ImageFont.truetype(str(font), size) + + +class Annotator: + if RANK in (-1, 0): + check_font() # download TTF if necessary + + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'): + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.' 
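+        # Hedged usage sketch, not part of this patch (im0, names, c and conf are illustrative):
+        #   annotator = Annotator(im0, line_width=3, example=str(names))
+        #   annotator.box_label(xyxy, f'{names[c]} {conf:.2f}', color=colors(c))
+        #   im0 = annotator.result()  # back to a numpy array for saving/display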
+ self.pil = pil or not is_ascii(example) or is_chinese(example) + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_font(font='Arial.Unicode.ttf' if is_chinese(example) else font, + size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle([box[0], + box[1] - h if outside else box[1], + box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1], fill=color) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height + outside = p1[1] - h - 3 >= 0 # label fits outside box + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, + thickness=tf, lineType=cv2.LINE_AA) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255)): + # Add text to image (PIL-only) + w, h = self.font.getsize(text) # text width, height + self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + + def hist2d(x, y, n=100): # 2d histogram used in labels.png and evolve.png xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) @@ -67,54 +139,6 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): return filtfilt(b, a, data) # forward-backward filter -def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3): - # Plots one bounding box on image 'im' using OpenCV - assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' 
- tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1 # line/font thickness - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) - - -def plot_one_box_PIL(box, im, color=(128, 128, 128), label=None, line_thickness=None): - # Plots one bounding box on image 'im' using PIL - im = Image.fromarray(im) - draw = ImageDraw.Draw(im) - line_thickness = line_thickness or max(int(min(im.size) / 200), 2) - draw.rectangle(box, width=line_thickness, outline=color) # plot - if label: - font = ImageFont.truetype("Arial.ttf", size=max(round(max(im.size) / 40), 12)) - txt_width, txt_height = font.getsize(label) - draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=color) - draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font) - return np.asarray(im) - - -def plot_wh_methods(): # from utils.plots import *; plot_wh_methods() - # Compares the two methods for width-height anchor multiplication - # https://github.com/ultralytics/yolov3/issues/168 - x = np.arange(-4.0, 4.0, .1) - ya = np.exp(x) - yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 - - fig = plt.figure(figsize=(6, 3), tight_layout=True) - plt.plot(x, ya, '.-', label='YOLOv3') - plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2') - plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6') - plt.xlim(left=-4, right=4) - plt.ylim(bottom=0, top=6) - plt.xlabel('input') - plt.ylabel('output') - plt.grid() - plt.legend() - fig.savefig('comparison.png', dpi=200) - - def output_to_target(output): # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] @@ -124,82 +148,65 @@ def output_to_target(output): return np.array(targets) -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): +def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=1920, max_subplots=16): # Plot image grid with labels - if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() - - # un-normalise if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness + images *= 255.0 # de-normalise (optional) bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - + # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): + for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + 
mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: - image_targets = targets[targets[:, 0] == i] - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype('int') - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h - elif scale_factor < 1: # absolute coords need scale if image scales - boxes *= scale_factor - boxes[[0, 2]] += block_x - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] color = colors(cls) cls = names[cls] if names else cls if labels or conf[j] > 0.25: # 0.25 conf thresh - label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, - lineType=cv2.LINE_AA) - - # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) - - if fname: - r = min(1280. 
/ max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save - return mosaic + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): @@ -219,9 +226,9 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): plt.close() -def plot_test_txt(): # from utils.plots import *; plot_test() - # Plot test.txt histograms - x = np.loadtxt('test.txt', dtype=np.float32) +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt('val.txt', dtype=np.float32) box = xyxy2xywh(x[:, :4]) cx, cy = box[:, 0], box[:, 1] @@ -249,15 +256,16 @@ def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() plt.savefig('targets.jpg', dpi=200) -def plot_study_txt(path='', x=None): # from utils.plots import *; plot_study_txt() - # Plot study.txt generated by test.py +def plot_val_study(file='', dir='', x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) plot2 = False # plot additional results if plot2: ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - # for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: - for f in sorted(Path(path).glob('study*.txt')): + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob('study*.txt')): y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T x = np.arange(y.shape[1]) if x is None else np.array(x) if plot2: @@ -276,14 +284,16 @@ def plot_study_txt(path='', x=None): # from utils.plots import *; plot_study_tx ax2.grid(alpha=0.2) ax2.set_yticks(np.arange(20, 60, 5)) ax2.set_xlim(0, 57) - ax2.set_ylim(30, 55) + ax2.set_ylim(25, 55) ax2.set_xlabel('GPU Speed (ms/img)') ax2.set_ylabel('COCO AP val') ax2.legend(loc='lower right') - plt.savefig(str(Path(path).name) + '.png', dpi=300) + f = save_dir / 'study.png' + print(f'Saving {f}...') + plt.savefig(f, dpi=300) -def plot_labels(labels, names=(), save_dir=Path(''), loggers=None): +def plot_labels(labels, names=(), save_dir=Path('')): # plot dataset labels print('Plotting labels... 
') c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes @@ -326,35 +336,6 @@ def plot_labels(labels, names=(), save_dir=Path(''), loggers=None): matplotlib.use('Agg') plt.close() - # loggers - for k, v in loggers.items() or {}: - if k == 'wandb' and v: - v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}, commit=False) - - -def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.plots import *; plot_evolution() - # Plot hyperparameter evolution results in evolve.txt - with open(yaml_file) as f: - hyp = yaml.safe_load(f) - x = np.loadtxt('evolve.txt', ndmin=2) - f = fitness(x) - # weights = (f - f.min()) ** 2 # for weighted results - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc('font', **{'size': 8}) - for i, (k, v) in enumerate(hyp.items()): - y = x[:, i + 7] - # mu = (y * weights).sum() / weights.sum() # best weighted result - mu = y[f.argmax()] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') - plt.plot(mu, f.max(), 'k+', markersize=15) - plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print('%15s: %.3g' % (k, mu)) - plt.savefig('evolve.png', dpi=200) - print('\nPlot saved as evolve.png') - def profile_idetection(start=0, stop=0, labels=(), save_dir=''): # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection() @@ -383,71 +364,63 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=''): a.remove() except Exception as e: print('Warning: Plotting error for %s; %s' % (f, e)) - ax[1].legend() plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200) -def plot_results_overlay(start=0, stop=0): # from utils.plots import *; plot_results_overlay() - # Plot training 'results*.txt', overlaying train and val losses - s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends - t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles - for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')): - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T - n = results.shape[1] # number of rows - x = range(start, min(stop, n) if stop else n) - fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True) - ax = ax.ravel() - for i in range(5): - for j in [i, i + 5]: - y = results[j, x] - ax[i].plot(x, y, marker='.', label=s[j]) - # y_smooth = butter_lowpass_filtfilt(y) - # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j]) - - ax[i].set_title(t[i]) - ax[i].legend() - ax[i].set_ylabel(f) if i == 0 else None # add filename - fig.savefig(f.replace('.txt', '.png'), dpi=200) - - -def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): - # Plot training 'results*.txt'. 
from utils.plots import *; plot_results(save_dir='runs/train/exp') +def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc('font', **{'size': 8}) + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none') + plt.plot(mu, f.max(), 'k+', markersize=15) + plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print('%15s: %.3g' % (k, mu)) + f = evolve_csv.with_suffix('.png') # filename + plt.savefig(f, dpi=200) + plt.close() + print(f'Saved {f}') + + +def plot_results(file='path/to/results.csv', dir=''): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) ax = ax.ravel() - s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', - 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] - if bucket: - # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] - files = ['results%g.txt' % x for x in id] - c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) - os.system(c) - else: - files = list(Path(save_dir).glob('results*.txt')) - assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) + files = list(save_dir.glob('results*.csv')) + assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.' 
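+    # The column indices used below assume the results.csv layout written during training,
+    # i.e. (hedged, inferred from the metric keys logged elsewhere in this sync):
+    #   epoch, train/box_loss, train/obj_loss, train/cls_loss, metrics/precision, metrics/recall,
+    #   metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss, x/lr0, x/lr1, x/lr2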
for fi, f in enumerate(files): try: - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T - n = results.shape[1] # number of rows - x = range(start, min(stop, n) if stop else n) - for i in range(10): - y = results[i, x] - if i in [0, 1, 2, 5, 6, 7]: - y[y == 0] = np.nan # don't show zero loss values - # y /= y[0] # normalize - label = labels[fi] if len(labels) else f.stem - ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8) - ax[i].set_title(s[i]) - # if i in [5, 6, 7]: # share train and val loss y axes + data = pd.read_csv(f) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8) + ax[i].set_title(s[j], fontsize=12) + # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) except Exception as e: - print('Warning: Plotting error for %s; %s' % (f, e)) - + print(f'Warning: Plotting error for {f}: {e}') ax[1].legend() - fig.savefig(Path(save_dir) / 'results.png', dpi=200) + fig.savefig(save_dir / 'results.png', dpi=200) + plt.close() -def feature_visualization(x, module_type, stage, n=64, save_dir=Path('runs/detect/exp')): +def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')): """ x: Features to be visualized module_type: Module type @@ -460,13 +433,15 @@ def feature_visualization(x, module_type, stage, n=64, save_dir=Path('runs/detec if height > 1 and width > 1: f = f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename - plt.figure(tight_layout=True) blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels n = min(n, channels) # number of plots - ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)[1].ravel() # 8 rows x n/8 cols + fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols + ax = ax.ravel() + plt.subplots_adjust(wspace=0.05, hspace=0.05) for i in range(n): ax[i].imshow(blocks[i].squeeze()) # cmap='gray' ax[i].axis('off') print(f'Saving {save_dir / f}... ({n}/{channels})') - plt.savefig(save_dir / f, dpi=300) + plt.savefig(save_dir / f, dpi=300, bbox_inches='tight') + plt.close() diff --git a/cv/detection/yolov5/pytorch/utils/torch_utils.py b/cv/detection/yolov5/pytorch/utils/torch_utils.py index 36b6845..352ecf5 100644 --- a/cv/detection/yolov5/pytorch/utils/torch_utils.py +++ b/cv/detection/yolov5/pytorch/utils/torch_utils.py @@ -1,7 +1,11 @@ -# YOLOv5 PyTorch utils +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +PyTorch utils +""" import datetime import logging +import math import os import platform import subprocess @@ -10,9 +14,7 @@ from contextlib import contextmanager from copy import deepcopy from pathlib import Path -import math import torch -import torch.backends.cudnn as cudnn import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F @@ -22,7 +24,8 @@ try: import thop # for FLOPs computation except ImportError: thop = None -logger = logging.getLogger(__name__) + +LOGGER = logging.getLogger(__name__) @contextmanager @@ -31,19 +34,10 @@ def torch_distributed_zero_first(local_rank: int): Decorator to make all processes in distributed training wait for each local_master to do something. 
""" if local_rank not in [-1, 0]: - dist.barrier() + dist.barrier(device_ids=[local_rank]) yield if local_rank == 0: - dist.barrier() - - -def init_torch_seeds(seed=0): - # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html - torch.manual_seed(seed) - if seed == 0: # slower, more reproducible - cudnn.benchmark, cudnn.deterministic = False, True - else: # faster, less reproducible - cudnn.benchmark, cudnn.deterministic = True, False + dist.barrier(device_ids=[0]) def date_modified(path=__file__): @@ -85,54 +79,68 @@ def select_device(device='', batch_size=None): else: s += 'CPU\n' - logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe + LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe return torch.device('cuda:0' if cuda else 'cpu') -def time_synchronized(): +def time_sync(): # pytorch-accurate time if torch.cuda.is_available(): torch.cuda.synchronize() return time.time() -def profile(x, ops, n=100, device=None): - # profile a pytorch module or list of modules. Example usage: - # x = torch.randn(16, 3, 640, 640) # input +def profile(input, ops, n=10, device=None): + # YOLOv5 speed/memory/FLOPs profiler + # + # Usage: + # input = torch.randn(16, 3, 640, 640) # m1 = lambda x: x * torch.sigmoid(x) # m2 = nn.SiLU() - # profile(x, [m1, m2], n=100) # profile speed over 100 iterations - - device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') - x = x.to(device) - x.requires_grad = True - print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') - print(f"\n{'Params':>12s}{'GFLOPs':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") - for m in ops if isinstance(ops, list) else [ops]: - m = m.to(device) if hasattr(m, 'to') else m # device - m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type - dtf, dtb, t = 0., 0., [0., 0., 0.] # dt forward, backward - try: - flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs - except: - flops = 0 - - for _ in range(n): - t[0] = time_synchronized() - y = m(x) - t[1] = time_synchronized() + # profile(input, [m1, m2], n=100) # profile over 100 iterations + + results = [] + logging.basicConfig(format="%(message)s", level=logging.INFO) + device = device or select_device() + print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" + f"{'input':>24s}{'output':>24s}") + + for x in input if isinstance(input, list) else [input]: + x = x.to(device) + x.requires_grad = True + for m in ops if isinstance(ops, list) else [ops]: + m = m.to(device) if hasattr(m, 'to') else m # device + m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m + tf, tb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward try: - _ = y.sum().backward() - t[2] = time_synchronized() - except: # no backward method - t[2] = float('nan') - dtf += (t[1] - t[0]) * 1000 / n # ms per op forward - dtb += (t[2] - t[1]) * 1000 / n # ms per op backward + flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs + except: + flops = 0 - s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' - s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' - p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters - print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') + try: + for _ in range(n): + t[0] = time_sync() + y = m(x) + t[1] = time_sync() + try: + _ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward() + t[2] = time_sync() + except Exception as e: # no backward method + print(e) + t[2] = float('nan') + tf += (t[1] - t[0]) * 1000 / n # ms per op forward + tb += (t[2] - t[1]) * 1000 / n # ms per op backward + mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB) + s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' + s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' + p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters + print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}') + results.append([p, flops, mem, tf, tb, s_in, s_out]) + except Exception as e: + print(e) + results.append(None) + torch.cuda.empty_cache() + return results def is_parallel(model): @@ -231,7 +239,7 @@ def model_info(model, verbose=False, img_size=640): except (ImportError, Exception): fs = '' - logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") def load_classifier(name='resnet101', n=2): @@ -275,6 +283,26 @@ def copy_attr(a, b, include=(), exclude=()): setattr(a, k, v) +class EarlyStopping: + # YOLOv5 simple early stopper + def __init__(self, patience=30): + self.best_fitness = 0.0 # i.e. mAP + self.best_epoch = 0 + self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop + self.possible_stop = False # possible stop may occur next epoch + + def __call__(self, epoch, fitness): + if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training + self.best_epoch = epoch + self.best_fitness = fitness + delta = epoch - self.best_epoch # epochs without improvement + self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch + stop = delta >= self.patience # stop training if patience exceeded + if stop: + LOGGER.info(f'EarlyStopping patience {self.patience} exceeded, stopping training.') + return stop + + class ModelEMA: """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models Keep a moving average of everything in the model state_dict (parameters and buffers). 
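A minimal sketch of how the new EarlyStopping helper above is driven from a training loop; the names `epochs`, `validate` and `fi` here are illustrative and not taken from this patch:

    from utils.torch_utils import EarlyStopping

    stopper = EarlyStopping(patience=30)
    for epoch in range(epochs):
        fi = validate()  # fitness, e.g. the weighted mAP computed from val.run() results
        if stopper(epoch=epoch, fitness=fi):
            break  # no improvement for `patience` epochs, stop training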
diff --git a/cv/detection/yolov5/pytorch/val.py b/cv/detection/yolov5/pytorch/val.py new file mode 100755 index 0000000..5ea856c --- /dev/null +++ b/cv/detection/yolov5/pytorch/val.py @@ -0,0 +1,364 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 model accuracy on a custom dataset + +Usage: + $ python path/to/val.py --data coco128.yaml --weights yolov5s.pt --img 640 +""" + +import argparse +import json +import os +import sys +from pathlib import Path +from threading import Thread + +import numpy as np +import torch +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.experimental import attempt_load +from utils.datasets import create_dataloader +from utils.general import coco80_to_coco91_class, check_dataset, check_img_size, check_requirements, \ + check_suffix, check_yaml, box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, \ + increment_path, colorstr, print_args +from utils.metrics import ap_per_class, ConfusionMatrix +from utils.plots import output_to_target, plot_images, plot_val_study +from utils.torch_utils import select_device, time_sync +from utils.callbacks import Callbacks + + +def save_one_txt(predn, save_conf, shape, file): + # Save one txt result + gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh + for *xyxy, conf, cls in predn.tolist(): + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(file, 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + +def save_one_json(predn, jdict, path, class_map): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + for p, b in zip(predn.tolist(), box.tolist()): + jdict.append({'image_id': image_id, + 'category_id': class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5)}) + + +def process_batch(detections, labels, iouv): + """ + Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format. 
+ Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + correct (Array[N, 10]), for 10 IoU levels + """ + correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device) + iou = box_iou(labels[:, 1:], detections[:, :4]) + x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5])) # IoU above threshold and classes match + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detection, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = torch.Tensor(matches).to(iouv.device) + correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv + return correct + + +@torch.no_grad() +def run(data, + weights=None, # model.pt path(s) + batch_size=32, # batch size + imgsz=640, # inference size (pixels) + conf_thres=0.001, # confidence threshold + iou_thres=0.6, # NMS IoU threshold + task='val', # train, val, test, speed or study + device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu + single_cls=False, # treat as single-class dataset + augment=False, # augmented inference + verbose=False, # verbose output + save_txt=False, # save results to *.txt + save_hybrid=False, # save label+prediction hybrid results to *.txt + save_conf=False, # save confidences in --save-txt labels + save_json=False, # save a COCO-JSON results file + project=ROOT / 'runs/val', # save to project/name + name='exp', # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=True, # use FP16 half-precision inference + model=None, + dataloader=None, + save_dir=Path(''), + plots=True, + callbacks=Callbacks(), + compute_loss=None, + ): + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device = next(model.parameters()).device # get model device + + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + check_suffix(weights, '.pt') + model = attempt_load(weights, map_location=device) # load FP32 model + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(imgsz, s=gs) # check image size + + # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 + # if device.type != 'cpu' and torch.cuda.device_count() > 1: + # model = nn.DataParallel(model) + + # Data + data = check_dataset(data) # check + + # Half + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + + # Configure + model.eval() + is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + if not training: + if device.type != 'cpu': + model(torch.zeros(1, 4, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once + pad = 0.0 if task == 'speed' else 0.5 + task = task if task in ('train', 
'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True, + prefix=colorstr(f'{task}: '))[0] + + seen = 0 + confusion_matrix = ConfusionMatrix(nc=nc) + names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) + s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + loss = torch.zeros(3, device=device) + jdict, stats, ap, ap_class = [], [], [], [] + for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): + t1 = time_sync() + img = img.to(device, non_blocking=True) + d1, d2, d3 = torch.split(img.permute(1, 0, 2, 3), 1, 0) + d4 = torch.zeros(d1.size(), device='cuda') + img = torch.cat([d1, d2, d3, d4], dim=0).permute(1, 0, 2, 3) + img = img.to(device, non_blocking=True, memory_format=torch.channels_last) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + targets = targets.to(device) + nb, _, height, width = img.shape # batch size, channels, height, width + t2 = time_sync() + dt[0] += t2 - t1 + + # Run model + out, train_out = model(img, augment=augment) # inference and training outputs + dt[1] += time_sync() - t2 + + # Compute loss + if compute_loss: + loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls + + # Run NMS + targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels + lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling + t3 = time_sync() + out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) + dt[2] += time_sync() - t3 + + # Statistics per image + for si, pred in enumerate(out): + labels = targets[targets[:, 0] == si, 1:] + nl = len(labels) + tcls = labels[:, 0].tolist() if nl else [] # target class + path, shape = Path(paths[si]), shapes[si][0] + seen += 1 + + if len(pred) == 0: + if nl: + stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) + continue + + # Predictions + if single_cls: + pred[:, 5] = 0 + predn = pred.clone() + scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + correct = process_batch(predn, labelsn, iouv) + if plots: + confusion_matrix.process_batch(predn, labelsn) + else: + correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) + stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # (correct, conf, pcls, tcls) + + # Save/log + if save_txt: + save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) + if save_json: + save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary + callbacks.run('on_val_image_end', pred, predn, path, names, img[si]) + + # Plot images + #if plots and batch_i < 3: + # f = save_dir / f'val_batch{batch_i}_labels.jpg' # labels + # Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() + # f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions + # 
Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() + + # Compute statistics + stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy + if len(stats) and stats[0].any(): + p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) + ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 + mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() + nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class + else: + nt = torch.zeros(1) + + # Print results + pf = '%20s' + '%11i' * 2 + '%11.3g' * 4 # print format + print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) + + # Print results per class + if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): + for i, c in enumerate(ap_class): + print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) + + # Print speeds + t = tuple(x / seen * 1E3 for x in dt) # speeds per image + if not training: + shape = (batch_size, 3, imgsz, imgsz) + print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) + + # Plots + if plots: + confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) + callbacks.run('on_val_end') + + # Save JSON + if save_json and len(jdict): + w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights + anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json + pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + print(f'\nEvaluating pycocotools mAP... saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + check_requirements(['pycocotools']) + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, 'bbox') + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + print(f'pycocotools unable to run: {e}') + + # Return results + model.float() # for training + if not training: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + print(f"Results saved to {colorstr('bold', save_dir)}{s}") + maps = np.zeros(nc) + map + for i, c in enumerate(ap_class): + maps[c] = ap[i] + return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--batch-size', type=int, default=32, help='batch size') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.6, help='NMS IoU threshold') + parser.add_argument('--task', default='val', help='train, val, test, speed or study') + 
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') + parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + opt = parser.parse_args() + opt.data = check_yaml(opt.data) # check YAML + opt.save_json |= opt.data.endswith('coco.yaml') + opt.save_txt |= opt.save_hybrid + print_args(FILE.stem, opt) + return opt + + +def main(opt): + set_logging() + check_requirements(exclude=('tensorboard', 'thop')) + + if opt.task in ('train', 'val', 'test'): # run normally + run(**vars(opt)) + + elif opt.task == 'speed': # speed benchmarks + # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt... + for w in opt.weights if isinstance(opt.weights, list) else [opt.weights]: + run(opt.data, weights=w, batch_size=opt.batch_size, imgsz=opt.imgsz, conf_thres=.25, iou_thres=.45, + device=opt.device, save_json=False, plots=False) + + elif opt.task == 'study': # run over a range of settings and save/plot + # python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt... + x = list(range(256, 1536 + 128, 128)) # x axis (image sizes) + for w in opt.weights if isinstance(opt.weights, list) else [opt.weights]: + f = f'study_{Path(opt.data).stem}_{Path(w).stem}.txt' # filename to save to + y = [] # y axis + for i in x: # img-size + print(f'\nRunning {f} point {i}...') + r, _, t = run(opt.data, weights=w, batch_size=opt.batch_size, imgsz=i, conf_thres=opt.conf_thres, + iou_thres=opt.iou_thres, device=opt.device, save_json=opt.save_json, plots=False) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + plot_val_study(x=x) # plot + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) -- Gitee
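For reference, the three task modes handled in main() above map to invocations like the following (hedged examples assembled from this file's own argparse defaults and inline comments):

    python val.py --data coco128.yaml --weights yolov5s.pt --img 640             # --task val (default)
    python val.py --task speed --data coco.yaml --batch 1 --weights yolov5s.pt   # speed benchmark
    python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5s.pt   # mAP vs image-size study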