Add Depth-Anything model (#10)

* Add Depth-Anything model
jamjamjon · Apr 24, 2024 · e614ca4 · e614ca4
1 parent beda8ef
commit e614ca4
Show file tree

Hide file tree

Showing 11 changed files with 476 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -1,9 +1,15 @@
 # usls
 
-A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) and others.
+A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) and others.
 
 ## Recently Updated
 
+|        Depth-Anything         |
+| :----------------------------: |
+|<img src='examples/depth-anything/demo.png'   width="800px">|
+
+
+
 |        YOLOP-v2          |             Face-Parsing             |               Text-Detection           |  
 | :----------------------------: | :------------------------------: |  :------------------------------: |
 |<img src='examples/yolop/demo.png'  height="240px">| <img src='examples/face-parsing/demo.png'  height="240px"> | <img src='examples/db/demo.png'  height="240px"> |
@@ -41,6 +47,7 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp
 |             [YOLOPv2](https://arxiv.org/abs/2208.11434)             | Panoptic driving Perception |   [demo](examples/yolop)   |      ✅      |      ✅      |             ✅             |            ✅            |
 |    [YOLOv5-classification](https://github.com/ultralytics/yolov5)    |      Object Detection      |   [demo](examples/yolov5)   |      ✅      |      ✅      |             ✅             |            ✅            |
 |     [YOLOv5-segmentation](https://github.com/ultralytics/yolov5)     |    Instance Segmentation    |   [demo](examples/yolov5)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|     [Depth-Anything](https://github.com/LiheYoung/Depth-Anything)     |    Monocular Depth Estimation    |   [demo](examples/depth-anything)   |      ✅      |      ✅      |             ❌             |            ❌            |
 
 ## Solution Models
 

diff --git a/examples/depth-anything/README.md b/examples/depth-anything/README.md
@@ -0,0 +1,16 @@
+## Quick Start
+
+```shell
+cargo run -r --example depth-anything
+```
+
+## ONNX Model
+
+- [depth-anything-s-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-s-dyn.onnx)  
+- [depth-anything-b-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-b-dyn.onnx)
+- [depth-anything-l-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/depth-anything-l-dyn.onnx)
+
+
+## Results
+
+![](./demo.png)
diff --git a/examples/depth-anything/demo.png b/examples/depth-anything/demo.png
diff --git a/examples/depth-anything/main.rs b/examples/depth-anything/main.rs
@@ -0,0 +1,23 @@
+use usls::{models::DepthAnything, Annotator, DataLoader, Options};
+
+/// Depth-Anything example: load the ONNX model, run it on one image and save
+/// the annotated result.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Build the model. `with_i00` / `with_i02` / `with_i03` configure input 0,
+    // dims 0 / 2 / 3 — presumably (min, opt, max) bounds for batch, height and
+    // width; confirm against `Options`.
+    let model = DepthAnything::new(
+        &Options::default()
+            .with_model("../models/depth-anything-s-dyn.onnx")
+            .with_i00((1, 1, 8).into())
+            .with_i02((384, 512, 1024).into())
+            .with_i03((384, 512, 1024).into()),
+    )?;
+
+    // Read the input image.
+    let images = vec![DataLoader::try_read("./assets/2.jpg")?];
+
+    // Run inference.
+    let depths = model.run(&images)?;
+
+    // Draw the results; "Depth-Anything" is passed as the save-out name.
+    Annotator::default()
+        .with_saveout("Depth-Anything")
+        .annotate(&images, &depths);
+
+    Ok(())
+}
diff --git a/examples/dinov2/README.md b/examples/dinov2/README.md
@@ -6,32 +6,17 @@ This demo showcases how to use `DINOv2` to compute image similarity, applicable
 cargo run -r --example dinov2
 ```
 
-## Or you can manully
+## Download DINOv2 ONNX Model
 
-### 1.Donwload DINOv2 ONNX Model
+- [dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
+- [dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
+- [dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
 
-[dinov2-s14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14.onnx)
-[dinov2-s14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn.onnx)
-[dinov2-s14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-s14-dyn-f16.onnx)
+- [dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
+- [dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
 
-[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
-[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
 
 
-### 2. Specify the ONNX model path in `main.rs`
-
-```Rust
-let options = Options::default()
-    .with_model("ONNX_PATH")    // <= modify this
-    .with_profile(false);
-```
-
-### 3. Then, run
-
-```bash
-cargo run -r --example dinov2
-```
-
 ## Results
 
 ```shell

diff --git a/src/core/annotator.rs b/src/core/annotator.rs
@@ -1,7 +1,10 @@
-use crate::{auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, Y};
+use crate::{
+    auto_load, string_now, Bbox, Keypoint, Mask, Mbr, Prob, CHECK_MARK, CROSS_MARK, TURBO, Y,
+};
 use ab_glyph::{FontVec, PxScale};
 use anyhow::Result;
-use image::{DynamicImage, Rgba, RgbaImage};
+use image::{DynamicImage, ImageBuffer, Rgba, RgbaImage};
+use imageproc::map::map_colors;
 
 /// Annotator for struct `Y`
 #[derive(Debug)]
@@ -265,6 +268,13 @@ impl Annotator {
         for (img, y) in imgs.iter().zip(ys.iter()) {
             let mut img_rgb = img.to_rgba8();
 
+            // pixels
+            if !self.without_masks {
+                if let Some(xs) = &y.pixels() {
+                    self.plot_pixels(&mut img_rgb, xs)
+                }
+            }
+
             // masks
             if !self.without_masks {
                 if let Some(xs) = &y.masks() {
@@ -377,6 +387,60 @@ impl Annotator {
         }
     }
 
+    /// Renders a side-by-side "Raw" / "Depth" comparison onto `img`.
+    ///
+    /// `pixels` is read as a single-channel 8-bit image with the same
+    /// dimensions as `img`; every value is colorized through the `TURBO`
+    /// lookup table. The canvas is cleared to white, then the half-size
+    /// original is pasted on the left and the half-size colorized map on the
+    /// right, each with a centered caption above it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `pixels` cannot back a `w x h` luma buffer (i.e.
+    /// `ImageBuffer::from_raw` returns `None`).
+    pub fn plot_pixels(&self, img: &mut RgbaImage, pixels: &[u8]) {
+        let (w, h) = img.dimensions();
+        let filter = image::imageops::FilterType::CatmullRom;
+
+        // Colorize the single-channel map, then shrink it to half size.
+        let gray: ImageBuffer<image::Luma<_>, Vec<u8>> =
+            ImageBuffer::from_raw(w, h, pixels.to_vec())
+                .expect("Faild to create luma from ndarray");
+        let colored = map_colors(&gray, |p| image::Rgb(TURBO[p[0] as usize]));
+        let colored = image::DynamicImage::from(colored).resize_exact(w / 2, h / 2, filter);
+
+        // Half-size copy of the original image.
+        let raw = image::DynamicImage::from(img.clone()).resize_exact(w / 2, h / 2, filter);
+
+        // Clear the whole canvas to opaque white.
+        for px in img.pixels_mut() {
+            *px = Rgba([255, 255, 255, 255]);
+        }
+
+        // Paste both halves, leaving two thirds of the free height above them
+        // for the captions.
+        let top = 2 * (h - raw.height()) / 3;
+        image::imageops::overlay(img, &raw, 0, i64::from(top));
+        image::imageops::overlay(img, &colored, i64::from(raw.width()), i64::from(top));
+
+        // Captions, centered over each half. `saturating_sub` keeps the
+        // coordinates valid when the text is wider/taller than the space
+        // available (small images); plain `u32` subtraction would underflow.
+        let scale = PxScale::from(self.scale_dy * 2.5);
+        for (legend, left) in [("Raw", 0), ("Depth", raw.width())] {
+            let (text_w, text_h) = imageproc::drawing::text_size(scale, &self.font, legend);
+            imageproc::drawing::draw_text_mut(
+                img,
+                Rgba([0, 0, 0, 255]),
+                (left + raw.width().saturating_sub(text_w) / 2) as i32,
+                (top.saturating_sub(text_h) / 2) as i32,
+                scale,
+                &self.font,
+                legend,
+            );
+        }
+    }
+
     pub fn plot_masks_and_polygons(&self, img: &mut RgbaImage, masks: &[Mask]) {
         let mut convas = img.clone();
         for mask in masks.iter() {

diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs
@@ -0,0 +1,78 @@
+use crate::{ops, MinOptMax, Options, OrtEngine, Y};
+use anyhow::Result;
+use image::{DynamicImage, ImageBuffer};
+use ndarray::{Array, Axis, IxDyn};
+
+/// Depth-Anything model wrapper around an [`OrtEngine`].
+///
+/// Caches the engine's batch / height / width settings at construction time.
+#[derive(Debug)]
+pub struct DepthAnything {
+    engine: OrtEngine,
+    height: MinOptMax,
+    width: MinOptMax,
+    batch: MinOptMax,
+}
+
+impl DepthAnything {
+    /// Builds the engine from `options`, copies its batch / height / width
+    /// settings and performs a dry run.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the engine cannot be created or the dry run fails.
+    pub fn new(options: &Options) -> Result<Self> {
+        let engine = OrtEngine::new(options)?;
+        let (batch, height, width) = (
+            engine.batch().to_owned(),
+            engine.height().to_owned(),
+            engine.width().to_owned(),
+        );
+        engine.dry_run()?;
+
+        Ok(Self {
+            engine,
+            height,
+            width,
+            batch,
+        })
+    }
+
+    /// Runs the model on `xs` and returns one [`Y`] per processed image.
+    ///
+    /// Images are resized to the `opt` height / width, normalized with
+    /// `ops::normalize(_, 0.0, 255.0)` and standardized with the mean / std
+    /// constants below before inference.
+    pub fn run(&self, xs: &[DynamicImage]) -> Result<Vec<Y>> {
+        let xs_ = ops::resize(xs, self.height.opt as u32, self.width.opt as u32)?;
+        let xs_ = ops::normalize(xs_, 0.0, 255.0);
+        let xs_ = ops::standardize(xs_, &[0.485, 0.456, 0.406], &[0.229, 0.224, 0.225]);
+        let ys = self.engine.run(&[xs_])?;
+        self.postprocess(ys, xs)
+    }
+
+    /// Converts the raw engine output into 8-bit maps sized like the inputs.
+    ///
+    /// For every entry along axis 0 of the first output, the values are
+    /// min-max scaled to `0..=255`, wrapped in a luma image of the model's
+    /// `opt` size and resized to the dimensions of the matching image in
+    /// `xs0`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the engine produced no outputs, if there are more
+    /// predictions than input images, or if a prediction cannot be reshaped
+    /// to `(height, width, 1)`.
+    pub fn postprocess(&self, xs: Vec<Array<f32, IxDyn>>, xs0: &[DynamicImage]) -> Result<Vec<Y>> {
+        // Loop-invariant model dimensions.
+        let (h, w) = (self.height() as usize, self.width() as usize);
+        let preds = xs
+            .first()
+            .ok_or_else(|| anyhow::anyhow!("Depth-Anything: the engine returned no outputs"))?;
+
+        let mut ys: Vec<Y> = Vec::with_capacity(xs0.len());
+        for (idx, luma) in preds.axis_iter(Axis(0)).enumerate() {
+            let image0 = xs0.get(idx).ok_or_else(|| {
+                anyhow::anyhow!("Depth-Anything: no input image for prediction {idx}")
+            })?;
+            let v = luma.into_shape((h, w, 1))?.into_owned().into_raw_vec();
+
+            // Min-max normalization bounds; an empty buffer falls back to 0.
+            let min_ = v
+                .iter()
+                .copied()
+                .min_by(|a, b| a.total_cmp(b))
+                .unwrap_or(0.);
+            let max_ = v
+                .iter()
+                .copied()
+                .max_by(|a, b| a.total_cmp(b))
+                .unwrap_or(0.);
+            let range = max_ - min_;
+            let v = v
+                .iter()
+                .map(|x| {
+                    if range == 0. {
+                        // Flat prediction: avoid 0/0 (NaN), which would
+                        // otherwise render as a fully saturated map.
+                        0
+                    } else {
+                        (((*x - min_) / range) * 255.).min(255.).max(0.) as u8
+                    }
+                })
+                .collect::<Vec<u8>>();
+
+            let luma: ImageBuffer<image::Luma<_>, Vec<u8>> =
+                ImageBuffer::from_raw(w as u32, h as u32, v)
+                    .ok_or_else(|| anyhow::anyhow!("Failed to create image from ndarray"))?;
+            // Scale the map back to the original image size.
+            let luma = image::DynamicImage::from(luma).resize_exact(
+                image0.width(),
+                image0.height(),
+                image::imageops::FilterType::CatmullRom,
+            );
+            ys.push(Y::default().with_pixels(&luma.into_luma8().into_raw()));
+        }
+        Ok(ys)
+    }
+
+    /// The `opt` batch size.
+    pub fn batch(&self) -> isize {
+        self.batch.opt
+    }
+
+    /// The `opt` input width.
+    pub fn width(&self) -> isize {
+        self.width.opt
+    }
+
+    /// The `opt` input height.
+    pub fn height(&self) -> isize {
+        self.height.opt
+    }
+}
diff --git a/src/models/mod.rs b/src/models/mod.rs
@@ -1,6 +1,7 @@
 mod blip;
 mod clip;
 mod db;
+mod depth_anything;
 mod dinov2;
 mod rtdetr;
 mod rtmo;
@@ -11,6 +12,7 @@ mod yolop;
 pub use blip::Blip;
 pub use clip::Clip;
 pub use db::DB;
+pub use depth_anything::DepthAnything;
 pub use dinov2::Dinov2;
 pub use rtdetr::RTDETR;
 pub use rtmo::RTMO;

diff --git a/src/utils/mod.rs b/src/utils/mod.rs
@@ -4,6 +4,10 @@ use std::io::{Read, Write};
 use std::path::{Path, PathBuf};
 
 pub mod coco;
+mod turbo;
+
+pub use turbo::TURBO;
+
 pub const GITHUB_ASSETS: &str = "https://github.com/jamjamjon/assets/releases/download/v0.0.1";
 pub const CHECK_MARK: &str = "✅";
 pub const CROSS_MARK: &str = "❌";