juanmc2005 · hbredin · Nov 8, 2023 · Nov 9, 2023 · Nov 9, 2023 · Nov 9, 2023
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -0,0 +1,16 @@
+version: 2
+
+build:
+  os: "ubuntu-22.04"
+  tools:
+    python: "3.10"
+
+python:
+  install:
+    - requirements: docs/requirements.txt
+    # Install diart before building the docs
+    - method: pip
+      path: .
+
+sphinx:
+  configuration: docs/conf.py
diff --git a/README.md b/README.md
diff --git a/demo.gif b/demo.gif
diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# All four variables can be overridden on the command line; the first two
+# use ?= and therefore also honour a value inherited from the environment.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Default goal (first rule in the file): a bare "make" behaves like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: forward every other goal name ($@) to sphinx-build's
+# "make mode" (-M).  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/logo.png b/docs/_static/logo.png
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,65 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "diart"
+copyright = "2023, Juan Manuel Coria"
+author = "Juan Manuel Coria"
+release = "v0.9"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "autoapi.extension",
+    "sphinx.ext.coverage",
+    "sphinx.ext.napoleon",
+    "sphinx_mdinclude",
+]
+
+autoapi_dirs = ["../src/diart"]
+autoapi_options = [
+    "members",
+    "undoc-members",
+    "show-inheritance",
+    "show-module-summary",
+    "special-members",
+    "imported-members",
+]
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+# -- Options for autodoc ----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#configuration
+
+# Automatically extract typehints when specified and place them in
+# descriptions of the relevant function/method.
+autodoc_typehints = "description"
+
+# Don't show the class signature next to the class name.
+autodoc_class_signature = "separated"
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "furo"
+html_static_path = ["_static"]
+html_logo = "_static/logo.png"
+html_title = "diart documentation"
+
+
+def skip_submodules(app, what, name, obj, skip, options):
+    """autoapi-skip-member hook: hide ``__init__``, console and argdoc members."""
+    hidden_prefixes = ("diart.console", "diart.argdoc")
+    if name.endswith("__init__") or name.startswith(hidden_prefixes):
+        return True
+    return None  # no opinion: keep AutoAPI's default decision (``skip``)
+
+
+def setup(sphinx):  # registers this file's event handlers on ``sphinx``
+    sphinx.connect("autoapi-skip-member", skip_submodules)  # member skip filter
diff --git a/docs/index.rst b/docs/index.rst
@@ -0,0 +1,11 @@
+Get started with diart
+======================
+
+.. mdinclude:: ../README.md
+
+
+Useful Links
+============
+
+.. toctree::
+   :maxdepth: 1
diff --git a/docs/make.bat b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+REM Probe run with output discarded; errorlevel 9009 triggers the not-found message.
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+REM With no argument show the Sphinx help; otherwise pass the first argument to -M.
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -0,0 +1,4 @@
+sphinx==6.2.1
+sphinx-autoapi==3.0.0
+sphinx-mdinclude==0.5.3
+furo==2023.9.10
diff --git a/environment.yml b/environment.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python=3.8
+  - python=3.10
   - portaudio=19.6.*
   - pysoundfile=0.12.*
   - ffmpeg[version='<4.4']

diff --git a/logo.jpg b/logo.jpg
diff --git a/pipeline.gif b/pipeline.gif
diff --git a/requirements.txt b/requirements.txt
@@ -10,6 +10,7 @@ torch>=1.12.1
 torchvision>=0.14.0
 torchaudio>=2.0.2
 pyannote.audio>=2.1.1
+requests>=2.31.0
 pyannote.core>=4.5
 pyannote.database>=4.1.1
 pyannote.metrics>=3.2

diff --git a/setup.cfg b/setup.cfg
@@ -1,8 +1,8 @@
 [metadata]
 name=diart
-version=0.8.0
+version=0.9.0
 author=Juan Manuel Coria
-description=Streaming speaker diarization in real-time
+description=A python framework to build AI for real-time speech
 long_description=file: README.md
 long_description_content_type=text/markdown
 keywords=speaker diarization, streaming, online, real time, rxpy
@@ -32,6 +32,7 @@ install_requires=
     torchvision>=0.14.0
     torchaudio>=2.0.2
     pyannote.audio>=2.1.1
+    requests>=2.31.0
     pyannote.core>=4.5
     pyannote.database>=4.1.1
     pyannote.metrics>=3.2

diff --git a/src/diart/argdoc.py b/src/diart/argdoc.py
@@ -15,3 +15,4 @@
 OUTPUT = "Directory to store the system's output in RTTM format"
 HF_TOKEN = "Huggingface authentication token for hosted models ('true' | 'false' | <token>). If 'true', it will use the token from huggingface-cli login"
 SAMPLE_RATE = "Sample rate of the audio stream"
+NORMALIZE_EMBEDDING_WEIGHTS = "Rescale embedding weights (min-max normalization) to be in the range [0, 1]. This is useful in some models without weighted statistics pooling that rely on masking, like Nvidia's NeMo or ECAPA-TDNN"
diff --git a/src/diart/blocks/__init__.py b/src/diart/blocks/__init__.py
@@ -14,6 +14,7 @@
 )
 from .segmentation import SpeakerSegmentation
 from .diarization import SpeakerDiarization, SpeakerDiarizationConfig
+from .pseudo_diarization import PseudoSpeakerDiarization, PseudoSpeakerDiarizationConfig
 from .base import PipelineConfig, Pipeline
 from .utils import Binarize, Resample, AdjustVolume
 from .vad import VoiceActivityDetection, VoiceActivityDetectionConfig
diff --git a/src/diart/blocks/base.py b/src/diart/blocks/base.py
@@ -11,12 +11,28 @@
 
 @dataclass
 class HyperParameter:
+    """Represents a pipeline hyper-parameter that can be tuned by diart"""
+
     name: Text
+    """Name of the hyper-parameter (e.g. tau_active)"""
     low: float
+    """Lowest value that this parameter can take"""
     high: float
+    """Highest value that this parameter can take"""
 
     @staticmethod
     def from_name(name: Text) -> "HyperParameter":
+        """Create a HyperParameter object given its name.
+
+        Parameters
+        ----------
+        name: str
+            Name of the hyper-parameter
+
+        Returns
+        -------
+        HyperParameter
+        """
         if name == "tau_active":
             return TauActive
         if name == "rho_update":
@@ -32,24 +48,34 @@ def from_name(name: Text) -> "HyperParameter":
 
 
 class PipelineConfig(ABC):
+    """Configuration containing the required
+    parameters to build and run a pipeline"""
+
     @property
     @abstractmethod
     def duration(self) -> float:
+        """The duration of an input audio chunk (in seconds)"""
         pass
 
     @property
     @abstractmethod
     def step(self) -> float:
+        """The step between two consecutive input audio chunks (in seconds)"""
         pass
 
     @property
     @abstractmethod
     def latency(self) -> float:
+        """The algorithmic latency of the pipeline (in seconds).
+        At time `t` of the audio stream, the pipeline will
+        output predictions for time `t - latency`.
+        """
         pass
 
     @property
     @abstractmethod
     def sample_rate(self) -> int:
+        """The sample rate of the input audio stream"""
         pass
 
     def get_file_padding(self, filepath: FilePath) -> Tuple[float, float]:
@@ -60,6 +86,8 @@ def get_file_padding(self, filepath: FilePath) -> Tuple[float, float]:
 
 
 class Pipeline(ABC):
+    """Represents a streaming audio pipeline"""
+
     @staticmethod
     @abstractmethod
     def get_config_class() -> type:
@@ -92,4 +120,18 @@ def set_timestamp_shift(self, shift: float):
     def __call__(
         self, waveforms: Sequence[SlidingWindowFeature]
     ) -> Sequence[Tuple[Any, SlidingWindowFeature]]:
+        """Runs the next steps of the pipeline
+        given a list of consecutive audio chunks.
+
+        Parameters
+        ----------
+        waveforms: Sequence[SlidingWindowFeature]
+            Consecutive chunk waveforms for the pipeline to ingest
+
+        Returns
+        -------
+        Sequence[Tuple[Any, SlidingWindowFeature]]
+            For each input waveform, a tuple containing
+            the pipeline output and its respective audio
+        """
         pass
diff --git a/src/diart/blocks/clustering.py b/src/diart/blocks/clustering.py
@@ -140,6 +140,10 @@ def identify(
         long_speakers = np.where(np.mean(segmentation.data, axis=0) >= self.rho_update)[
             0
         ]
+        # Remove speakers that have NaN embeddings
+        no_nan_embeddings = np.where(~np.isnan(embeddings).any(axis=1))[0]
+        active_speakers = np.intersect1d(active_speakers, no_nan_embeddings)
+
         num_local_speakers = segmentation.data.shape[1]
 
         if self.centers is None: