Skip to content

Commit 1b757a3

Browse files
committed
[detectors] Implement Koala-36M
Implement algorithm similar to that described in Koala-36M. Add `KoalaDetector` and `detect-koala` command. #441
1 parent 99b116e commit 1b757a3

File tree

11 files changed

+138
-6
lines changed

11 files changed

+138
-6
lines changed

benchmarks/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ The following results indicate that ContentDetector achieves the highest perform
3737
| HashDetector | 92.96 | 76.27 | 83.79 | 16.26 |
3838
| HistogramDetector | 90.55 | 72.76 | 80.68 | 16.13 |
3939
| ThresholdDetector | 0.00 | 0.00 | 0.00 | 18.95 |
40+
| KoalaDetector | 86.83 | 78.38 | 82.39 | 97.75 |
4041

4142
## Citation
4243
### BBC

benchmarks/bbc_dataset.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,19 @@ class BBCDataset:
1212

1313
def __init__(self, dataset_dir: str):
1414
self._video_files = [
15-
file for file in sorted(glob.glob(os.path.join(dataset_dir, "videos", "*.mp4")))
15+
file
16+
for file in sorted(
17+
glob.glob(os.path.join("benchmarks", dataset_dir, "videos", "*.mp4"))
18+
)
1619
]
1720
self._scene_files = [
18-
file for file in sorted(glob.glob(os.path.join(dataset_dir, "fixed", "*.txt")))
21+
file
22+
for file in sorted(glob.glob(os.path.join("benchmarks", dataset_dir, "fixed", "*.txt")))
1923
]
2024
assert len(self._video_files) == len(self._scene_files)
2125
for video_file, scene_file in zip(self._video_files, self._scene_files):
2226
video_id = os.path.basename(video_file).replace("bbc_", "").split(".")[0]
23-
scene_id = os.path.basename(scene_file).split("_")[0]
27+
scene_id = os.path.basename(scene_file).split("-")[0]
2428
assert video_id == scene_id
2529

2630
def __getitem__(self, index):

benchmarks/benchmark.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
ContentDetector,
1111
HashDetector,
1212
HistogramDetector,
13+
KoalaDetector,
14+
SceneManager,
1315
ThresholdDetector,
14-
detect,
16+
open_video,
1517
)
1618

1719

@@ -22,6 +24,7 @@ def make_detector(detector_name: str):
2224
"detect-hash": HashDetector(),
2325
"detect-hist": HistogramDetector(),
2426
"detect-threshold": ThresholdDetector(),
27+
"detect-koala": KoalaDetector(),
2528
}
2629
return detector_map[detector_name]
2730

@@ -31,7 +34,19 @@ def _detect_scenes(detector_type: str, dataset):
3134
for video_file, scene_file in tqdm(dataset):
3235
start = time.time()
3336
detector = make_detector(detector_type)
34-
pred_scene_list = detect(video_file, detector)
37+
38+
video = open_video(video_file)
39+
scene_manager = SceneManager()
40+
scene_manager.add_detector(detector)
41+
# TODO: We should also do this for detect-hash.
42+
if detector_type == "detect-koala":
43+
scene_manager.auto_downscale = False
44+
scene_manager.detect_scenes(
45+
video=video,
46+
show_progress=True,
47+
)
48+
pred_scene_list = scene_manager.get_scene_list()
49+
3550
elapsed = time.time() - start
3651
scenes = {
3752
scene_file: {
@@ -74,6 +89,7 @@ def main(args):
7489
"detect-hash",
7590
"detect-hist",
7691
"detect-threshold",
92+
"detect-koala",
7793
],
7894
default="detect-content",
7995
help="Detector name. Implemented detectors are listed: https://www.scenedetect.com/docs/latest/cli.html",

dist/requirements_windows.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ moviepy==2.1.1
77
numpy==2.1.3
88
platformdirs==4.3.6
99
tqdm==4.67.1
10+
scikit-image==0.24.0
1011

1112
# Build-only and test-only requirements.
1213
pyinstaller

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ opencv-python
88
platformdirs
99
pytest>=7.0
1010
tqdm
11+
scikit-image

requirements_headless.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ numpy
77
opencv-python-headless
88
platformdirs
99
pytest>=7.0
10-
tqdm
10+
scikit-image
11+
tqdm

scenedetect/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
ThresholdDetector,
4343
HistogramDetector,
4444
HashDetector,
45+
KoalaDetector,
4546
)
4647
from scenedetect.backends import (
4748
AVAILABLE_BACKENDS,

scenedetect/_cli/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
ContentDetector,
4343
HashDetector,
4444
HistogramDetector,
45+
KoalaDetector,
4546
ThresholdDetector,
4647
)
4748
from scenedetect.platform import get_cv2_imwrite_params, get_system_version_info
@@ -1590,3 +1591,16 @@ def save_qp_command(
15901591
scenedetect.add_command(list_scenes_command)
15911592
scenedetect.add_command(save_images_command)
15921593
scenedetect.add_command(split_video_command)
1594+
1595+
1596+
@click.command("detect-koala", cls=Command, help="""WIP""")
@click.pass_context
def detect_koala_command(ctx: click.Context):
    """Handle the `detect-koala` command by registering a :class:`KoalaDetector`.

    The CLI state object is read from `ctx.obj` and must be a `CliContext`. The detector is
    added with `min_scene_len` set to None.
    """
    cli_context = ctx.obj
    assert isinstance(cli_context, CliContext)
    detector_args = {"min_scene_len": None}
    cli_context.add_detector(KoalaDetector, detector_args)
1604+
1605+
1606+
scenedetect.add_command(detect_koala_command)

scenedetect/detectors/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from scenedetect.detectors.adaptive_detector import AdaptiveDetector
4141
from scenedetect.detectors.hash_detector import HashDetector
4242
from scenedetect.detectors.histogram_detector import HistogramDetector
43+
from scenedetect.detectors.koala_detector import KoalaDetector
4344

4445
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
4546
# #
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#
2+
# PySceneDetect: Python-Based Video Scene Detector
3+
# -------------------------------------------------------------------
4+
# [ Site: https://scenedetect.com ]
5+
# [ Docs: https://scenedetect.com/docs/ ]
6+
# [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
7+
#
8+
# Copyright (C) 2014-2024 Brandon Castellano <http://www.bcastell.com>.
9+
# PySceneDetect is licensed under the BSD 3-Clause License; see the
10+
# included LICENSE file, or visit one of the above pages for details.
11+
#
12+
""":class:`KoalaDetector` uses the detection method described by Koala-36M.
13+
See https://koala36m.github.io/ for details.
14+
15+
TODO: Cite correctly.
16+
17+
This detector is available from the command-line as the `detect-koala` command.
18+
"""
19+
20+
import typing as ty
21+
22+
import cv2
23+
import numpy as np
24+
from skimage.metrics import structural_similarity
25+
26+
from scenedetect.scene_detector import SceneDetector
27+
28+
29+
class KoalaDetector(SceneDetector):
    """Detects cuts by scoring how similar each frame is to the one before it.

    For every frame after the first, `process_frame` combines a histogram correlation and an
    edge-map structural similarity into a single score. No cuts are reported while frames are
    being processed; all cuts are computed from the collected scores in `post_process`.
    """

    def __init__(self, min_scene_len: ty.Optional[int] = None):
        """
        Arguments:
            min_scene_len: Stored as 0 when None (or any falsy value) is given.
                NOTE: this value is not currently consulted when selecting cuts; the minimum
                spacing between cuts is governed by the window size below.
        """
        super().__init__()
        # Frame number of the first frame passed to `process_frame` (None until then).
        self._start_frame_num: ty.Optional[int] = None
        self._min_scene_len: int = min_scene_len if min_scene_len else 0
        # Features of the previously processed frame.
        self._last_histogram: ty.Optional[np.ndarray] = None
        self._last_edges: ty.Optional[np.ndarray] = None
        # One similarity score per processed frame, excluding the first frame.
        self._scores: ty.List[float] = []

        # Tunables (TODO: Make these config params):

        # Boxcar filter size (should be <= window size)
        self._filter_size: int = 3
        # Window to use for calculating threshold (should be >= filter size).
        self._window_size: int = 8
        # Multiplier for standard deviations when calculating threshold.
        self._deviation: float = 3.0

    def process_frame(self, frame_num: int, frame_img: np.ndarray) -> ty.List[int]:
        """Score `frame_img` against the previous frame and record the result.

        Arguments:
            frame_num: Number of the frame being processed. The first value seen is remembered
                and used as the offset for cuts returned by `post_process`.
            frame_img: Frame image. It is converted with `cv2.COLOR_BGR2GRAY`, so a BGR image
                is expected.

        Returns:
            Always an empty list; cuts are only produced by `post_process`.
        """
        # TODO: frame_img is already downscaled here. The same problem exists in HashDetector.
        # For now we can just set downscale factor to 1 in SceneManager to work around the issue.
        frame_img = cv2.resize(frame_img, (256, 256))
        # One 254-bin histogram per channel, computed with the range argument [1, 255].
        histogram = np.asarray(
            [cv2.calcHist([c], [0], None, [254], [1, 255]) for c in cv2.split(frame_img)]
        )
        # TODO: Make the parameters below tunable.
        frame_gray = cv2.resize(cv2.cvtColor(frame_img, cv2.COLOR_BGR2GRAY), (128, 128))
        # Element-wise maximum of the grayscale image and its Canny edge map.
        edges = np.maximum(frame_gray, cv2.Canny(frame_gray, 100, 200))

        if self._start_frame_num is None:
            # First frame: nothing to compare against yet.
            self._start_frame_num = frame_num
        else:
            delta_histogram = cv2.compareHist(self._last_histogram, histogram, cv2.HISTCMP_CORREL)
            delta_edges = structural_similarity(self._last_edges, edges, data_range=255)
            # Weighted sum of both similarities; `post_process` treats negative scores as cuts.
            score = 4.61480465 * delta_histogram + 3.75211168 * delta_edges - 5.485968377115124
            self._scores.append(score)

        self._last_histogram = histogram
        self._last_edges = edges
        return []

    def post_process(self, frame_num: int) -> ty.List[int]:
        """Compute all cuts from the scores collected by `process_frame`.

        Arguments:
            frame_num: Unused.

        Returns:
            Frame numbers of the detected cuts, offset by the first processed frame number.
            Empty if fewer than two frames were processed.
        """
        # With no scores there is nothing to cut, and `np.convolve` rejects empty input.
        if not self._scores:
            return []

        # A negative score marks a cut. The extra trailing entry closes the final scene.
        cut_found = [score < 0.0 for score in self._scores]
        cut_found.append(True)

        kernel = [1] * self._filter_size
        cutoff = float(self._filter_size) / float(self._filter_size + 1)
        filtered = np.convolve(self._scores, kernel, mode="same")
        # Also mark frames whose filtered score drops well below the preceding window's mean.
        for index in range(self._window_size, len(self._scores)):
            if filtered[index] < cutoff:
                # TODO: Should we discard the N most extreme values before calculating threshold?
                window = filtered[index - self._window_size : index]
                threshold = window.mean() - (self._deviation * window.std())
                if filtered[index] < threshold:
                    cut_found[index] = True

        # Keep only scene starts whose scene is longer than the window size.
        cuts = []
        last_cut = 0
        for index, is_cut in enumerate(cut_found):
            if is_cut:
                if (index - last_cut) > self._window_size:
                    cuts.append(last_cut)
                last_cut = index + 1
        # The first retained entry is dropped before applying the start frame offset.
        return [cut + self._start_frame_num for cut in cuts][1:]

tests/test_detectors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ContentDetector,
3030
HashDetector,
3131
HistogramDetector,
32+
KoalaDetector,
3233
ThresholdDetector,
3334
)
3435

@@ -37,6 +38,7 @@
3738
ContentDetector,
3839
HashDetector,
3940
HistogramDetector,
41+
KoalaDetector,
4042
)
4143

4244
ALL_DETECTORS: ty.Tuple[ty.Type[SceneDetector]] = (*FAST_CUT_DETECTORS, ThresholdDetector)
@@ -123,7 +125,9 @@ def get_fast_cut_test_cases():
123125
),
124126
id="%s/m=30" % detector_type.__name__,
125127
)
128+
# TODO: Make this work, right now min_scene_len isn't used by the detector.
126129
for detector_type in FAST_CUT_DETECTORS
130+
if detector_type != KoalaDetector
127131
]
128132
return test_cases
129133

0 commit comments

Comments
 (0)