|
6 | 6 | from .pydub_utils import db_to_float
|
7 | 7 |
|
8 | 8 |
|
9 |
def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Find the silent stretches of audio_segment.

    Returns a list of [start, end] pairs (milliseconds), one per silent
    section. This is the complement of detect_nonsilent().

    audio_segment - the segment to search for silence
    min_silence_len - the minimum length (ms) a section must have to count as silent
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size (ms) used when sliding the analysis window
    """
    total_ms = len(audio_segment)

    # a silent stretch can never be longer than the audio that contains it
    if total_ms < min_silence_len:
        return []

    # express the dB threshold on the same scale as the slice's rms value
    rms_ceiling = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude

    # window start offsets: every seek_step ms, plus the very last possible
    # window when the stepping would otherwise skip the tail of the audio
    final_start = total_ms - min_silence_len
    window_starts = range(0, final_start + 1, seek_step)
    if final_start % seek_step:
        window_starts = itertools.chain(window_starts, [final_start])

    # keep the offsets whose min_silence_len-long window is quiet enough
    quiet_starts = [
        start for start in window_starts
        if audio_segment[start:start + min_silence_len].rms <= rms_ceiling
    ]

    # nothing quiet was found
    if not quiet_starts:
        return []

    # fold the quiet window offsets into [start ms, end ms] ranges
    merged = []
    range_start = last_start = quiet_starts[0]

    for start in quiet_starts[1:]:
        follows_previous = start == last_start + seek_step
        # a couple of small blips can make one window non-silent even though
        # the neighbouring silent windows still touch or overlap; such
        # windows are joined into the same range
        touches_previous = start <= last_start + min_silence_len

        if not (follows_previous or touches_previous):
            merged.append([range_start, last_start + min_silence_len])
            range_start = start
        last_start = start

    # close the range that was still open when the scan ended
    merged.append([range_start, last_start + min_silence_len])

    return merged
74 |
| - |
75 |
| - |
76 |
def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Find the non-silent stretches of audio_segment.

    Returns a list of [start, end] pairs (milliseconds), one per non-silent
    section. This is the complement of detect_silence(), and all arguments
    are forwarded to it unchanged.

    audio_segment - the segment to search for silence
    min_silence_len - the minimum length (ms) a section must have to count as silent
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size (ms) used when sliding the analysis window
    """
    silences = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
    total_ms = len(audio_segment)

    # no silence at all: the whole segment is one non-silent range
    if not silences:
        return [[0, total_ms]]

    # the first silent range spans the entire segment: nothing is non-silent
    if silences[0][0] == 0 and silences[0][1] == total_ms:
        return []

    # every gap before / between silent ranges is a non-silent range
    audible = []
    cursor = 0
    for quiet_start, quiet_end in silences:
        audible.append([cursor, quiet_start])
        cursor = quiet_end

    # audio remaining after the last silent range
    if cursor != total_ms:
        audible.append([cursor, total_ms])

    # drop the empty leading range produced when the audio starts with silence
    if audible[0] == [0, 0]:
        audible.pop(0)

    return audible
110 |
| - |
111 |
| - |
112 | 9 | def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
|
113 | 10 | seek_step=1):
|
114 | 11 | """
|
|
0 commit comments