-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_audio_clips.py
115 lines (84 loc) · 3.63 KB
/
extract_audio_clips.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import argparse
import concurrent.futures
import datetime
import os
import pandas as pd
import torch
from moviepy.editor import VideoFileClip
# Length, in seconds, of the audio segment cut out of every video.
TIME_DURATION_IN_SEC = 10

# Path of the CSV index that main() loads with pandas.
CSV_FILE = './vggsound.csv'

# Column renames applied in main() right after the CSV is read.
# NOTE(review): the keys look like the values of the CSV's first row, which
# pandas would use as column labels when the file has no header line —
# confirm against the actual CSV.
NEW_COLUMNS = {
    '---g-f_I2yQ': 'youtube_video_id',
    '1': 'start_seconds',
    'people marching': 'label',
    'test': 'split',
}

# Video ids that extract_audio_clip() refuses to process.
CORRUPTED_FILES = [
    '1WRx5JqaCy0',
    '1WbvSHeHf5g',
    '0WbBuzhazto',
    '-x7xMm47pfc',
    '-ixUrPNPogg',
    '-bBaZT4tEko',
]
def extract_audio_clip(video_id, start_time):
    """Cut a fixed-length audio segment out of a previously saved video.

    Reads ``./data/video/full_vid_<video_id>.mp4``, takes the
    ``TIME_DURATION_IN_SEC``-second window beginning at ``start_time``,
    resamples its audio to 16 kHz and writes it with the ``pcm_s16le`` codec
    to ``./data/audio/audio_<video_id>.wav``.

    A missing input file is skipped silently. Every other problem (id listed
    in ``CORRUPTED_FILES``, video shorter than the requested window, no audio
    track, decode or write failure) is printed and swallowed so that one bad
    video does not abort a whole batch.

    Args:
        video_id: YouTube video id used to build the input and output paths.
        start_time: Offset in seconds at which the segment starts.

    Returns:
        None.
    """
    input_video_path = './data/video/full_vid_' + video_id + '.mp4'
    if not os.path.exists(input_video_path):
        return

    if video_id in CORRUPTED_FILES:
        # Same message the generic handler below would have produced.
        print(f"Error extracting audio clip: Skipping corrupted file: {video_id}")
        return

    video_clip = None
    audio_clip = None
    try:
        video_clip = VideoFileClip(input_video_path)
        end_time = start_time + TIME_DURATION_IN_SEC
        if video_clip.duration < end_time:
            raise Exception(f'{video_id} - audio clip too short, skipping')

        # Audio of the requested window only.
        audio_clip = video_clip.subclip(start_time, end_time).audio
        if audio_clip is None:
            # Videos without an audio track expose `audio` as None.
            raise Exception(f'{video_id} - no audio track, skipping')

        # Resample the audio to 16 kHz
        resampled_audio_clip = audio_clip.set_fps(16000)

        # Write the resampled audio to a WAV file
        audio_file_path = f"./data/audio/audio_{video_id}.wav"
        os.makedirs(os.path.dirname(audio_file_path), exist_ok=True)
        resampled_audio_clip.write_audiofile(audio_file_path, codec='pcm_s16le', verbose=False, logger=None)
    except Exception as e:
        print(f"Error extracting audio clip: {e}")
    finally:
        # Release the underlying readers on every path, not only on success;
        # otherwise each failed video leaks its open reader.
        for clip in (video_clip, audio_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as e:
                print(f"Error extracting audio clip: {e}")
def extract_clip(row):
    """Run the audio extraction for a single dataframe row.

    Args:
        row: Row-like object indexable by 'youtube_video_id' and
            'start_seconds'.

    Returns:
        True when extract_audio_clip returns normally, False when it raises.
    """
    clip_id, offset = row['youtube_video_id'], row['start_seconds']
    try:
        extract_audio_clip(clip_id, offset)
    except Exception:
        return False
    else:
        return True
def extract_clips(df, start, end):
    """Extract audio sequentially for rows ``start`` through ``end``, inclusive.

    Args:
        df: DataFrame with 'youtube_video_id' and 'start_seconds' columns.
        start: Position of the first row to process; values below 0 are
            clamped to 0.
        end: Position of the last row to process (inclusive). Negative values
            count back from the end, so -1 means the last row. Values past
            the end are clamped.

    Returns:
        Whatever ``DataFrame.apply`` yields for the selected rows: one
        ``extract_clip`` result per row.
    """
    total = len(df)
    if end < 0:
        # Without this, end == -1 makes `end + 1` equal 0 and the slice
        # below selects nothing at all.
        end += total
    # Keep `end + 1` within [0, total + 1] so the slice never goes negative.
    end = max(min(end, total), -1)
    start = max(start, 0)
    return df.iloc[start:end + 1].apply(extract_clip, axis=1)
def extract_audio_from_dataframe(df, start, end):
    """Extract audio for rows ``start`` through ``end`` using a thread pool.

    Args:
        df: DataFrame with 'youtube_video_id' and 'start_seconds' columns.
        start: Position of the first row to process; values below 0 are
            clamped to 0.
        end: Position of the last row to process (inclusive). Negative values
            count back from the end, so -1 means the last row. Values past
            the end are clamped.

    Returns:
        None. The call blocks until every submitted extraction has finished.
    """
    total = len(df)
    if end < 0:
        # Without this, end == -1 makes `end + 1` equal 0 and the slice
        # below selects nothing at all.
        end += total
    # Keep `end + 1` within [0, total + 1] so the slice never goes negative.
    end = max(min(end, total), -1)
    start = max(start, 0)

    selected = df.iloc[start:end + 1]
    video_ids = selected['youtube_video_id'].tolist()
    start_times = selected['start_seconds'].tolist()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(extract_audio_clip, video_id, start_time)
            for video_id, start_time in zip(video_ids, start_times)
        ]
        # Wait for all extractions to complete, reporting any exception that
        # escaped a worker instead of dropping it silently.
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is not None:
                print(f"Error extracting audio clip: {error}")
def main(start, end, is_concurrent):
    """Load the CSV index, extract the requested audio clips and time the run.

    Args:
        start: Position of the first dataframe row to process.
        end: Position of the last dataframe row to process.
        is_concurrent: Truthy to extract with a thread pool
            (extract_audio_from_dataframe), falsy to process rows one by one
            (extract_clips).
    """
    df = pd.read_csv(CSV_FILE)
    df.rename(columns=NEW_COLUMNS, inplace=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # A torch.device never compares equal to a plain string, so the device
    # kind has to be read from `.type`; comparing the object itself to "cpu"
    # is always False.
    os.environ["TOKENIZERS_PARALLELISM"] = "false" if device.type == "cpu" else "true"

    start_time = datetime.datetime.now()
    if is_concurrent:
        extract_audio_from_dataframe(df, start, end)
    else:
        extract_clips(df, start, end)
    end_time = datetime.datetime.now()

    duration = (end_time - start_time).total_seconds()
    print(f"Time taken for clipping: {duration:.6f} seconds")
if __name__ == "__main__":
    # Command-line entry point: parse the row range and execution mode, then
    # hand them to main().
    parser = argparse.ArgumentParser(description="Extract audio/video clips from YouTube videos")
    parser.add_argument("--start", type=int, default=0, help="Starting index of dataframe (default: 0)")
    # The help text previously said "Starting index" for this option too.
    parser.add_argument("--end", type=int, default=-1, help="Ending index of dataframe, inclusive (default: -1)")
    # Explicit default so the flag is a real bool rather than None when omitted.
    parser.add_argument(
        '--concurrent',
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Extract clips with a thread pool instead of one row at a time",
    )
    args = parser.parse_args()
    main(args.start, args.end, args.concurrent)