-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex_celebdf.py
77 lines (57 loc) · 3.04 KB
/
index_celebdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import argparse
from multiprocessing import Pool
from pathlib import Path
import numpy as np
import pandas as pd
from isplutils.utils import extract_meta_av, extract_meta_cv
def _original_name(fake_name: str) -> str:
    """Derive the name of the source (real) video from a fake video name.

    The fake name is split on underscores and its first and third fields are
    joined back with an underscore (``a_b_c`` -> ``a_c``).

    :raises IndexError: if the name has fewer than three underscore-separated fields.
    """
    fields = fake_name.split('_')
    return '_'.join([fields[0], fields[2]])


def _link_originals(df_videos: pd.DataFrame) -> None:
    """Add the ``original`` column to ``df_videos`` in place.

    Rows whose ``label`` is True receive the index label of the first row whose
    ``name`` equals the derived original name; every other row keeps ``-1``.

    :raises IndexError: if no row carries the derived original name.
    """
    df_videos['original'] = -1 * np.ones(len(df_videos), dtype=np.int16)

    # One pass over the names instead of rescanning the whole column for every
    # fake video. `setdefault` keeps the first occurrence of a duplicated name.
    first_row_by_name = {}
    for row_label, name in zip(df_videos.index, df_videos['name']):
        first_row_by_name.setdefault(name, row_label)

    def _source_row(fake_name):
        wanted = _original_name(fake_name)
        try:
            return first_row_by_name[wanted]
        except KeyError:
            raise IndexError(
                'No original video "{}" found for fake video "{}"'.format(wanted, fake_name)) from None

    # Explicit comparison (rather than using the column as a mask) so that the
    # selection does not depend on the dtype pandas picked for `label`.
    is_fake = df_videos['label'] == True
    df_videos.loc[is_fake, 'original'] = df_videos[is_fake]['name'].map(_source_row)


def _fill_video_meta(df_videos: pd.DataFrame, source_dir: Path) -> None:
    """Fill the ``height``, ``width`` and ``frames`` columns of ``df_videos`` in place.

    Metadata is read in parallel with ``extract_meta_av``; rows that end up with
    zero frames are read again, one by one, with ``extract_meta_cv``.
    """
    meta_columns = ['height', 'width', 'frames']
    df_videos['height'] = df_videos['width'] = df_videos['frames'] = np.zeros(len(df_videos), dtype=np.uint16)

    absolute_paths = [str(source_dir.joinpath(rel_path)) for rel_path in df_videos['path']]
    with Pool() as p:
        meta = p.map(extract_meta_av, absolute_paths)
    # Presumably each result is a (height, width, frames) triple, since the
    # stacked array is written into those three columns - TODO confirm.
    df_videos.loc[:, meta_columns] = np.stack(meta)

    # Fix for videos that av cannot decode properly
    for idx, record in df_videos[df_videos['frames'] == 0].iterrows():
        df_videos.loc[idx, meta_columns] = extract_meta_cv(str(source_dir.joinpath(record['path'])))


def _build_video_dataframe(source_dir: Path) -> pd.DataFrame:
    """Index every ``*.mp4`` found under ``source_dir`` into a DataFrame.

    Columns: ``path`` (relative to ``source_dir``), ``height``, ``width``,
    ``frames``, ``class`` (first path component), ``label`` (True is FAKE,
    False is REAL), ``name`` (file name without suffix), ``original`` and
    ``test`` (whether the path is listed in ``List_of_testing_videos.txt``).

    :raises FileNotFoundError: if the split list or the videos are missing.
    """
    split_file = Path(source_dir).joinpath('List_of_testing_videos.txt')
    if not split_file.exists():
        raise FileNotFoundError('Unable to find "List_of_testing_videos.txt" in {}'.format(source_dir))
    # NOTE(review): header=0 turns the first line of the list into column names,
    # so if the list has no header row its first entry is silently dropped from
    # the test split - confirm against the actual file before changing.
    test_videos_df = pd.read_csv(split_file, delimiter=' ', header=0, index_col=1)

    ff_videos = Path(source_dir).rglob('*.mp4')
    df_videos = pd.DataFrame(
        {'path': [f.relative_to(source_dir) for f in ff_videos]})
    if df_videos.empty:
        # Fail early with a clear message instead of a cryptic stacking error
        # when the metadata list turns out to be empty.
        raise FileNotFoundError('Unable to find any "*.mp4" video in {}'.format(source_dir))

    _fill_video_meta(df_videos, source_dir)

    df_videos['class'] = df_videos['path'].map(lambda x: x.parts[0]).astype('category')
    df_videos['label'] = df_videos['class'].map(
        lambda x: x == 'Celeb-synthesis')  # True is FAKE, False is REAL
    df_videos['name'] = df_videos['path'].map(lambda x: x.with_suffix('').name)

    _link_originals(df_videos)

    # as_posix() always yields forward slashes, whereas str() yields backslashes
    # on Windows; the split list is assumed to use forward slashes.
    df_videos['test'] = df_videos['path'].map(lambda x: x.as_posix()).isin(test_videos_df.index)
    return df_videos


def main() -> None:
    """Build (or reload) the videos DataFrame and print class counts.

    Command line arguments:
      --source        directory scanned for videos (required)
      --videodataset  pickle file of the videos DataFrame
                      (default: data/celebdf_videos.pkl)

    If the pickle already exists it is loaded as is; otherwise the DataFrame is
    created from ``--source`` and saved to that path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Source dir',
                        required=True)
    parser.add_argument('--videodataset', type=Path, default='data/celebdf_videos.pkl',
                        help='Path to save the videos DataFrame')
    args = parser.parse_args()

    ## Parameters parsing
    source_dir: Path = args.source
    videodataset_path: Path = args.videodataset

    # Create output folder (if doesn't exist)
    videodataset_path.parent.mkdir(parents=True, exist_ok=True)

    ## DataFrame
    if videodataset_path.exists():
        print('Loading video DataFrame')
        # NOTE: unpickling executes arbitrary code; only point --videodataset
        # at files produced by this script or another trusted source.
        df_videos = pd.read_pickle(videodataset_path)
    else:
        print('Creating video DataFrame')
        df_videos = _build_video_dataframe(source_dir)

        print('Saving video DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

    print('Real videos: {:d}'.format(sum(df_videos['label'] == 0)))
    print('Fake videos: {:d}'.format(sum(df_videos['label'] == 1)))
# Script entry point: run the indexing only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()