-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Copy pathmerge_logs.py
57 lines (46 loc) · 2.1 KB
/
merge_logs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""Script for merging the logs in a month-by-month simulation."""
import os
import sys
import pandas as pd
def cat_csvs(format_string_file_pattern, index_col, month_list):
    """
    Read one CSV per month and stack them into a single, index-sorted frame.

    Parameters:
        - format_string_file_pattern: File name pattern containing `{}` where
          the month token is substituted.
        - index_col: Name of the column to use as the index; it is parsed as
          dates and the combined result is sorted on it.
        - month_list: The month tokens exactly as they appear in the file
          names, e.g. `['jan', 'feb']`.

    Returns:
        A `pandas.DataFrame` holding the rows of every monthly file, sorted
        by the index.
    """
    monthly_frames = []
    for month in month_list:
        csv_path = format_string_file_pattern.format(month)
        monthly_frames.append(
            pd.read_csv(csv_path, index_col=index_col, parse_dates=True)
        )

    combined = pd.concat(monthly_frames)
    return combined.sort_index()
def get_spillover(data, when):
    """
    Return the rows of `data` selected by the label `when`.

    The script uses this to pick up entries that ended up in one year's
    files but belong to the next period (e.g. `when='2019'` on the 2018 data).

    Parameters:
        - data: The pandas object to look in; `when` is passed to `data.loc`.
        - when: The label (for a datetime index, a partial date string such
          as `'2019'`) identifying the spillover rows.

    Returns:
        `data.loc[when]` when the lookup succeeds. If the lookup raises
        `KeyError` (nothing spilled over), a zero-row slice of `data` that
        keeps its columns and index type.
    """
    try:
        return data.loc[when]
    except KeyError:
        # Return an empty slice of `data` instead of a bare `pd.DataFrame()`:
        # the bare frame has no columns and a non-datetime index, so
        # concatenating it with datetime-indexed frames only kept the
        # DatetimeIndex because concat ignored empty inputs when picking the
        # result dtype, which pandas has deprecated.
        return data.iloc[:0]
if __name__ == '__main__':
    # Work from the script's own folder so the relative `logs/` paths resolve
    # no matter which directory the script was launched from.
    script_path = os.path.realpath(sys.argv[0])
    os.chdir(os.path.dirname(script_path))

    all_months = [
        'jan', 'feb', 'mar', 'apr', 'may', 'jun',
        'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    ]
    first_quarter = all_months[:3]

    # Simulation logs, indexed on the `datetime` column.
    logs_2018 = cat_csvs('logs/{}_2018.csv', 'datetime', all_months)
    # Slice to the target period before writing: sometimes the simulation
    # overshoots the end date.
    logs_2018.loc['2018'].to_csv('logs/logs_2018.csv')

    logs_2019_parts = [
        cat_csvs('logs/{}_2019.csv', 'datetime', first_quarter),
        get_spillover(logs_2018, '2019'),  # 2019 rows found in the 2018 data
    ]
    logs_2019 = pd.concat(logs_2019_parts).sort_index()
    logs_2019.loc['2019-Q1'].to_csv('logs/logs_2019.csv')

    # Hacker records, indexed on the `start` column; same procedure as above.
    hackers_2018 = cat_csvs('logs/hackers_{}_2018.csv', 'start', all_months)
    hackers_2018.loc['2018'].to_csv('logs/hackers_2018.csv')

    hackers_2019_parts = [
        cat_csvs('logs/hackers_{}_2019.csv', 'start', first_quarter),
        get_spillover(hackers_2018, '2019'),
    ]
    hackers_2019 = pd.concat(hackers_2019_parts).sort_index()
    hackers_2019.loc['2019-Q1'].to_csv('logs/hackers_2019.csv')

    print('All done!')