-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathreproduce.txt
49 lines (35 loc) · 1.49 KB
/
reproduce.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Reproduces the full pipeline: scrape -> sample -> merge with shoe data ->
# plot -> fit models -> plot fitted coefficients.
#
# This file can be run from the terminal as
#   bash reproduce.txt
#
# Required software:
#   python 2.7
#     with the following python packages (used by scrape.py):
#       requests, bs4, html5lib, pandas
#     install with:
#       pip install requests bs4 html5lib pandas
#   R
#     with the following R packages:
#       lme4
#     in an R session, install with:
#       install.packages("lme4")
#
# Each individual command line below can also be run from the terminal as
# written. See the individual .py and .R files for documentation of their
# arguments.
#
# NOTE(review): men_matches.csv, women_matches.csv, men_shoe.csv and
# women_shoe.csv are not produced by any step in this file, so they are
# presumably expected to already exist in the working directory -- verify.

# Abort at the first failing command. Every step below works with files named
# in earlier steps, so continuing after a failure would run the later steps
# against missing or stale files. (Only relevant when running the whole file;
# skip this line when pasting commands into an interactive shell, where it
# would make the shell itself exit on the first error.)
set -e

# Scrape data from marathonguide.com.
python scrape.py 1 500 men_scraped.csv
python scrape.py 2 500 women_scraped.csv

# Uncomment (and comment out the two lines above) to scrape data from
# marathonguide.com with Python 3 instead.
# python scrape_py3.py 1 500 men_scraped.csv
# python scrape_py3.py 2 500 women_scraped.csv

# Generate sampled datasets.
Rscript generate_sampled_dataset.R men_scraped.csv men_matches.csv 144 men_sampled.csv
Rscript generate_sampled_dataset.R women_scraped.csv women_matches.csv 165 women_sampled.csv

# Merge sampled data with shoe data.
Rscript merge.R men_sampled.csv men_shoe.csv men_sampled_shoe.csv
Rscript merge.R women_sampled.csv women_shoe.csv women_sampled_shoe.csv

# Generate plot of data.
Rscript explore_data.R

# Analyze data with mixed models and print out some results:
# men only, women only, then both datasets combined.
Rscript analysis.R men_sampled_shoe.csv men_fit.RData
Rscript analysis.R women_sampled_shoe.csv women_fit.RData
Rscript analysis.R men_sampled_shoe.csv women_sampled_shoe.csv combined_fit.RData

# Create plot of generalized least squares coefficients.
Rscript explore_fit.R