-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path02_prep_data.R
160 lines (122 loc) · 5.18 KB
/
02_prep_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
################################################################################
#
# Contrasting future heat and cold-related mortality under climate change,
# demographic and adaptation scenarios in 854 European cities
#
# R Code Part 2: Load and prepare data
#
# Pierre Masselot & Antonio Gasparrini
#
################################################################################
#------------------------
# LOAD EUcityTRM info
#------------------------
# read coefficients
coefs <- read.csv("data/coefs.csv") |>
mutate(agegroup = factor(agegroup, levels = agelabs))
# Read city and age values
cityage <- read.csv("data/city_results.csv")
# Reduce to cities only
cities <- unique(subset(cityage, select = c("URAU_CODE", "LABEL", "CNTR_CODE",
"cntr_name", "region", "lon", "lat", "pop")))
# Countries
countries <- summarise(cities,
ncities = length(URAU_CODE), lat = mean(lat), region = unique(region),
.by = CNTR_CODE) |>
mutate(region = factor(region, levels = ordreg)) |>
arrange(region, desc(lat))
# Reorder cities
cities <- mutate(cities, region = factor(region, levels = ordreg),
CNTR_CODE = factor(CNTR_CODE, levels = countries$CNTR_CODE)) |>
arrange(region, CNTR_CODE, URAU_CODE)
#------------------------
# LOAD DEMOGRAPHIC DATA
#------------------------
#----- Load Wittgenstein projections
# Read Age-specific survival ratios
projdeath <- fread("data/wittgenstein_assr.csv")
# Attach the first year to each period to later merge with population
projdeath[, year5 := as.numeric(substr(period, 1, 4))]
# Read Population
projpop <- fread("data/wittgenstein_pop.csv") |> rename(year5 = "year")
# Select sex and age
projdeath <- projdeath[age != "Newborn"]
projpop <- projpop[age != "All" & sex != "Both",]
# Merge population and deaths
proj <- merge(projpop, projdeath)
# Select only relevant years
proj <- proj[year5 %between% histrange | year5 %between% range(projrange),]
# Rescale population and compute deaths
proj[, ":="(pop = 1000 * pop, death = 1000 * pop * (1 - assr))]
# Aggregate sex
proj <- proj[, .(pop = sum(pop), death = sum(death)),
by = .(CNTR_CODE, age, ssp, year5)]
#----- Aggregate by age group
# Create age groups
proj[, agegroup := cut(as.numeric(sapply(strsplit(age, "[-+]"), "[", 1)),
agebreaks, include.lowest = T, right = F, labels = agelabs)]
# Sum population and death by age group
proj <- proj[!is.na(agegroup), .(wittpop = sum(pop), wittdeath = sum(death)),
by = .(CNTR_CODE, agegroup, ssp, year5)]
# Rescale number of deaths as annual average
proj[, ":="(wittdeath = wittdeath / 5)]
#----- Calibrate using Urban Audit
# Average pop and deaths of Wittgenstein over historical period
histo_witt <- proj[year5 %between% histrange,
.(histowpop = mean(wittpop), histowdeaths = mean(wittdeath)),
by = .(CNTR_CODE, agegroup, ssp)]
# Extract pop and deaths from EUcityTRM
citycal <- rename(cityage, uraupop = agepop, uraudeath = death) |>
subset(select = c(URAU_CODE, CNTR_CODE, agegroup, uraupop, uraudeath))
# Merge Eurostat and projection data
citycal <- merge(histo_witt, citycal, all = T, allow.cartesian = T)
# Multiplicative factor for each city from national
citycal[, ":="(popfac = uraupop / histowpop, dfac = uraudeath / histowdeaths)]
# Add correction to population projection
projdata <- merge(proj, citycal, allow.cartesian = T, all = T)
projdata[, ":="(pop = wittpop * popfac, death = wittdeath * dfac)]
#----- Some further data cleaning
# Select SSP. Discard the duplicates of historical period
projdata <- projdata[ssp %in% ssplist,]
projdata[, ssp := as.character(ssp)]
projdata[year5 %between% histrange, ssp := "hist"]
projdata <- unique(projdata)
# Select cities with full data
cities <- subset(cities, URAU_CODE %in% projdata$URAU_CODE)
projdata <- subset(projdata, URAU_CODE %in% cities$URAU_CODE)
# Define factors with nice names
cities <- mutate(cities,
across(URAU_CODE:cntr_name, ~ factor(.x, levels = unique(.x))),
region = factor(region, levels = ordreg))
# Order demographic data
projdata[, ":="(URAU_CODE = factor(URAU_CODE, levels = levels(cities$URAU_CODE)),
agegroup = factor(agegroup, levels = agelabs))]
setkey(projdata, ssp, URAU_CODE, agegroup)
#------------------------
# Load warming target data
#------------------------
# Read data
warming_years <- read.csv("data/warming_years.csv", check.names = F)
# Reshape into long data.frame
warming_years <- pivot_longer(warming_years, !model_run, names_to = "level",
values_to = "year")
# Rename and select targets
warming_years <- mutate(warming_years,
model_run = str_split_i(model_run, "_", 1),
gcm = gsub("-", "_", model_run),
model_run = NULL,
ssp = str_split_i(level, "_", 2),
ssp = substr(str_extract(ssp, ".*ssp[[:digit:]]"), 4, 4),
level = str_split_i(level, "_", 1)) |>
subset(level %in% targets & ssp %in% ssplist)
# Expand for all years within the window and for all cities/ssp/agegroup
warming_win <- group_by(warming_years, gcm, level, ssp) |>
reframe(year = if (is.na(year)) NA else (year - win_len):(year + win_len)) |>
ungroup() |>
mutate(year5 = floor(year / 5) * 5) |>
data.table()
#------------------------
# Other data
#------------------------
# Load European map data
euromap <- gisco_get_countries(year = "2020", cache_dir = "data")