generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c2f7f08
commit e20e67e
Showing
5 changed files
with
283 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
version 16 | ||
|
||
/*============================================================================== | ||
DO FILE NAME: Baseline data for reference population (mid-point) | ||
PROJECT: OpenSAFELY Disease Incidence project | ||
DATE: 23/08/2024 | ||
AUTHOR: J Galloway / M Russell | ||
DESCRIPTION OF FILE: Baseline data for reference population | ||
DATASETS USED: output/dataset_definition_midpoint.csv | ||
OTHER OUTPUT: logfiles, printed to folder $logdir | ||
USER-INSTALLED ADO: table1_mc, plotplainblind scheme (presumably supplied via analysis/extra_ados - confirm) | ||
(place .ado file(s) in analysis folder) | ||
==============================================================================*/ | ||
|
||
*Set filepaths | ||
*The project root is the current working directory, so every path below is resolved relative to where Stata is started | ||
*(the commented-out lines are machine-specific paths kept for local runs) | ||
*global projectdir "C:\Users\Mark\OneDrive\PhD Project\OpenSAFELY Incidence\disease_incidence" | ||
*global projectdir "C:\Users\k1754142\OneDrive\PhD Project\OpenSAFELY Incidence\disease_incidence" | ||
global projectdir `c(pwd)' | ||
di "$projectdir" | ||
|
||
*Create output folders; capture lets the script carry on when a folder already exists | ||
capture mkdir "$projectdir/output/data" | ||
capture mkdir "$projectdir/output/tables" | ||
capture mkdir "$projectdir/output/figures" | ||
|
||
global logdir "$projectdir/logs" | ||
di "$logdir" | ||
*Create the log folder as well, otherwise log using below fails when the folder is absent | ||
capture mkdir "$logdir" | ||
|
||
*Open a log file | ||
*Close any log left open by an earlier run (cap ignores the error when none is open) | ||
cap log close | ||
log using "$logdir/baseline_data_midpoint.log", replace | ||
|
||
*Set Ado file path | ||
*Adds the project's own ado folder to the search path used to find user-written commands | ||
adopath + "$projectdir/analysis/extra_ados" | ||
|
||
*Import dataset | ||
*clear discards whatever data are currently in memory before the CSV is read | ||
import delimited "$projectdir/output/dataset_definition_midpoint.csv", clear | ||
|
||
*Graph scheme; no graph is drawn in this do-file, so this only affects later graph commands in the session | ||
set scheme plotplainblind | ||
|
||
*Create and label variables ===========================================================*/ | ||
*Recodes the string columns of the imported CSV into labelled numeric variables and saves the result | ||
*as output/data/reference_data_processed.dta, which the table sections below reload | ||
|
||
**Age | ||
*Rows with missing age are dropped | ||
lab var age "Age" | ||
codebook age | ||
keep if age !=. | ||
|
||
**Sex | ||
*gender: 1 = female, 2 = male; any other value of sex leaves gender missing and the row is dropped | ||
gen gender = 1 if sex == "female" | ||
replace gender = 2 if sex == "male" | ||
lab var gender "Gender" | ||
lab define gender 1 "Female" 2 "Male", modify | ||
lab val gender gender | ||
tab gender, missing | ||
keep if gender !=. | ||
drop sex | ||
|
||
**Ethnicity | ||
*Numeric codes are assigned by exact string match; a string not listed here leaves ethnicity_n missing | ||
*(such rows are kept and show up in the tabulation because of the missing option) | ||
gen ethnicity_n = 1 if ethnicity == "White" | ||
replace ethnicity_n = 2 if ethnicity == "Asian or Asian British" | ||
replace ethnicity_n = 3 if ethnicity == "Black or Black British" | ||
replace ethnicity_n = 4 if ethnicity == "Mixed" | ||
replace ethnicity_n = 5 if ethnicity == "Chinese or Other Ethnic Groups" | ||
replace ethnicity_n = 6 if ethnicity == "Unknown" | ||
|
||
|
||
label define ethnicity_n 1 "White" /// | ||
2 "Asian or Asian British" /// | ||
3 "Black or Black British" /// | ||
4 "Mixed" /// | ||
5 "Chinese or Other Ethnic Groups" /// | ||
6 "Unknown", modify | ||
|
||
*The string variable is replaced by the labelled numeric one under the original name | ||
label values ethnicity_n ethnicity_n | ||
lab var ethnicity_n "Ethnicity" | ||
tab ethnicity_n, missing | ||
drop ethnicity | ||
rename ethnicity_n ethnicity | ||
|
||
**IMD | ||
*imd: 1 (most deprived) to 5 (least deprived), 6 = Unknown; matched on the exact strings in imd_quintile | ||
gen imd = 1 if imd_quintile == "1 (most deprived)" | ||
replace imd = 2 if imd_quintile == "2" | ||
replace imd = 3 if imd_quintile == "3" | ||
replace imd = 4 if imd_quintile == "4" | ||
replace imd = 5 if imd_quintile == "5 (least deprived)" | ||
replace imd = 6 if imd_quintile == "Unknown" | ||
|
||
label define imd 1 "1 (most deprived)" 2 "2" 3 "3" 4 "4" 5 "5 (least deprived)" 6 "Unknown", modify | ||
label values imd imd | ||
lab var imd "Index of multiple deprivation" | ||
tab imd, missing | ||
drop imd_quintile | ||
|
||
*Save the processed data for the table sections that follow | ||
save "$projectdir/output/data/reference_data_processed.dta", replace | ||
|
||
/*Tables================================================================*/ | ||
|
||
use "$projectdir/output/data/reference_data_processed.dta", clear | ||
|
||
**Baseline table for reference population | ||
*Summarises age as a continuous variable and gender, ethnicity and imd as categorical variables | ||
*No saving() option is given, so the table is only displayed; the counts shown here are not rounded or redacted | ||
*NOTE(review): confirm the unrounded display is acceptable wherever the log is released | ||
*preserve/restore puts the data back as they were, presumably because table1_mc changes the data in memory - confirm | ||
preserve | ||
table1_mc, total(before) onecol nospacelowpercent missing iqrmiddle(",") /// | ||
vars(age contn %5.1f \ /// | ||
gender cat %5.1f \ /// | ||
ethnicity cat %5.1f \ /// | ||
imd cat %5.1f \ /// | ||
) | ||
restore | ||
|
||
**Rounded and redacted baseline table for full population | ||
*Start from an empty dataset on disk so that every loop iteration can append to the same file | ||
clear * | ||
save "$projectdir/output/data/reference_table_rounded.dta", replace emptyok | ||
use "$projectdir/output/data/reference_data_processed.dta", clear | ||
|
||
*Variables created from here on default to double precision | ||
set type double | ||
|
||
*One pass per categorical variable: build a table of rounded counts and percentages for its categories | ||
*Each pass puts its rows above those already saved, so the final file lists gender, then ethnicity, then imd | ||
foreach var of varlist imd ethnicity gender { | ||
preserve | ||
*contract leaves one row per category, with the number of observations in _freq | ||
contract `var' | ||
local v : variable label `var' | ||
gen variable = `"`v'"' | ||
decode `var', gen(categories) | ||
*Counts are rounded to the nearest 5; total and percent are derived from the rounded counts | ||
gen count = round(_freq, 5) | ||
egen total = total(count) | ||
gen percent = round((count/total)*100, 0.1) | ||
order total, after(percent) | ||
*Redaction: rounded counts of 7 or less are shown as "<8" | ||
gen countstr = string(count) | ||
replace countstr = "<8" if count<=7 | ||
order countstr, after(count) | ||
drop count | ||
rename countstr count | ||
*From here count is a string; percent and total are blanked to "-" on redacted rows | ||
tostring percent, gen(percentstr) force format(%9.1f) | ||
replace percentstr = "-" if count =="<8" | ||
order percentstr, after(percent) | ||
drop percent | ||
rename percentstr percent | ||
gen totalstr = string(total) | ||
replace totalstr = "-" if count =="<8" | ||
order totalstr, after(count) | ||
drop total | ||
rename totalstr total | ||
gen cohort = "All" | ||
order cohort, first | ||
list cohort variable categories count percent total | ||
keep cohort variable categories count percent total | ||
*Add the rows saved by earlier passes below this variable's rows, then overwrite the file | ||
append using "$projectdir/output/data/reference_table_rounded.dta" | ||
save "$projectdir/output/data/reference_table_rounded.dta", replace | ||
restore | ||
} | ||
*Reload the combined table and write it to Excel with variable names as the header row | ||
use "$projectdir/output/data/reference_table_rounded.dta", clear | ||
export excel "$projectdir/output/tables/reference_table_rounded.xls", replace sheet("Overall") keepcellfmt firstrow(variables) | ||
|
||
**Table of mean age | ||
*Same pattern as the rounded table: create an empty file, build the summary row, append, save, export | ||
clear * | ||
save "$projectdir/output/data/reference_mean_age_rounded.dta", replace emptyok | ||
use "$projectdir/output/data/reference_data_processed.dta", clear | ||
|
||
preserve | ||
*Collapse to a single row: number of non-missing ages, mean age and standard deviation of age | ||
collapse (count) count=age (mean) mean_age=age (sd) stdev_age=age | ||
*The wildcard matches names ending in "count"; only count exists here, and it becomes freq | ||
rename *count freq | ||
*Round the count to the nearest 5 and show rounded counts of 7 or less as "<8" | ||
gen count = round(freq, 5) | ||
gen countstr = string(count) | ||
replace countstr = "<8" if count<=7 | ||
order countstr, after(count) | ||
drop count | ||
rename countstr count | ||
*Mean and standard deviation are formatted to one decimal place and blanked to "-" when the count is redacted | ||
tostring mean_age, gen(meanstr) force format(%9.1f) | ||
replace meanstr = "-" if count =="<8" | ||
drop mean_age | ||
rename meanstr mean_age | ||
tostring stdev_age, gen(stdevstr) force format(%9.1f) | ||
replace stdevstr = "-" if count =="<8" | ||
order stdevstr, after(stdev_age) | ||
drop stdev_age | ||
rename stdevstr stdev_age | ||
order count, first | ||
list count mean_age stdev_age | ||
keep count mean_age stdev_age | ||
*The file appended here was saved empty above, so this adds no rows | ||
append using "$projectdir/output/data/reference_mean_age_rounded.dta" | ||
save "$projectdir/output/data/reference_mean_age_rounded.dta", replace | ||
restore | ||
|
||
*Reload the saved summary and write it to Excel with variable names as the header row | ||
use "$projectdir/output/data/reference_mean_age_rounded.dta", clear | ||
export excel "$projectdir/output/tables/reference_mean_age_rounded.xls", replace keepcellfmt firstrow(variables) | ||
|
||
***Output tables as CSVs | ||
*Each workbook is read back without firstrow, so its header row comes in as the first data row; | ||
*novarnames then stops export delimited writing Stata's own variable names, which leaves that row as the CSV header | ||
import excel "$projectdir/output/tables/reference_table_rounded.xls", clear | ||
export delimited using "$projectdir/output/tables/reference_table_rounded.csv", novarnames replace | ||
|
||
import excel "$projectdir/output/tables/reference_mean_age_rounded.xls", clear | ||
export delimited using "$projectdir/output/tables/reference_mean_age_rounded.csv", novarnames replace | ||
|
||
*Close the log opened at the start of the do-file | ||
log close |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from ehrql import create_dataset, days, months, years, case, when, create_measures, INTERVAL, minimum_of, maximum_of | ||
from ehrql.tables.tpp import patients, medications, practice_registrations, clinical_events, apcs, addresses, ons_deaths, appointments | ||
from ehrql.codes import ICD10Code | ||
from datetime import date, datetime | ||
import codelists_ehrQL as codelists | ||
|
||
dataset = create_dataset() | ||
dataset.configure_dummy_data(population_size=1000) | ||
|
||
index_date = "2020-08-01" | ||
|
||
# Demographics | ||
dataset.age = patients.age_on(index_date) | ||
dataset.sex = patients.sex | ||
|
||
# Currently registered at mid-point | ||
any_registration = practice_registrations.for_patient_on(index_date).exists_for_patient() | ||
|
||
# Define patient ethnicity | ||
latest_ethnicity_code = ( | ||
clinical_events.where(clinical_events.snomedct_code.is_in(codelists.ethnicity_codes)) | ||
.where(clinical_events.date.is_on_or_before(index_date)) | ||
.sort_by(clinical_events.date) | ||
.last_for_patient().snomedct_code.to_category(codelists.ethnicity_codes) | ||
) | ||
|
||
dataset.ethnicity = case( | ||
when(latest_ethnicity_code == "1").then("White"), | ||
when(latest_ethnicity_code == "2").then("Mixed"), | ||
when(latest_ethnicity_code == "3").then("Asian or Asian British"), | ||
when(latest_ethnicity_code == "4").then("Black or Black British"), | ||
when(latest_ethnicity_code == "5").then("Chinese or Other Ethnic Groups"), | ||
otherwise="Unknown", | ||
) | ||
|
||
# Define patient IMD | ||
imd = addresses.for_patient_on(index_date).imd_rounded | ||
|
||
dataset.imd_quintile = case( | ||
when((imd >= 0) & (imd < int(32844 * 1 / 5))).then("1 (most deprived)"), | ||
when(imd < int(32844 * 2 / 5)).then("2"), | ||
when(imd < int(32844 * 3 / 5)).then("3"), | ||
when(imd < int(32844 * 4 / 5)).then("4"), | ||
when(imd < int(32844 * 5 / 5)).then("5 (least deprived)"), | ||
otherwise="Unknown", | ||
) | ||
|
||
# Define population as any registered patient after index date - then apply further restrictions later | ||
dataset.define_population( | ||
any_registration | ||
& dataset.sex.is_in(["male", "female"]) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters