-
Notifications
You must be signed in to change notification settings - Fork 1
/
Drug-related mortality.R
174 lines (137 loc) · 8.5 KB
/
Drug-related mortality.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# WORK N PROGRESS - Drug-related mortality indicator code
# Part 1 - Extract data from SMRA
# Part 2 - Prepare geographies
# Part 3 - Run analysis functions
# Part 4 - Female drug related mortality
# Part 5 - Male drug related mortality
###############################################.
## Packages/Filepaths/Functions ----
###############################################.
source("1.indicator_analysis.R") #Normal indicator functions
source("2.deprivation_analysis.R") # deprivation function
# SMRA login information
channel <- suppressWarnings(dbConnect(odbc(), dsn="SMRA",
uid=.rs.askForPassword("SMRA Username:"),
pwd=.rs.askForPassword("SMRA Password:")))
###############################################.
## Part 1 - Extract data from SMRA ----
###############################################.
# Bringing files for 2002 to 2005 from previous profiles updates as we only get 2006 onwards from NRS
# Old file for 2002-2005 data - all
drug_deaths_02_05 <- read.csv(paste0(data_folder, "Received Data/drugs_deaths_2002-05_DO_NOT_DELETE.csv")) %>%
setNames(tolower(names(.)))
# Old file for 2002-2005 data - females
drug_deaths_female_02_05 <- read.csv(paste0(data_folder, "Received Data/drugs_deaths_female_DO_NOT_DELETE.csv")) %>%
setNames(tolower(names(.)))
# Old file for 2002-2005 data - males
drug_deaths_male_02_05 <- read.csv(paste0(data_folder, "Received Data/drugs_deaths_male_DO_NOT_DELETE.csv")) %>%
setNames(tolower(names(.)))
# NRS file for drug deaths - match to SQL query by rdno_entry_no_yr
drug_deaths_NRS <- readRDS(paste0(data_folder, "Received Data/Drugs/IR2024-00644.rds")) %>%
setNames(tolower(names(.)))
# SQL query for drug deaths 2006-2021
# Select drug specific deaths from SMRA - the ICD10 codes have been removed from query as it was not finding all the deaths in the NRS extract (Sep21)
# Exclude any with null age group
# Exclude deaths where sex is unknown (9)
drug_deaths_smr <- as_tibble(dbGetQuery(channel, statement=
"SELECT year_of_registration year, age, SEX sex_grp, UNDERLYING_CAUSE_OF_DEATH cod1, POSTCODE pc7,
REGISTRATION_DISTRICT rdno, ENTRY_NUMBER entry_no, PLACE_OF_DEATH_POSTCODE pc_death
FROM ANALYSIS.GRO_DEATHS_C
WHERE date_of_registration between '1 January 2006' and '31 December 2022'
AND age is not NULL
AND sex <> 9")) %>%
setNames(tolower(names(.))) #variables to lower case
# Adding padding to the entry_no variable so it matches NRS format
drug_deaths_smr <- drug_deaths_smr %>%
mutate(entry_no = case_when(nchar(entry_no) == 2 ~ paste0("0", entry_no),
nchar(entry_no) == 1 ~ paste0("00", entry_no),
TRUE ~paste0(entry_no)))
drug_deaths_smr <- drug_deaths_smr %>%
# Creating identifier used to match NRS and SMR data
mutate(rdno_entry_no_yr = paste0(rdno, entry_no, year)) %>%
create_agegroups() # 5 year age-bands grop
drug_deaths <- left_join(drug_deaths_NRS, drug_deaths_smr, "rdno_entry_no_yr") #|>
#filter(!(is.na(year))) #temporary line of code to exclude 2023 figures in 2022 which were showing up as NAs
# Adding this to ensure HB/CA breakdown matches the NRS publication.
# If postcode is NA then use the place of death postcode.
drug_deaths %<>% mutate(pc7 = case_when(is.na(pc7) ~ pc_death,
TRUE ~ pc7))
# checking if totals match with NRS publication
drug_deaths %>% group_by(year) %>% count() %>% View()
###############################################.
## Part 2 - Creating basefiles ----
###############################################.
# Bring council area info.
postcode_lookup <- read_rds('/conf/linkage/output/lookups/Unicode/Geography/Scottish Postcode Directory/Scottish_Postcode_Directory_2024_2.rds') %>%
setNames(tolower(names(.))) #variables to lower case
# Aggregating data by datazone for deprivation analysis
drug_deaths_depr <- left_join(drug_deaths, postcode_lookup, "pc7") %>%
select(year, datazone2001, datazone2011, sex_grp, age_grp) %>%
mutate(datazone = case_when(year<2014 ~ datazone2001,
year>2013 ~ datazone2011)) %>%
group_by(year, datazone, sex_grp, age_grp) %>%
summarize(numerator = n()) %>% ungroup()
saveRDS(drug_deaths_depr, file=paste0(data_folder, 'Prepared Data/drug_deaths_depr_raw.rds'))
# Aggregating data by CA
drug_deaths_ca <- left_join(drug_deaths, postcode_lookup, "pc7") %>%
rename(ca = ca2019) %>%
group_by(year, ca, sex_grp, age_grp) %>%
summarize(numerator = n()) %>% ungroup()
saveRDS(drug_deaths_ca, file=paste0(data_folder, 'Prepared Data/drug_deaths_raw.rds'))
# Females
saveRDS(drug_deaths_ca %>% subset(sex_grp==2),
file=paste0(data_folder, 'Prepared Data/drug_deaths_female_raw.rds'))
# Males
saveRDS(drug_deaths_ca %>% subset(sex_grp==1),
file=paste0(data_folder, 'Prepared Data/drug_deaths_male_raw.rds'))
###############################################.
## Part 3 - All drug-related mortality analysis functions ----
###############################################.
# Analysis by CA
analyze_first(filename = "drug_deaths", geography = "council", measure = "stdrate",
pop = "CA_pop_allages", yearstart = 2006, yearend = 2022,
time_agg = 1, epop_age = "normal", adp = TRUE, hscp = TRUE)
analyze_second(filename = "drug_deaths", measure = "stdrate", time_agg = 1,
epop_total = 200000, ind_id = 4121, year_type = "calendar", qa = FALSE)
#run up to here as qa app opens
run_qa(filename = "drug_deaths", old_file="all_drug_deaths")
all_drug_deaths <- rbind(final_result, drug_deaths_02_05)
# save for shiny - these files are to be used rather than the output from the analyze_second function.
saveRDS(all_drug_deaths, file = paste0(data_folder, "Data to be checked/all_drug_deaths_shiny.rds"))
write_csv(all_drug_deaths, file = paste0(data_folder, "Data to be checked/all_drug_deaths_shiny.csv"))
#Deprivation analysis function
analyze_deprivation(filename="drug_deaths_depr", measure="stdrate", time_agg=5,
yearstart= 2006, yearend=2022, year_type = "calendar",
pop = "depr_pop_allages", epop_age="normal",
epop_total =200000, ind_id = 4121)
###############################################.
## Part 4 - Female drug related mortality analysis functions ----
###############################################.
analyze_first(filename = "drug_deaths_female", geography = "council", measure = "stdrate",
pop = "CA_pop_allages", yearstart = 2006, yearend = 2022,
time_agg = 5, epop_age = "normal", adp = TRUE, hscp = TRUE)
#epop is only 100000 as only female half population
analyze_second(filename = "drug_deaths_female", measure = "stdrate", time_agg = 5,
epop_total = 100000, ind_id = 12535, year_type = "calendar", qa = FALSE)
#run up to here as qa app opens
run_qa(filename = "drug_deaths_female", old_file="all_female_drug_deaths")
all_female_drug_deaths <- rbind(final_result, drug_deaths_female_02_05)
# save for shiny - these files are to be used rather than the output from the analyze_second function.
saveRDS(all_female_drug_deaths, file = paste0(data_folder, "Data to be checked/all_female_drug_deaths_shiny.rds"))
write_csv(all_female_drug_deaths, file = paste0(data_folder, "Data to be checked/all_female_drug_deaths_shiny.csv"))
###############################################.
## Part 5 - Male drug related mortality analysis functions ----
###############################################.
analyze_first(filename = "drug_deaths_male", geography = "council", measure = "stdrate",
pop = "CA_pop_allages", yearstart = 2006, yearend = 2022,
time_agg = 5, epop_age = "normal", adp = TRUE, hscp = TRUE)
#epop is only 100000 as only male half population
analyze_second(filename = "drug_deaths_male", measure = "stdrate", time_agg = 5,
epop_total = 100000, ind_id = 12534, year_type = "calendar", qa = FALSE)
#run up to here as qa app opens
run_qa(filename = "drug_deaths_male", old_file="all_male_drug_deaths")
all_male_drug_deaths <- rbind(final_result, drug_deaths_male_02_05)
# save for shiny - these files are to be used rather than the output from the analyze_second function.
saveRDS(all_male_drug_deaths, file = paste0(data_folder, "Data to be checked/all_male_drug_deaths_shiny.rds"))
write_csv(all_male_drug_deaths, file = paste0(data_folder, "Data to be checked/all_male_drug_deaths_shiny.csv"))
## END