In this novel work Development and validation of a micro-QuEChERS method with high-throughput enhanced matrix removal followed with UHPLC-QqQ-MS/MS for analysis of raspberry ketone-related phenolic compounds in adipose tissues published in Talanta, we developed a new method to measure raspberry ketone (RK) metabolites in adipose tissue. It involved using enhanced-matrix-removal in a 96-well plate for effective sample cleanup. Additionally, a reversed-phase C18 sorbent was utilized to enhance recovery after SpeedVac drying. A stepwise recovery monitoring technique was developed for validation purposes.
Below is documented the R script for data analysis in this work. The R code was developed with reference to tidyverse documentation, R for Data Science, and DataBrewer.co for data wrangling, statistical analysis, and data visualization.
library(rebus)
library(cowplot)
library(gridExtra)
library(readxl)
library(RColorBrewer)
library(ggrepel)
library(tidyverse)
theme_set(theme_bw() + theme(axis.text = element_text(colour = "black", size = 14.5),
axis.title = element_text(colour = "black", face = "bold", size = 15.5),
strip.background = element_blank(),
panel.border = element_rect(size = .5, color = "black"),
strip.text = element_text(face = "bold", size = 12)))
path = "/Users/Boyuan/Desktop/My publication/17. EMR Adipose RK metabolites/data RK lipid EMR validation 3th ,Zhiya 081920.xlsx"
color.cmpd = colorRampPalette(brewer.pal(8, "Dark2"))(26) %>% sort() %>% sort()
d.opt.sorbent = read_excel(path, sheet = "sorbent opt")
d.opt.sorbent = d.opt.sorbent %>%
gather(-c(`Data File`, sorbent, `Acq. Date-Time`), key = compound, value = resp)
x = d.opt.sorbent %>% filter(sorbent == "ctrl") %>%
group_by(compound) %>%
summarise(resp.ctrl.mean = mean(resp),
resp.ctrl.sd = sd(resp))
y = d.opt.sorbent %>% filter(sorbent != "ctrl") %>%
group_by(sorbent, compound) %>%
summarise(resp.mean = mean(resp), resp.sd = sd(resp))
d.opt.sorbent.recovery = y %>% left_join(x, by = c("compound")) %>%
mutate(recovery.mean = resp.mean / resp.ctrl.mean * 100,
recovery.sd = sqrt((resp.sd/resp.mean)^2 + (resp.ctrl.sd/resp.ctrl.mean)^2) * recovery.mean )
d.opt.sorbent.recovery
## # A tibble: 182 x 8
## # Groups: sorbent [7]
## sorbent compound resp.mean resp.sd resp.ctrl.mean
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 100 NP 3-HBA 2537. 91.8 2731.
## 2 100 NP 3-HPAA 2248. 329. 2560.
## 3 100 NP 3-HPPA 3208. 479. 3907.
## 4 100 NP 3,4-DHBA 1123. 178. 1099.
## 5 100 NP 3,4-DHP… 176. 119. 407.
## 6 100 NP 3,4-DHPE 1031. 93.5 1672.
## 7 100 NP 3,4-DHP… 203. 81.7 646.
## 8 100 NP 4-HBA 6886. 742. 7439.
## 9 100 NP 4-HBOH 2652. 280. 3249.
## 10 100 NP 4-HCA 1226. 152. 1367.
## # … with 172 more rows, and 3 more variables:
## # resp.ctrl.sd <dbl>, recovery.mean <dbl>,
## # recovery.sd <dbl>
d.opt.sorbent.recovery$sorbent = d.opt.sorbent.recovery$sorbent %>%
factor(levels = c("Sorbent free", "100 NP (big size)", "100 NP", "75 NP-25 RP", "50 NP-50 RP", "25 NP-75 RP", "100 RP"), ordered = T)
# low recovery compounds without sorbent
l = d.opt.sorbent.recovery %>% filter(sorbent == "Sorbent free") %>%
filter(recovery.mean < 30)
cmpd.lowRecovery = c("RK-Me", "PhLiAce", "3,4-DHPPA", "3,4-DHPAA", "CA", "4-HBOH")
dg = .3
plt.sorbentOptimization = d.opt.sorbent.recovery %>%
filter(!compound %in% cmpd.lowRecovery ) %>%
ggplot(aes(x = sorbent, y = recovery.mean, color = compound, fill = compound)) +
geom_boxplot(data = d.opt.sorbent.recovery, outlier.alpha = 0,
width = .8, fill = "snow1",
aes(group = sorbent)) +
geom_errorbar(aes(ymin = recovery.mean - recovery.sd,
ymax = recovery.mean + recovery.sd),
position = position_dodge(dg), size = .1, width = 1) +
geom_point(position = position_dodge(dg), shape = 21, fill = "white", size = 2) +
theme(legend.position = "none", panel.grid = element_blank(),
axis.text = element_text(size = 11, colour = "black"),
axis.title = element_text(size = 12, colour = "black")) +
scale_y_continuous(breaks = seq(0, 150, by = 10)) +
coord_cartesian(ylim = c(0, 110)) +
# threshold
geom_segment(aes(x = .5, xend = 7.5, y = 30, yend = 30),
linetype = "dashed", size = 1, color = "firebrick") +
# add special note to low-recovery compound
geom_line(data = d.opt.sorbent.recovery %>% filter(compound %in% cmpd.lowRecovery),
aes(group = compound), position = position_dodge(dg), linetype = "dotted") +
geom_errorbar(data = d.opt.sorbent.recovery %>% filter(compound %in% cmpd.lowRecovery),
aes(ymin = recovery.mean - recovery.sd,
ymax = recovery.mean + recovery.sd),
position = position_dodge(dg), size = .1, width = .1) +
geom_point(data = d.opt.sorbent.recovery %>% filter(compound %in% cmpd.lowRecovery),
aes(group = compound), position = position_dodge(dg),
shape = 21, fill = "white", size = 2, stroke = 1) +
# fill with transparent color
geom_point(data = d.opt.sorbent.recovery %>% filter(compound %in% cmpd.lowRecovery),
aes(group = compound), position = position_dodge(dg),
shape = 21, size = 2, alpha = .6) +
geom_text_repel(data = d.opt.sorbent.recovery %>% filter(compound %in% cmpd.lowRecovery),
aes(label = compound), size = 3, fontface = "bold",
position = position_dodge(dg)) +
labs(y = "Recovery (%)", x = "Sorbent") +
scale_color_manual(values = color.cmpd) +
scale_fill_manual(values = color.cmpd)
# 7.5 X 8.9 Mac screen
plt.sorbentOptimization
d.opt.sorbent.recovery %>%
filter(compound %in% c("RK-Me", "PhLiAce", "4-HBOH")) %>%
select(sorbent, compound, recovery.mean) %>% spread(sorbent, recovery.mean)
## # A tibble: 3 x 8
## compound `Sorbent free` `100 NP (big si… `100 NP`
## <chr> <dbl> <dbl> <dbl>
## 1 4-HBOH 22.1 72.7 81.6
## 2 PhLiAce 0.456 16.0 17.1
## 3 RK-Me 0.952 66.4 60.0
## # … with 4 more variables: `75 NP-25 RP` <dbl>, `50
## # NP-50 RP` <dbl>, `25 NP-75 RP` <dbl>, `100 RP` <dbl>
d.opt.sorbent.recovery$compound %>% n_distinct()
## [1] 26
# Effect of sorbent mass
d.opt.sorbent.mass = read_excel(path, sheet = "sorbent mass")
d.opt.sorbent.mass = d.opt.sorbent.mass %>%
gather(-c(`Data File`, code, `Acq. Date-Time`, `sorbent mass`), key = compound, value = resp) %>%
group_by(compound) %>%
mutate(resp.relative = resp / mean(resp))
d.opt.sorbent.mass = d.opt.sorbent.mass %>%
group_by(`sorbent mass`, compound) %>%
summarise(resp.relative.mean = mean(resp.relative), resp.relative.sd = sd(resp.relative))
d.opt.sorbent.mass
## # A tibble: 78 x 4
## # Groups: sorbent mass [3]
## `sorbent mass` compound resp.relative.m…
## <chr> <chr> <dbl>
## 1 1~5 mg 3-HBA 1.06
## 2 1~5 mg 3-HPAA 1.03
## 3 1~5 mg 3-HPPA 1.10
## 4 1~5 mg 3,4-DHBA 1.21
## 5 1~5 mg 3,4-DHP… 1.50
## 6 1~5 mg 3,4-DHPE 1.18
## 7 1~5 mg 3,4-DHP… 1.23
## 8 1~5 mg 4-HBA 1.06
## 9 1~5 mg 4-HBOH 1.07
## 10 1~5 mg 4-HCA 1.07
## # … with 68 more rows, and 1 more variable:
## # resp.relative.sd <dbl>
plt.sorbent.mass = d.opt.sorbent.mass %>%
ggplot(aes(x = `sorbent mass`, y = resp.relative.mean, color = compound)) +
geom_boxplot(aes(group = `sorbent mass`), outlier.alpha = 0) +
geom_errorbar(aes(ymin = resp.relative.mean - resp.relative.sd,
ymax = resp.relative.mean + resp.relative.sd),
position = position_dodge(.5), size = .2, width = 1.2) +
geom_point(position = position_dodge(.5), shape = 21, fill = "white") +
theme(legend.position = "none") +
coord_cartesian(ylim = c(.5, 1.6)) +
scale_y_continuous(breaks = seq(.6, 1.6, .2)) +
labs(y = "Relative peak intensity", caption = "Reconstituted in 100 uL 60% MeOH 0.1% FA before LCMS analysis")
plt.sorbent.mass
# <>0^%@#$!)(*&^$##&$)^$(^#!~#$)# <>0^%@#$!)(*&^$##&$)^$(^#!~#$)# <>0^%@#$!)(*&^$##&$)^$(^#!~#$)# <>0^%@#$!)(*&^$##&$)^$(^#!~#$)
# Validation
d.conc = read_excel(path, sheet = "calculated concentration")
d.conc = d.conc %>%
gather(-c(`Data File`, Name, `Acq. Date-Time`), key = compound, value = conc) %>%
select(-`Acq. Date-Time`)
# blank concentration
d.conc.blk = d.conc %>% filter(Name == "Adi + Enz") %>%
select(-c(Name, `Data File`)) %>%
group_by(compound) %>%
summarise(conc.blk.mean = mean(conc, na.rm = T),
conc.blk.sd = sd(conc, na.rm = T))
# total measured conc. (spiked + background)
d.ABCD = d.conc %>% filter(Name %in% c("A", "B", "C", "D")) %>%
group_by(compound, Name) %>%
summarise(conc.all.mean = mean(conc, na.rm = T),
conc.all.sd = sd(conc, na.rm = T))
# subract background
d.ABCD = d.ABCD %>% left_join(d.conc.blk, by = "compound") %>%
mutate(conc.spk.mean = conc.all.mean - conc.blk.mean,
conc.spk.sd = (conc.all.sd^2 + conc.blk.sd^2) %>% sqrt()) %>%
select(compound, Name, contains("spk"))
# Expected spiked amount
d.expect = read_excel(path, sheet = "stock solution", range = "B2:O28") %>%
select(compound, A, B, C, D)
d.expect = d.expect %>%
gather(-compound, key = Name, value = conc.expected)
# Compute Accuracy
d.ABCD = d.ABCD %>% left_join(d.expect, by = c("compound", "Name")) %>%
mutate(Accuracy.mean = conc.spk.mean / conc.expected * 100,
Accuracy.sd = conc.spk.sd / conc.expected * 100) %>%
# add spike concentration level
ungroup() %>%
mutate(level = rep(c("2000", "500", "100", "20"), time = d.ABCD$compound %>% n_distinct()),
level = factor(level, levels = c("2000", "500", "100", "20"), ordered = T))
# 3,4-DHPPA D level below limit of quantification
d.ABCD = d.ABCD %>% filter(! ((compound == "3,4-DHPPA") & (Name == "D")))
# accuracy box plot
dg = .4
plt.accuracy = d.ABCD %>%
ggplot(aes(x = level, y = Accuracy.mean, color = compound, fill = compound)) +
geom_boxplot(outlier.alpha = 0, aes(group = Name), fill = "grey", width = .8) +
geom_errorbar(aes(ymin = Accuracy.mean - Accuracy.sd,
ymax = Accuracy.mean + Accuracy.sd ),
size = .2, width = 1, position = position_dodge(dg)) +
geom_point(position = position_dodge(dg), shape = 21, fill = "white", size = 4.5) +
geom_point(position = position_dodge(dg), shape = 21, alpha = 0.2, size = 4.5) +
theme(legend.position = "None", panel.grid = element_blank()) +
scale_y_continuous(breaks = seq(0, 180, by = 20)) +
geom_text_repel(data = d.ABCD %>%
filter(Accuracy.mean > 140 | Accuracy.mean < 60 ),
aes(label = compound), size = 5, position = position_dodge(dg)) +
coord_cartesian(ylim = c(0, 180)) +
annotate("segment", x = .5, xend = 4.5, y = 120, yend = 120,
linetype = "dashed", size = .6, color = "firebrick") +
annotate("segment", x = .5, xend = 4.5, y = 80, yend = 80,
linetype = "dashed", size = .6, color = "firebrick") +
annotate("segment", x = .5, xend = 4.5, y = 100, yend = 100,
linetype = "dashed", size = .6, color = "darkgreen") +
scale_color_manual(values = color.cmpd) +
scale_fill_manual(values = color.cmpd) +
labs(y = "Accuracy", x = "Spiked concentration of final injection (ng/mL)")
plt.accuracy
# annotation standard: Accuracy.mean > 140 | Accuracy.mean < 60 | Accuracy.sd > 50
d.ABCD %>% filter(Accuracy.sd > 50)
## # A tibble: 3 x 8
## compound Name conc.spk.mean conc.spk.sd conc.expected
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 4-HBA D 7.77 22.1 17.5
## 2 4-HPAA D 18.1 12.3 19.4
## 3 CA D 32.4 16.7 21.7
## # … with 3 more variables: Accuracy.mean <dbl>,
## # Accuracy.sd <dbl>, level <ord>
# <>)*$#*(#@!$&<>*&%$#@!@#^*^# <>)*$#*(#@!$&<>*&%$#@!@#^*^# <>)*$#*(#@!$&<>*&%$#@!@#^*^# <>)*$#*(#@!$&<>*&%$#@!@#^*^
# Recovery at each step
d.area = read_excel(path, sheet = "peak area")
d.area = d.area %>%
gather(-c(`Data File`, Name, `Acq. Date-Time`), key = compound, value = area)
d.area.copy = d.area # make a copy for later use with comparison with internal standard
# background
d.area.blk = d.area %>% filter(Name == "Adi + Enz") %>%
group_by(compound) %>%
summarise(area.blk.mean = mean(area, na.rm = T),
area.blk.sd = sd(area, na.rm = T))
# total area at each step ( spiked + background )
d.area.steps = d.area %>%
filter(Name %in% c("B", "C", "PX", "PC", "PR", "PR neat", "no silica gel", "PR neat")) %>%
group_by(compound, Name) %>%
summarise(area.mean = mean(area, na.rm = T),
area.sd = sd(area, na.rm = T))
#good
# subtract background
d.area.steps = d.area.steps %>% left_join(d.area.blk, by = "compound") %>%
mutate(area.spk.mean = area.mean - area.blk.mean,
area.spk.sd = (area.sd^2 + area.blk.sd^2) %>% sqrt()) %>%
select(compound, Name, contains("spk"))
d.area.steps
## # A tibble: 182 x 4
## # Groups: compound [26]
## compound Name area.spk.mean area.spk.sd
## <chr> <chr> <dbl> <dbl>
## 1 3-HBA B 18195. 1554.
## 2 3-HBA C 3564. 249.
## 3 3-HBA no silica gel 3845. 288.
## 4 3-HBA PC 23085. 1431.
## 5 3-HBA PR 23494. 727.
## 6 3-HBA PR neat 30583. 1915.
## 7 3-HBA PX 20413. 1060.
## 8 3-HPAA B 6893. 491.
## 9 3-HPAA C 1320. 123.
## 10 3-HPAA no silica gel 1535. 74.5
## # … with 172 more rows
# spread apart steps into separate columns
x = d.area.steps %>% select(-area.spk.sd) %>%
spread(key = Name, value = area.spk.mean)
colnames(x) = c("compound", colnames(x)[-1] %>% str_c(".mean"))
y = d.area.steps %>% select(-area.spk.mean) %>%
spread(key = Name, value = area.spk.sd)
colnames(y) = c("compound", colnames(y)[-1] %>% str_c(".sd"))
y
## # A tibble: 26 x 8
## # Groups: compound [26]
## compound B.sd C.sd `no silica gel.… PC.sd PR.sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3-HBA 1554. 249. 288. 1431. 727.
## 2 3-HPAA 491. 123. 74.5 489. 397.
## 3 3-HPPA 2908. 936. 281. 3529. 1474.
## 4 3,4-DHBA 4313. 971. 658. 4378. 2164.
## 5 3,4-DHP… 2733. 502. 377. 2327. 1664.
## 6 3,4-DHPE 811. 147. 83.6 707. 448.
## 7 3,4-DHP… 2083. 359. 107. 3207. 3292.
## 8 4-HBA 10031. 5853. 2516. 7081. 3001.
## 9 4-HBOH 933. 232. 102. 2094. 1614.
## 10 4-HCA 1983. 459. 360. 2743. 1404.
## # … with 16 more rows, and 2 more variables: `PR
## # neat.sd` <dbl>, PX.sd <dbl>
d.area.steps = left_join(x, y, by = "compound")
# recovery at each steps (now background subtracted)
d.recovery = d.area.steps %>%
mutate(
# extraction (relative to spike amount of post extraction or PX)
rec.extraction.mean = B.mean / PX.mean * 100,
rec.extraction.sd = sqrt((PX.sd / PX.mean)^2 + (B.sd / B.mean)^2) * rec.extraction.mean,
# EMR clean up (relative to spike amount of post clean up or PC)
rec.EMR.mean = PX.mean / PC.mean * 100,
rec.EMR.sd = sqrt((PX.sd / PX.mean)^2 + (PC.sd / PC.mean)^2)* rec.EMR.mean,
# SpeedVac drying & reconstitution (relative to spike amount of post reconstitution or PR)
rec.DryReconstitute.mean = PC.mean / PR.mean * 100,
rec.DryReconstitute.sd = sqrt((PC.sd / PC.mean)^2 + (PR.sd / PR.mean)^2) * rec.DryReconstitute.mean,
# # add silica gel effect (relative to without, using spike level C)
# sorbent.mean = C.mean / `no silica gel.mean`, # (improvenment fold), with improvement > 1
# sorbent.sd = sqrt((`no silica gel.sd` / `no silica gel.mean`)^2 + (C.sd / C.mean)^2) * sorbent.mean,
# Overall recovery
rec.Overall.mean = B.mean / PR.mean * 100,
rec.Overall.sd = sqrt((B.sd / B.mean)^2 + (PR.sd/ PR.mean)^2) * rec.Overall.mean,
# matrix effect to ESI
matrixESI.mean = PR.mean / `PR neat.mean` * 100,
matrixESI.sd = sqrt((PR.sd / PR.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * matrixESI.mean,
# Overall processing efficiency
process.eff.mean = B.mean / `PR neat.mean` * 100,
process.eff.sd = sqrt((B.sd / B.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * process.eff.mean) %>%
select(compound, contains("rec"), contains("ESI"), contains("process"))
x = d.recovery %>%
select(compound, contains("mean")) %>%
gather(-compound, key = step, value = mean) %>%
mutate(step = str_remove(step, pattern = ".mean"))
y = d.recovery %>%
select(compound, contains("sd")) %>%
gather(-compound, key = step, value = sd) %>%
mutate(step = str_remove(step, pattern = ".sd"))
y
## # A tibble: 156 x 3
## # Groups: compound [26]
## compound step sd
## <chr> <chr> <dbl>
## 1 3-HBA rec.extraction 8.91
## 2 3-HPAA rec.extraction 7.39
## 3 3-HPPA rec.extraction 7.61
## 4 3,4-DHBA rec.extraction 9.13
## 5 3,4-DHPAA rec.extraction 14.2
## 6 3,4-DHPE rec.extraction 21.5
## 7 3,4-DHPPA rec.extraction 23.9
## 8 4-HBA rec.extraction 13.8
## 9 4-HBOH rec.extraction 16.3
## 10 4-HCA rec.extraction 7.07
## # … with 146 more rows
d.recovery.tidy = x %>% left_join(y, by = c("compound", "step"))
# compound wise
# d.recovery.tidy %>%
# ggplot(aes(x = compound, y = mean, color = step)) +
# geom_point(position = position_dodge(dg)) +
# coord_flip()
# overview
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "rec.extraction", replacement = "Extraction")
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "rec.EMR", replacement = "Enhanced matrix removal")
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "rec.DryReconstitute", replacement = "Dry & reconstitute")
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "rec.Overall", replacement = "Overall recovery")
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "matrixESI", replacement = "Matrix effect (ESI)")
d.recovery.tidy$step = d.recovery.tidy$step %>% str_replace(pattern = "process.eff", replacement = "Overall processing efficiency")
d.recovery.tidy$step = factor(d.recovery.tidy$step, levels = d.recovery.tidy$step %>% unique(), ordered = T)
plt.recovery.steps = d.recovery.tidy %>%
ggplot(aes(x = step, y = mean, color = compound, fill = compound)) +
geom_boxplot(outlier.alpha = 0, fill = "grey", aes(group = step)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd),
position = position_dodge(dg),
size = .2, width = 1) +
geom_point(position = position_dodge(dg),
shape = 21, size = 3, stroke = 1, fill = "white") +
geom_point(position = position_dodge(dg),
shape = 21, size = 3, stroke = 1, alpha = .2) +
annotate("segment", x = .5, xend = 6.5, y = 100, yend = 100,
linetype = "dashed", size = .4, color = "darkgreen") +
annotate("segment", x = .5, xend = 6.5, y = 80, yend = 80,
linetype = "dashed", size = .4, color = "firebrick") +
coord_cartesian(ylim = c(20, 140)) +
scale_y_continuous(breaks = seq(0, 200, by = 10)) +
scale_color_manual(values = color.cmpd) +
scale_fill_manual(values = color.cmpd) +
theme(legend.position = "None") +
labs(x = " ") +
geom_text_repel(data = d.recovery.tidy %>% filter(mean > 110 | mean <= 50),
aes(label = compound), position = position_dodge(dg), size = 3)
plt.recovery.steps
# *<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&*<^%#^&*$#@!%$#^&
# sorbent improvement effect
# For this section the peak area should NOT subtract background since background is measured WITH sorbent. The with sorbent and without sorbent should be compared directly with background content included
d.area.sorbent = d.area %>% filter(Name %in% c("C", "no silica gel")) %>%
group_by(compound, Name) %>%
summarise(area.mean = mean(area),
area.sd = sd(area))
x = d.area.sorbent %>% select(-area.sd) %>% spread(key = Name, value = area.mean)
colnames(x) = c("compound", "C.mean", "no silica gel.mean")
y = d.area.sorbent %>% select(-area.mean) %>% spread(key = Name, value = area.sd)
colnames(y) = c("compound", "C.sd", "no silica gel.sd")
d.area.sorbent = left_join(x, y, by = "compound") %>%
mutate(sorbent.mean = C.mean / `no silica gel.mean`,
sorbent.sd = C.sd / `no silica gel.sd`)
dg. =5
plt.sorbent.validate = d.area.sorbent %>%
ggplot(aes(x = 1, y = sorbent.mean, color = compound, fill = compound)) +
geom_boxplot(outlier.alpha = 0, fill = "grey", aes(group = 1)) +
geom_errorbar(aes(ymin = sorbent.mean - sorbent.sd,
ymax = sorbent.mean + sorbent.sd),
position = position_dodge(dg),
size = .2, width = 1) +
geom_point(position = position_dodge(dg),
shape = 21, size = 3, stroke = 1, fill = "white") +
geom_point(position = position_dodge(dg),
shape = 21, size = 3, stroke = 1, alpha = .2) +
annotate("segment", x = .5, xend = 1.5, y = 1, yend = 1,
linetype = "dashed", size = .4, color = "darkgreen") +
scale_color_manual(values = color.cmpd) +
scale_fill_manual(values = color.cmpd) +
theme(legend.position = "None",
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
labs(x = "", y = "Fold increase of peak area with sorbent than without ") +
geom_text_repel(data = d.area.sorbent %>% filter(sorbent.mean > 2),
aes(label = compound), position = position_dodge(dg), size = 3) +
# scale_y_continuous(breaks = seq(0, 7, 1))+
coord_cartesian(ylim = c(0, 6))
plt.sorbent.validate
#0*^$@$^!#%&*#0*^$@$^!#%&*#0*^$@$^!#%&*#0*^$@$^!#%&*#0*^$@$^!#%&*#0*^$@$^!#%&*#0*^$@
# Recovery with internal standard correcting the loss effect
# IS area in each sample file
d.area.IS1 = read_excel(path, sheet = "IS1 peak area")
d.area.IS2 = read_excel(path, sheet = "IS2 peak area")
d.area.IS = d.area.IS1 %>%
left_join(d.area.IS2 %>% select(-c(`Acq. Date-Time`, Name)),
by = c("Data File"))
# d.area.IS[!complete.cases(d.area.IS), ]
# Compounds vs. IS
d.IS = read_excel(path, sheet = "IS")
# Compoute area corrected
d.area.corrected = d.area.copy %>%
filter(Name %in% c("B", "PX", "PC", "PR", "PR neat")) %>%
# subtract background on a sample wise manner
left_join(d.area.blk, by = "compound") %>%
mutate(spk.mean = area - area.blk.mean,
spk.sd = area.blk.sd) %>%
select(-c(area, contains("blk"))) %>%
# which IS
left_join(d.IS, by = "compound")
# compounds using 4-HBA as IS
x = d.area.corrected %>% filter(IS == "4-HBA-d4") %>%
left_join(d.area.IS %>% select(`Data File`, `4-HBA-d4`), by = "Data File") %>%
rename(area.IS = `4-HBA-d4`)
# compounds using cinnamic acid as IS
y = d.area.corrected %>% filter(IS == "cinnamic-d6") %>%
left_join(d.area.IS %>% select(`Data File`, `cinnamic-d6`), by = "Data File") %>%
rename(area.IS = `cinnamic-d6`)
# combine 2 IS's
d.area.corrected = rbind(x, y)
d.area.corrected = d.area.corrected %>%
select(-c(`Data File`, `Acq. Date-Time`)) %>%
# now area corrected with area of IS, shown as the ratio
mutate(ratio = spk.mean / area.IS,
ratio.sd = spk.sd / area.IS) %>%
select(-contains("spk"), -contains("IS"))
d.area.corrected = d.area.corrected %>%
group_by(Name, compound) %>%
summarise(ratio.mean = mean(ratio, na.rm = T),
ratio.sd = sd(ratio, na.rm = T))
# d.area.corrected[!complete.cases(d.area.corrected), ]
# spread columns apart
x = d.area.corrected %>% select(-ratio.sd) %>%
spread(key = Name, value = ratio.mean)
colnames(x) = c("compound", colnames(x)[-1] %>% str_c(".mean") )
y = d.area.corrected %>% select(-ratio.mean) %>%
spread(key = Name, value = ratio.sd)
colnames(y) = c("compound", colnames(y)[-1] %>% str_c(".sd") )
d.area.corrected = left_join(x, y, by = "compound")
# compute recovery corrected by IS
d.recovery.corrected = d.area.corrected %>%
# the recovery code format is the same as above without correction
mutate(
# extraction (relative to spike amount of post extraction or PX)
rec.extraction.mean = B.mean / PX.mean * 100,
rec.extraction.sd = sqrt((PX.sd / PX.mean)^2 + (B.sd / B.mean)^2) * rec.extraction.mean,
# EMR clean up (relative to spike amount of post clean up or PC)
rec.EMR.mean = PX.mean / PC.mean * 100,
rec.EMR.sd = sqrt((PX.sd / PX.mean)^2 + (PC.sd / PC.mean)^2)* rec.EMR.mean,
# SpeedVac drying & reconstitution (relative to spike amount of post reconstitution or PR)
rec.DryReconstitute.mean = PC.mean / PR.mean * 100,
rec.DryReconstitute.sd = sqrt((PC.sd / PC.mean)^2 + (PR.sd / PR.mean)^2) * rec.DryReconstitute.mean,
# # add silica gel effect (relative to without, using spike level C)
# sorbent.mean = C.mean / `no silica gel.mean`, # (improvenment fold), with improvement > 1
# sorbent.sd = sqrt((`no silica gel.sd` / `no silica gel.mean`)^2 + (C.sd / C.mean)^2) * sorbent.mean,
# Overall recovery
rec.Overall.mean = B.mean / PR.mean * 100,
rec.Overall.sd = sqrt((B.sd / B.mean)^2 + (PR.sd/ PR.mean)^2) * rec.Overall.mean,
# matrix effect to ESI
matrixESI.mean = PR.mean / `PR neat.mean` * 100,
matrixESI.sd = sqrt((PR.sd / PR.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * matrixESI.mean,
# Overall processing efficiency
process.eff.mean = B.mean / `PR neat.mean` * 100,
process.eff.sd = sqrt((B.sd / B.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * process.eff.mean) %>%
select(compound, contains("rec"), contains("ESI"), contains("process"))
d.recovery.corrected
## # A tibble: 26 x 13
## compound rec.extraction.… rec.extraction.…
## <chr> <dbl> <dbl>
## 1 3-HBA 95.1 5.42
## 2 3-HPAA 95.9 6.83
## 3 3-HPPA 126. 12.9
## 4 3,4-DHBA 94.5 5.56
## 5 3,4-DHP… 84.6 14.6
## 6 3,4-DHPE 78.8 24.6
## 7 3,4-DHP… 96.7 30.5
## 8 4-HBA 99.9 9.77
## 9 4-HBOH 85.0 18.6
## 10 4-HCA 84.0 6.21
## # … with 16 more rows, and 10 more variables:
## # rec.EMR.mean <dbl>, rec.EMR.sd <dbl>,
## # rec.DryReconstitute.mean <dbl>,
## # rec.DryReconstitute.sd <dbl>,
## # rec.Overall.mean <dbl>, rec.Overall.sd <dbl>,
## # matrixESI.mean <dbl>, matrixESI.sd <dbl>,
## # process.eff.mean <dbl>, process.eff.sd <dbl>
# colnames(d.recovery.corrected) = c("compound", str_c("crrct.", colnames(d.recovery.corrected)[-1]))
# d.recovery.corrected
# -------<>-------<>-------<>-------<>-------<>-------<>-------<>-------<>-------<>-------<>-------
x = d.recovery.corrected %>%
select(compound, contains("mean")) %>%
gather(-compound, key = step, value = mean) %>%
mutate(step = str_remove(step, pattern = ".mean"))
y = d.recovery.corrected %>%
select(compound, contains("sd")) %>%
gather(-compound, key = step, value = sd) %>%
mutate(step = str_remove(step, pattern = ".sd"))
d.recovery.corrected.tidy = x %>% left_join(y, by = c("compound", "step"))
# combine dataset of recovery corrected and not corrected
d.recovery.all = rbind(d.recovery.corrected.tidy %>% mutate(corrected = "ISTD-corrected") %>% ungroup(),
d.recovery.tidy %>% mutate(corrected = "Absolute") %>% ungroup())
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "rec.extraction", replacement = "Extraction")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "rec.EMR", replacement = "Enhanced matrix removal")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "rec.DryReconstitute", replacement = "Dry & reconstitute")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "rec.Overall", replacement = "Overall recovery")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "matrixESI", replacement = "Matrix effect (ESI)")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "process.eff", replacement = "Overall processing efficiency")
# ordered step
d.recovery.all$step = factor(d.recovery.all$step, levels = d.recovery.all$step %>% unique(), ordered = T)
# plot
dg = .4
plt.recovery.all = d.recovery.all %>%
ggplot(aes(x = corrected, y = mean)) +
facet_wrap(~step, nrow = 2) +
geom_boxplot(outlier.alpha = 0, fill = "grey") +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = compound),
position = position_dodge(dg),
size = .2, width = 1) +
geom_point(aes(color = compound),
position = position_dodge(dg),
size = 4.5, fill = "white", shape = 21) +
geom_point(aes(color = compound, fill = compound),
position = position_dodge(dg),
size = 4.5, shape = 21, alpha = .2) +
# geom_point(position = position_dodge(dg),
# shape = 21, size = 3, stroke = 1, alpha = .2) +
coord_cartesian(ylim = c(0, 180)) +
scale_y_continuous(breaks = seq(0, 200, by = 20)) +
scale_color_manual(values = color.cmpd) +
scale_fill_manual(values = color.cmpd) +
theme(legend.position = "None", panel.grid = element_blank()) +
labs(x = " ") +
# geom_text_repel(data = d.recovery.all$step %>% filter(mean > 110 | mean <= 60),
# aes(label = compound), position = position_dodge(dg), size = 3) +
annotate("segment", x = .5, xend = 2.5, y = 120, yend = 120,
linetype = "dotted", size = .6, color = "firebrick") +
annotate("segment", x = .5, xend = 2.5, y = 80, yend = 80,
linetype = "dotted", size = .6, color = "firebrick") +
annotate("segment", x = .5, xend = 2.5, y = 100, yend = 100,
linetype = "dotted", size = .6, color = "darkgreen") +
geom_text_repel(data = d.recovery.all %>% filter(mean < 50 | mean > 118 ),
aes(label = compound, color = compound), size = 5)
plt.recovery.all
# grid.arrange(plt.accuracy, plt.recovery.all, nrow = 2)
# Compound-stepwise recovery (accumulative effect) Not corrected
d.recovery.cummulated = d.recovery.all %>% filter(corrected == "Absolute" ) %>%
# filter(compound == "4-HBA") %>%
select(-sd, -corrected) %>%
spread(key = step, value = mean)
d.recovery.cummulated = d.recovery.cummulated %>%
mutate(Start = 100 + rnorm(n = nrow(d.recovery.cummulated), mean = 0, sd = 0),
# some random noise over 100 to avoid overlap
Extraction.0 = Start,
Extraction.1 = Extraction,
EMR.0 = Extraction.1,
EMR.1 = Extraction.1 * `Enhanced matrix removal` / 100,
Dry.0 = EMR.1,
Dry.1 = EMR.1 * `Dry & reconstitute` / 100,
matrixEffect.0 = Dry.1,
matrixEffect.1 = Dry.1 * `Matrix effect (ESI)` / 100,
Detected = matrixEffect.1) %>%
select(compound, Start, contains(".0"), contains(".1"), Detected) %>%
gather(-compound, key = step, value = recovery.cummulated)
# make a copy for later marking the plot the position of accumulated recovery on the y axis (horizontal line)
ymark = d.recovery.cummulated[d.recovery.cummulated$step %>% str_detect(pattern = ".1"), ]
ymark$step = ymark$step %>% str_remove(pattern = ".1")
ymark = ymark %>% mutate(status = "Absolute")
d.recovery.cummulated$step = d.recovery.cummulated$step %>% str_remove(pattern = ".0")
d.recovery.cummulated$step = d.recovery.cummulated$step %>% str_remove(pattern = ".1")
d.recovery.cummulated$step = d.recovery.cummulated$step %>%
factor(levels = d.recovery.cummulated$step %>% unique(), ordered = T)
# add IS. label
d.recovery.cummulated =
d.recovery.cummulated %>% left_join(d.IS, by = "compound")
# d.recovery.cummulated %>% as.data.frame()
#<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-<>-
# Compound-stepwise recovery (accumulative effect) Corrected
# d.recovery.all %>% arrange(compound, step) %>% as.data.frame()
d.recovery.cummulated.corrected = d.recovery.all %>%
filter(corrected == "ISTD-corrected" ) %>%
select(-sd, -corrected) %>%
spread(key = step, value = mean)
d.recovery.cummulated.corrected = d.recovery.cummulated.corrected %>%
mutate(Start = 100,
# may also consider adding some random noise over 100 to avoid overlap
Extraction.0 = Start,
Extraction.1 = Extraction,
EMR.0 = Extraction.1,
EMR.1 = Extraction.1 * `Enhanced matrix removal` / 100,
Dry.0 = EMR.1,
Dry.1 = EMR.1 * `Dry & reconstitute` / 100,
matrixEffect.0 = Dry.1,
matrixEffect.1 = Dry.1 * `Matrix effect (ESI)` / 100,
Detected = matrixEffect.1) %>%
select(compound, Start, contains(".0"), contains(".1"), Detected) %>%
gather(-compound, key = step, value = recovery.cummulated)
# d.recovery.cummulated.corrected %>% as.data.frame()
# make a copy for later marking the plot the position of accumulated recovery on the y axis
ymark.corrected = d.recovery.cummulated.corrected[d.recovery.cummulated.corrected$step %>% str_detect(pattern = ".1"), ]
ymark.corrected$step = ymark.corrected$step %>% str_remove(pattern = ".1")
ymark.corrected = ymark.corrected %>% mutate(status = "ISTD-corrected")
d.recovery.cummulated.corrected$step = d.recovery.cummulated.corrected$step %>% str_remove(pattern = ".0")
d.recovery.cummulated.corrected$step = d.recovery.cummulated.corrected$step %>% str_remove(pattern = ".1")
d.recovery.cummulated.corrected$step = d.recovery.cummulated.corrected$step %>%
factor(levels = d.recovery.cummulated.corrected$step %>% unique(), ordered = T)
# d.recovery.cummulated.corrected %>%
# arrange(compound, step) %>% as.data.frame() # so far correct
# add IS. label
d.recovery.cummulated.corrected =
d.recovery.cummulated.corrected %>% left_join(d.IS, by = "compound")
# d.recovery.cummulated.corrected %>%
# arrange(compound) %>% as.data.frame()
# Combine both dataset
d.recovery.cummulated.all = d.recovery.cummulated.corrected %>% mutate(status = "ISTD-corrected") %>%
rbind(d.recovery.cummulated %>% mutate(status = "Absolute"))
# d.recovery.cummulated.all %>%
# arrange(compound, status) %>% as.data.frame()
#plot
dg = .2
# plot 1
d.recovery.cummulated.all %>%
ggplot(aes(x = step, y = recovery.cummulated, color = compound)) +
geom_line(aes(group = compound), position = position_dodge(dg), alpha = .7) +
theme(legend.position = "None",
panel.grid = element_blank()) +
scale_y_continuous(breaks = seq(0, d.recovery.cummulated.corrected$recovery.cummulated %>% max(), by = 10)) +
facet_wrap(~status, nrow = 1) +
scale_color_manual(values = color.cmpd) +
labs(x = " ", y = "Recovery (%)")
# plot 2
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "Enhanced matrix removal", replacement = "EMR")
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "Dry & reconstitute", replacement = "Dry")
d.recovery.all$step[d.recovery.all$step == "Matrix effect (ESI)"] = "matrixEffect"
# not knowing why but this following command not work....
d.recovery.all$step = d.recovery.all$step %>% str_replace(pattern = "Matrix effect (ESI)", replacement = "matrixEffect")
# combine accumulated recovery with stepwise recovery
d.recovery.cummulated.all.withStepWise =
d.recovery.cummulated.all %>%
left_join(d.recovery.all %>% rename(status = corrected),
by = c("compound", "step", "status")) %>%
mutate(stepLossPercent = mean - 100)
# d.recovery.cummulated.all.withStepWise %>% select(compound, step, recovery.cummulated, status) %>%
# arrange(compound, status) %>%
# as.data.frame()
# mark y position in the plot for accumulated recovery -- horizontal line
d.recovery.cummulated.all.withStepWise =
d.recovery.cummulated.all.withStepWise %>%
left_join(rbind(ymark, ymark.corrected) %>% rename(ymark = recovery.cummulated),
by = c("compound", "step", "status"))
# mark y position in the plot for stepwise loss -- vertical line
d.recovery.cummulated.all.withStepWise =
d.recovery.cummulated.all.withStepWise %>%
group_by(compound, status, step) %>%
summarise(lossMarkPosition = mean(recovery.cummulated)) %>%
left_join(d.recovery.cummulated.all.withStepWise, by = c("compound", "status", "step"))
# now the main body has been built up, let's add the error band
# pp1 = d.recovery.all %>%
# filter(corrected == "ISTD-corrected") %>% select(-corrected)
#
# pp1.mean = pp1 %>% select(-sd) %>% spread(step, mean) %>%
# select(compound, Extraction, EMR, Dry)
# names(pp1.mean) = c("compound", names(pp1.mean)[-1] %>% str_c(".mean"))
#
# pp1.sd = pp1 %>% select(-mean) %>% spread(step, sd) %>%
# select(compound, Extraction, EMR, Dry)
# names(pp1.sd) = c("compound", names(pp1.sd)[-1] %>% str_c(".sd"))
#
# pp1.mean %>% left_join(pp1.sd, by = "compound") %>%
# mutate(Extraction = Extraction.mean,
# Extraction.errorBand = Extraction.sd,
#
# EMR = Extraction.mean * EMR.mean / 100,
# EMR.errorBand =
# )
# re-compute the accumulated recovery (as cross validation) more importantly the errorband!!
myFunc1 = function(dataset){
kkk1 = dataset %>%
mutate(Extraction = B.mean / PX.mean * 100,
Extraction.errorBand = sqrt((B.sd / B.mean)^2 + (PX.sd / PX.mean)^2) * Extraction,
EMR = B.mean / PC.mean * 100,
EMR.errorBand = sqrt((B.sd / B.mean)^2 + (PC.sd / PC.mean)^2) * EMR,
Dry = B.mean / PR.mean * 100,
Dry.errorBand = sqrt((B.sd / B.mean)^2 + (PR.sd / PR.mean)^2) * Dry,
matrixEffect = B.mean / `PR neat.mean` * 100,
matrixEffect.errorBand = sqrt((B.sd / B.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * matrixEffect) %>%
select(-contains(".mean"), -contains(".sd")) %>%
ungroup()
uu1 = kkk1 %>% select(compound, Extraction, EMR, Dry, matrixEffect) %>%
gather(-compound, key = step, value = recovery.cummulated)
uu2 = kkk1 %>% select(compound, contains("errorBand")) %>%
gather(-compound, key = step, value = recovery.cummulated)
uu2$step = uu2$step %>% str_extract(one_or_more(WRD))
uu2 = uu2 %>% rename(recovery.cummulated.errorBand = recovery.cummulated)
kkk1 = uu1 %>% left_join(uu2, by = c("compound", "step"))
return(kkk1)
}
qqq1 = myFunc1(dataset = d.area.steps %>% select(-C.mean, -C.sd, -contains("silica") ) ) %>%
mutate(status = "Absolute")
qqq2 = myFunc1(dataset = d.area.corrected) %>%
mutate(status = "ISTD-corrected")
d.recovery.cummulated.errorBand = qqq1 %>% rbind(qqq2, by = c("compound", "step")) %>%
mutate(recovery.cummulated = recovery.cummulated %>% as.numeric(),
recovery.cummulated.errorBand = recovery.cummulated.errorBand %>% as.numeric())
kkk1 = d.area.steps %>% select(-C.mean, -C.sd, -contains("silica")) %>%
mutate(Extraction = B.mean / PX.mean * 100,
Extraction.errorBand = sqrt((B.sd / B.mean)^2 + (PX.sd / PX.mean)^2) * Extraction,
EMR = B.mean / PC.mean * 100,
EMR.errorBand = sqrt((B.sd / B.mean)^2 + (PC.sd / PC.mean)^2) * EMR,
Dry = B.mean / PR.mean * 100,
Dry.errorBand = sqrt((B.sd / B.mean)^2 + (PR.sd / PR.mean)^2) * Dry,
matrixEffect = B.mean / `PR neat.mean` * 100,
matrixEffect.errorBand = sqrt((B.sd / B.mean)^2 + (`PR neat.sd` / `PR neat.mean`)^2) * matrixEffect) %>%
select(-contains(".mean"), -contains(".sd")) %>%
ungroup()
uu1 = kkk1 %>% select(compound, Extraction, EMR, Dry, matrixEffect) %>%
gather(-compound, key = step, value = recovery.cummulated)
uu2 = kkk1 %>% select(compound, contains("errorBand")) %>%
gather(-compound, key = step, value = recovery.cummulated)
uu2$step = uu2$step %>% str_extract(one_or_more(WRD))
uu2 = uu2 %>% rename(recovery.cummulated.errorBand = recovery.cummulated)
kkk1 = uu1 %>% left_join(uu2, by = c("compound", "step"))
# Combine with errorband dataset
d.recovery.cummulated.all.withStepWise =
d.recovery.cummulated.all.withStepWise %>%
left_join(d.recovery.cummulated.errorBand %>% rename(recovery.cummulated.homogenous = recovery.cummulated),
by = c("compound", "status", "step") )
# ordered factor
d.recovery.cummulated.all.withStepWise$step =
factor(d.recovery.cummulated.all.withStepWise$step,
levels = c("Start", "Extraction", "EMR", "Dry", "matrixEffect", "Detected"), ordered = T)
# add numeric x axis to easy label the accumulated recovery
myX = seq(n_distinct(d.recovery.cummulated.all.withStepWise$step))
names(myX) = d.recovery.cummulated.all.withStepWise$step %>% unique() %>% sort()
d.recovery.cummulated.all.withStepWise$numericX = myX[d.recovery.cummulated.all.withStepWise$step]
# Note that for "recovery.cummulated", each compound at each step has two values to form the vertical line in the plot, connecting the end of the prior step and the beginning of the following step. This term is only for purpse of drawing the line plot. For "recovery.cummulated.homogenous", its homogenous, it's the pure accumulated recovery at the end of the given step. It's used to draw the error band. The homogenous term is equal to one of the two recovery.cummulated values for a given compound at a given step (and also at the given status - IS corrected or not)
# plot
dg = 0
myalpha = .06
# define plot function
func.plotStepwiseRecovery = function(whichCompound) {
dd.i = d.recovery.cummulated.all.withStepWise %>% filter(compound == whichCompound)
dd.i %>%
ggplot(aes(x = numericX, y = recovery.cummulated, color = status)) +
geom_line(aes(group = status), position = position_dodge(dg), size = 1.2) +
theme(legend.position = c(.25, .2),
legend.title = element_blank(),
legend.text = element_text(size = 14),
panel.grid = element_blank(),
strip.text = element_text(size = 14),
axis.text = element_text(colour = "black"),
axis.title = element_text(colour = "black")
) +
# scale_y_continuous(breaks = seq(0, max(dd.i$recovery.cummulated) + 10, by = 10)) +
scale_x_continuous(breaks = myX) +
facet_wrap(~compound, nrow = 3, scales = "free_y") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
labs(x = " ", y = "Recovery (%)") +
# error band
# extraction error band for accumulated recovery until end of extraction
geom_ribbon(data = dd.i %>% filter(step == "Extraction"),
aes(ymin = recovery.cummulated.homogenous - recovery.cummulated.errorBand,
ymax = recovery.cummulated.homogenous + recovery.cummulated.errorBand,
x = c(2,3, 2,3), fill = status), alpha = myalpha, position = position_dodge(dg),
linetype = "dotted", show.legend = F) +
# EMR error band for accumulated recovery until end of EMR
geom_ribbon(data = dd.i %>% filter(step == "EMR"),
aes(ymin = recovery.cummulated.homogenous - recovery.cummulated.errorBand,
ymax = recovery.cummulated.homogenous + recovery.cummulated.errorBand,
x = c(3,4, 3,4), fill = status), alpha = myalpha, position = position_dodge(dg),
linetype = "dotted", show.legend = F) +
# Dry and recovery error band for accumulated recovery until end of reconstitution
geom_ribbon(data = dd.i %>% filter(step == "Dry"),
aes(ymin = recovery.cummulated.homogenous - recovery.cummulated.errorBand,
ymax = recovery.cummulated.homogenous + recovery.cummulated.errorBand,
x = c(4,5, 4,5), fill = status), alpha = myalpha, position = position_dodge(dg),
linetype = "dotted", show.legend = F) +
# Matrix effect error band for accumulated recovery until end of ESI
geom_ribbon(data = dd.i %>% filter(step == "matrixEffect"),
aes(ymin = recovery.cummulated.homogenous - recovery.cummulated.errorBand,
ymax = recovery.cummulated.homogenous + recovery.cummulated.errorBand,
x = c(5,6, 5,6), fill = status), alpha = myalpha, position = position_dodge(dg),
linetype = "dotted", show.legend = F) +
# Add annotation
# stepwise loss
geom_label(aes(label = round(stepLossPercent, 1), y = lossMarkPosition),
size = 5.5, label.padding = unit(0.12, "lines"),
# fontface = "bold",
position = position_dodge(dg)) +
# accumulated recovery
geom_label(aes(label = round(ymark, 1),
fill = status,
y = ymark, x = numericX + .5),
fontface = "bold",
size = 5.5,label.padding = unit(0.12, "lines"),
color = "white")
}
y1 = func.plotStepwiseRecovery(whichCompound = "RK")
y1
y2 = func.plotStepwiseRecovery(whichCompound = "ROH")
y3 = func.plotStepwiseRecovery(whichCompound = "RK-Me")
y4 = func.plotStepwiseRecovery(whichCompound = "3-HBA")
plt.streamLine = plot_grid(y1, y2, nrow = 2)
# y3 = plot_grid(yy, plt.accuracy, nrow = 1, rel_widths = c(7, 4))
# accuracy variance with background interference
plt.accuracyVariance.vs.background = d.ABCD %>% select(compound, contains("Accuracy"), Name, conc.expected) %>%
left_join(d.conc.blk, by = "compound") %>%
mutate(ratio.spk.vs.blk = conc.expected / conc.blk.mean) %>%
ggplot(aes(x = ratio.spk.vs.blk, y = Accuracy.sd, color = Name)) +
geom_point(size = 2, alpha = .5) +
geom_point(size = 2, shape = 21, fill = NA) +
geom_smooth(method = "lm", show.legend = F, aes(fill = Name), alpha = .1) +
scale_y_log10() +
scale_x_log10() +
annotation_logticks(sides = "lb") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = "right",
panel.grid = element_blank(),
legend.text = element_text(size = 13))
plt.accuracyVariance.vs.background
plt.accuracyAnalysis = plot_grid(
plt.accuracy, ggplot() + theme_void(), plt.accuracyVariance.vs.background,
nrow = 1, rel_widths = c(5, 1.5, 4))
plt.Validation.Accuracy.Recovery = plot_grid(
plot_grid(plt.recovery.all, plt.streamLine, rel_widths = c(6, 3)),
plt.accuracyAnalysis,
# nrow = 3,
# rel_heights = c(3, 3.6, 3)
nrow = 2,
rel_heights = c(2.2, 1)
)
plt.Validation.Accuracy.Recovery
# 15.3 X 18
# ----Compare with the prior without sorbent in presence of biomatrices
# the first formal validation study that renders low recovery
mypath = "/Users/Boyuan/Desktop/My publication/17. EMR Adipose RK metabolites/Archived/EMR_all data.xlsx"
# peak area
d.area = read_excel(mypath, sheet = "peak area")
d.area.copy = d.area
d.area = d.area %>% filter(Name %in% c("PRneat", "B", "PX", "PC", "PR", "BLK" )) %>%
gather(-c(`Data File`, Name, `Acq. Date-Time`), key = compound, value = area)
d.area.blk = d.area %>%
filter(Name == "BLK") %>% group_by(compound) %>%
summarise(area.blk.mean = mean(area), area.blk.std = sd(area))
d.area.spk = d.area %>% filter(Name != "BLK") %>% group_by(compound, Name) %>%
summarise(area.mean = mean(area),
area.sd = sd(area)) %>%
# subtract blank
left_join(d.area.blk, by = "compound") %>%
mutate(area.spk = area.mean - area.blk.mean,
area.spk.sd = sqrt( area.sd ^2 + area.blk.std ^ 2 )) %>%
select(compound, Name, contains("spk"))
# tidy up
x = d.area.spk %>% select(-area.spk.sd) %>%
spread(key = Name, value = area.spk)
y = d.area.spk %>% select(-area.spk) %>%
spread(key = Name, value = area.spk.sd)
colnames(y) = c("compound", colnames(y)[-1] %>% str_c(".sd"))
y
## # A tibble: 26 x 6
## # Groups: compound [26]
## compound B.sd PC.sd PR.sd PRneat.sd PX.sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3-HBA 376. 587. 1884. 397. 659.
## 2 3-HPAA 777. 453. 905. 357. 677.
## 3 3-HPPA 505. 768. 1032. 649. 1122.
## 4 3,4-DHBA 802. 546. 737. 286. 709.
## 5 3,4-DHPAA 1269. 182. 274. 353. 768.
## 6 3,4-DHPE 69.2 189. 602. 207. 360.
## 7 3,4-DHPPA 674. 643. 464. 552. 439.
## 8 4-HBA 3929. 2995. 2762. 1408. 5057.
## 9 4-HBOH 29.5 77.0 566. 215. 196.
## 10 4-HCA 947. 543. 1144. 1031. 916.
## # … with 16 more rows
d.area.spk.net = left_join(x, y, by = "compound")
# compute analyte loss at each step
d.area.spk.net = d.area.spk.net %>%
mutate(rec.extract = B / PX * 100,
rec.cleanup = PX / PC * 100,
rec.dryConcentrate = PC / PR * 100,
rec.dryConcentrate.sd = ((PC.sd / PC)^2 + (PR.sd / PR)^2) * rec.dryConcentrate )
d.area.spk.net.copy = d.area.spk.net %>%
select(compound, contains("rec")) %>%
select(-contains("sd")) %>%
gather(-1, key = step, value = recovery)
# Combine the recovery with and without sorbent in presence of biomatrices
# data from the first validation study (the one that failed due to low recovery at drying step without sorbent)
k1 = d.area.spk.net %>%
select(compound, contains("rec.dryConcentrate")) %>%
mutate(sorbent = "with biomatrix, no sorbent") %>% ungroup()
# data from the second validation study conducted with sorbent of C18 ca. 2~5 mg/600 uL ACN extract
k2 = d.recovery.all %>%
filter(step == "Dry") %>% filter(corrected == "Absolute") %>%
select(compound, mean, sd) %>%
mutate(sorbent = "with biomatrix, with C18") %>%
rename(rec.dryConcentrate = mean,
rec.dryConcentrate.sd = sd) %>% ungroup()
d.recoveryCompare.sorbentWithBiomatrices = k1 %>% rbind(k2)
# combine with when having biomatrix, with or without sorbent
d.recoveryCompare.sorbentWithBiomatrices =
# no biomatrix, no sorbent
d.opt.sorbent.recovery %>%
filter(sorbent == "Sorbent free") %>% ungroup() %>%
select(compound, recovery.mean, recovery.sd) %>%
rename(rec.dryConcentrate = recovery.mean,
rec.dryConcentrate.sd = recovery.sd) %>%
mutate(sorbent = "no biomatrix, no sorbent") %>%
rbind(d.recoveryCompare.sorbentWithBiomatrices)
# ordered matrix-sorbent condition
d.recoveryCompare.sorbentWithBiomatrices$sorbent =
d.recoveryCompare.sorbentWithBiomatrices$sorbent %>%
factor(levels = d.recoveryCompare.sorbentWithBiomatrices$sorbent %>% unique(), ordered = T)
#
# plot
# arrange compound from low to high recovery
jjj = d.recoveryCompare.sorbentWithBiomatrices %>%
group_by(compound) %>%
summarise(mean.rec = mean(rec.dryConcentrate)) %>%
arrange(mean.rec)
cmpd.ordered.dryRecovery = jjj$compound %>% unique()
d.recoveryCompare.sorbentWithBiomatrices$compound =
d.recoveryCompare.sorbentWithBiomatrices$compound %>%
factor(levels = cmpd.ordered.dryRecovery, ordered = T)
# now plot!
dg = .6
plt.recoveryCompare.sorbentWithBiomatrices =
d.recoveryCompare.sorbentWithBiomatrices %>%
filter(rec.dryConcentrate < 120) %>% # 4-HPPA, no biomatrix no sorbent the recoveryca. 150 outlier remove it
ggplot(aes(x = compound, y = rec.dryConcentrate, fill = sorbent)) +
geom_bar(stat = "identity", position = position_dodge(dg), alpha = 1, width = .7) +
theme(panel.grid = element_blank(),
legend.position = "bottom",
legend.text = element_text(size = 11),
legend.title = element_blank(),
axis.text = element_text(size = 11)) +
scale_y_continuous(breaks = seq(0, 120, 20)) +
geom_errorbar(aes( ymin = rec.dryConcentrate - rec.dryConcentrate.sd,
ymax = rec.dryConcentrate + rec.dryConcentrate.sd,
color = sorbent),
position = position_dodge(dg), width = .5) +
coord_flip() +
labs(y = "Recovery (%) ") +
scale_color_manual(values = c("snow4", "tomato", "skyblue")) +
scale_fill_manual(values = c("snow4", "tomato", "skyblue"))
# plt.recoveryCompare.sorbentWithBiomatrices
# plot sorbent rsult
plot_grid(plt.sorbentOptimization,
plt.recoveryCompare.sorbentWithBiomatrices,
nrow = 1, rel_widths = c(1.5, 1))
# big screen 14.7 X 8.16
# Check the effect of with vs. without sorbent validated at level C, i.e. about 50 ng/mL
d.area = read_excel(path, sheet = "peak area")
gg = d.area %>%
filter(Name %in% c("C", "no silica gel")) %>%
select(-c(`Acq. Date-Time`, `Data File`)) %>%
gather(-Name, key = compound, value = resp) %>%
group_by(Name, compound) %>%
summarise(resp.mean = mean(resp),
resp.sd = sd(resp))
gg = gg %>%
group_by(compound) %>%
mutate(resp.mean.normalize = resp.mean / mean(resp.mean),
resp.sd.normalize = resp.sd / mean(resp.mean)) %>%
select(Name, compound, contains("normalize"))
mj = gg %>% filter(Name == "C") %>%
arrange(resp.mean.normalize)
gg$compound = factor(gg$compound, levels = rev(mj$compound), ordered = T)
gg %>% filter(compound != "4-HPPA") %>%
ggplot(aes(x = compound, y = resp.mean.normalize, color = Name, fill = Name)) +
geom_bar(stat = "identity", alpha = .6, position = position_dodge(.6)) +
geom_errorbar(aes(ymin = resp.mean.normalize - resp.sd.normalize,
ymax = resp.mean.normalize + resp.sd.normalize),
width = .3, position = position_dodge(.6)) +
# coord_flip() +
labs(y = "Normalized peak area") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
panel.grid = element_blank())
# -- check some numbers for paper writing
d.checkProcessingEfficency.vs.accuracy.B.Level = d.ABCD %>% filter(Name == "B") %>%
select(compound, Accuracy.mean) %>%
left_join(d.recovery.all %>% filter(step == "Overall processing efficiency") %>%
filter(corrected == "ISTD-corrected"), by = "compound") %>%
select(compound, Accuracy.mean, mean) %>%
rename(ProcessingEfficiency.corrected = mean) %>%
mutate(ratio = Accuracy.mean / ProcessingEfficiency.corrected) %>%
arrange(ratio)
d.checkProcessingEfficency.vs.accuracy.B.Level$compound =
factor(d.checkProcessingEfficency.vs.accuracy.B.Level$compound,
levels = d.checkProcessingEfficency.vs.accuracy.B.Level$compound,
ordered = T)
d.checkProcessingEfficency.vs.accuracy.B.Level %>%
ggplot(aes(x = compound, y = ratio)) +
geom_bar(stat = "identity") +
coord_flip() +
geom_segment(aes(x = 1, xend = 26, y = 1, yend = 1))
d.recovery.all$step %>% unique()
## [1] "Extraction"
## [2] "EMR"
## [3] "Dry"
## [4] "Overall recovery"
## [5] "matrixEffect"
## [6] "Overall processing efficiency"
px = d.recovery.all %>%
filter(step == "Overall processing efficiency") %>%
filter(corrected == "ISTD-corrected") %>%
arrange(mean) %>%
as.data.frame()
# px
px$mean %>% hist(breaks = 26)
kkkkk = d.recovery.all %>%
filter(step == "Overall processing efficiency") %>%
select(-step, -sd) %>%
spread(key = corrected, value = mean) %>%
mutate(correctAmoung = `ISTD-corrected` - Absolute)
kkkkk$correctAmoung %>% hist(breaks = 26)
pp = d.recoveryCompare.sorbentWithBiomatrices %>%
select(-contains(".sd")) %>%
spread(sorbent, rec.dryConcentrate) %>%
mutate(sorb.improv = `with biomatrix, with C18` - `with biomatrix, no sorbent`) %>%
arrange(sorb.improv) %>%
as.data.frame()
# pp