{hyenaR}
:create_sample_starting.table()
# A tibble: 177 × 5
ID sampleID collection_time type treatment
<chr> <chr> <dttm> <chr> <chr>
1 M-052 N0340a 1999-04-04 12:55:00 faeces liq N
2 M-052 N0340b 1999-04-04 12:55:00 faeces liq N
3 M-052 N0340c 1999-04-04 12:55:00 mucus liq N
4 M-052 N0340d 1999-04-04 12:55:00 mucus liq N
5 M-052 N0340e 1999-04-04 12:55:00 mucus Ethanol
6 M-123 N0341a 1999-04-11 11:15:00 faeces liq N
7 M-123 N0341b 1999-04-11 11:15:00 faeces liq N
8 M-123 N0341c 1999-04-11 11:15:00 mucus liq N
9 M-123 N0341d 1999-04-11 11:15:00 parasite 10% Form
10 M-123 N0341e 1999-04-11 11:15:00 mucus liq N
# … with 167 more rows
stringr
to filter text columnsSimple, Consistent Wrappers for Common String Operations
‘string’ = ‘character string’
stringr
to filter text columnscreate_sample_starting.table() %>%
mutate(is_liver = stringr::str_detect(string = type,
pattern = "liver"))
# A tibble: 9,029 × 6
ID sampleID collection_time type treatment is_liver
<chr> <chr> <dttm> <chr> <chr> <lgl>
1 M-013 N0000a 1996-07-22 18:50:00 faeces liq N FALSE
2 M-013 N0000b 1996-07-22 18:50:00 faeces liq N FALSE
3 L-043 N0001 1996-07-30 16:30:00 testicle liq N FALSE
4 L-043 N0002a 1996-07-30 14:24:00 liver 10% Form TRUE
5 L-043 N0002 1996-07-30 16:30:00 liver liq N TRUE
6 L-043 N0003a 1996-07-30 14:30:00 spleen 10% Form FALSE
7 L-043 N0003 1996-07-30 16:30:00 spleen liq N FALSE
8 L-043 N0004a 1996-07-30 15:30:00 salivary gland 10% Form FALSE
9 L-043 N0004 1996-07-30 16:30:00 salivary gland liq N FALSE
10 L-043 N0005a 1996-07-30 15:30:00 lymph node 10% Form FALSE
# … with 9,019 more rows
stringr
to filter text columnscreate_sample_starting.table() %>%
filter(stringr::str_detect(string = type,
pattern = "liver")) %>%
mutate(year = lubridate::year(collection_time)) %>%
group_by(year) %>%
count() %>%
ungroup()
# A tibble: 16 × 2
year n
<dbl> <int>
1 1996 2
2 1997 6
3 1998 4
4 2002 5
5 2003 2
6 2004 1
7 2005 3
8 2006 3
9 2007 4
10 2008 2
11 2011 1
12 2012 4
13 2017 2
14 2018 4
15 2019 5
16 NA 18
stringr
to filter text columnscreate_sample_starting.table() %>%
filter(stringr::str_detect(string = type,
pattern = "liver|kidney")) %>%
mutate(year = lubridate::year(collection_time)) %>%
group_by(year) %>%
count() %>%
ungroup()
# A tibble: 16 × 2
year n
<dbl> <int>
1 1996 2
2 1997 8
3 1998 6
4 2002 9
5 2003 4
6 2004 1
7 2005 5
8 2006 5
9 2007 7
10 2008 6
11 2011 3
12 2012 8
13 2017 3
14 2018 8
15 2019 8
16 NA 28
stringr
to filter text columnsstringr
to filter text columnsstringr
to filter text columnsextract_database_table("injuries") %>%
filter(stringr::str_detect(tolower(cause),
pattern = "strep|disease")) %>%
mutate(year = lubridate::year(date_time)) %>%
group_by(year) %>%
summarise(n_disease = n())
# A tibble: 23 × 2
year n_disease
<dbl> <int>
1 1998 3
2 1999 2
3 2000 1
4 2001 2
5 2002 15
6 2003 6
7 2004 5
8 2005 5
9 2006 8
10 2007 4
# … with 13 more rows
stringr
to filter text columnsplot_data <- extract_database_table("injuries") %>%
filter(stringr::str_detect(tolower(cause), pattern = "strep|disease")) %>%
mutate(year = lubridate::year(date_time)) %>%
group_by(year) %>%
summarise(n_disease = n())
ggplot(data = plot_data) +
geom_col(aes(x = year, y = n_disease)) +
labs(y = "Number of diseased individuals sighted",
title = "Number of disease sightings per year",
subtitle = "Includes sightings caused by both streptococcus and other disease") +
scale_x_continuous(limits = c(1996, NA)) +
theme_classic() +
theme(axis.title.x = element_blank())
stringr
to filter text columns## Replace character with something new
stringr::str_replace_all(string = "liver_kidney/brain",
pattern = "_|/",
replacement = ", ")
[1] "liver, kidney, brain"
create_id_starting.table()
to find all individuals born in main clans in the years 1997 - 2021 (where we have full year observation).