{hyenaR}
:select()
/where()
group_by()
/filter()
create_id_starting.table()
to find all individuals born in main clans in the years 1997 - 2021.NOTE:
By itself, ‘clan.birth’ will not help here because it ignores from/to. ‘clan.birth’ is intended as a filter in addition to other arguments.
cubs_wdetails <- cubs %>%
mutate(birth_clan = fetch_id_clan.birth(ID),
birth_date = fetch_id_date.birth(ID),
sex = fetch_id_sex(ID),
lifespan = fetch_id_duration.lifespan(ID))
cubs_wdetails
# A tibble: 2,532 × 5
ID birth_clan birth_date sex lifespan
<chr> <chr> <date> <chr> <dbl>
1 A-059 A 2002-10-19 male 1.67
2 A-060 A 2002-10-20 female 1.03
3 A-061 A 2003-07-25 <NA> 0.137
4 A-062 A 2006-08-02 <NA> 0.131
5 A-063 A 2006-08-02 <NA> 0.131
6 A-090 A 1997-01-20 male 0.364
7 A-091 A 1997-01-20 male 0.600
8 A-094 A 1997-05-07 <NA> 0.0684
9 A-096 A 1997-06-12 female 0.0301
10 A-097 A 1997-06-12 male 0.0301
# … with 2,522 more rows
NOTE:
fetch_id_duration.lifespan()
and fetch_id_age(at = death_date)
are the same.
birth clan/birth date: A mistake in the data! All individuals should have an assigned birth clan and date.
sex: Sex not (yet) determined. Particularly common for individuals that died young.
lifespan/death date: Individual is right censored (i.e. hasn’t died yet!)
select()
/where()
NOTE:
If we are just applying one function we can just write the function name.
select()
/where()
NOTE:
If we want something more complicated we need to include our own function.
cubs_wdetails %>%
#Select columns that contain at least 1 NA
select(where(fn = function(x) any(is.na(x))))
# A tibble: 2,532 × 2
sex lifespan
<chr> <dbl>
1 male 1.67
2 female 1.03
3 <NA> 0.137
4 <NA> 0.131
5 <NA> 0.131
6 male 0.364
7 male 0.600
8 <NA> 0.0684
9 female 0.0301
10 male 0.0301
# … with 2,522 more rows
# A tibble: 2,532 × 6
ID birth_clan birth_date sex lifespan birth_year
<chr> <chr> <date> <chr> <dbl> <dbl>
1 A-059 A 2002-10-19 male 1.67 2002
2 A-060 A 2002-10-20 female 1.03 2002
3 A-061 A 2003-07-25 <NA> 0.137 2003
4 A-062 A 2006-08-02 <NA> 0.131 2006
5 A-063 A 2006-08-02 <NA> 0.131 2006
6 A-090 A 1997-01-20 male 0.364 1997
7 A-091 A 1997-01-20 male 0.600 1997
8 A-094 A 1997-05-07 <NA> 0.0684 1997
9 A-096 A 1997-06-12 female 0.0301 1997
10 A-097 A 1997-06-12 male 0.0301 1997
# … with 2,522 more rows
cubs_wbirthyr %>%
group_by(birth_year) %>%
summarise(total_cubs = n()) %>%
ungroup() %>%
arrange(desc(total_cubs))
# A tibble: 25 × 2
birth_year total_cubs
<dbl> <int>
1 2016 164
2 2010 149
3 2017 141
4 2009 130
5 2011 126
6 2019 125
7 2013 122
8 2014 118
9 2021 118
10 2015 114
# … with 15 more rows
cubs_wbirthyr %>%
group_by(birth_year, birth_clan) %>%
summarise(total_cubs = n()) %>%
ungroup() %>%
arrange(desc(total_cubs))
# A tibble: 198 × 3
birth_year birth_clan total_cubs
<dbl> <chr> <int>
1 2017 A 42
2 2009 L 40
3 2004 L 33
4 2016 A 33
5 2016 S 32
6 2007 L 30
7 2010 M 30
8 2014 A 30
9 2010 L 27
10 2016 F 27
# … with 188 more rows
cubs_wbirthyr %>%
group_by(birth_year, birth_clan) %>%
summarise(total_cubs = n()) %>%
ungroup() %>%
group_by(birth_clan) %>%
#Return the record that has the max number of cubs in EACH CLAN
filter(total_cubs == max(total_cubs))
# A tibble: 8 × 3
# Groups: birth_clan [8]
birth_year birth_clan total_cubs
<dbl> <chr> <int>
1 2009 L 40
2 2010 M 30
3 2016 F 27
4 2016 N 19
5 2016 S 32
6 2016 T 21
7 2017 A 42
8 2021 E 16
group_by()
/filter()
This allows us to apply filters to each group separately!
cubs_wbirthyr %>%
# Find longest lifespan in each clan!
group_by(birth_clan) %>%
# Find record that has max lifespan in EACH CLAN
filter(lifespan == max(lifespan, na.rm = TRUE))
# A tibble: 8 × 6
# Groups: birth_clan [8]
ID birth_clan birth_date sex lifespan birth_year
<chr> <chr> <date> <chr> <dbl> <dbl>
1 A-124 A 2000-04-21 male 18.2 2000
2 E-094 E 2000-06-28 male 17.6 2000
3 F-107 F 2003-09-29 female 17.5 2003
4 L-187 L 2002-06-20 female 17.7 2002
5 M-132 M 1999-03-14 female 17.7 1999
6 N-102 N 2000-02-16 female 16.9 2000
7 S-087 S 1999-11-14 male 17.2 1999
8 T-014 T 1997-09-08 female 18.5 1997
group_by()
/filter()
Find earliest and latest individual(s) born in each year
# A tibble: 78 × 6
ID birth_clan birth_date sex lifespan birth_year
<chr> <chr> <date> <chr> <dbl> <dbl>
1 L-108 L 1997-12-31 male 9.43 1997
2 L-109 L 1997-12-31 male 1.39 1997
3 M-098 M 1997-01-09 female 11.3 1997
4 M-099 M 1997-01-09 female 14.8 1997
5 L-107 L 1998-01-04 male 1.35 1998
6 S-085 S 1998-12-23 male 14.4 1998
7 F-090 F 1999-12-22 male 5.08 1999
8 M-132 M 1999-03-14 female 17.7 1999
9 M-133 M 1999-03-14 male 6.51 1999
10 F-097 F 2000-12-30 female 15.3 2000
# … with 68 more rows
group_by()
/filter()
Find earliest and latest individual(s) born in Airstrip each year
# A tibble: 68 × 6
ID birth_clan birth_date sex lifespan birth_year
<chr> <chr> <date> <chr> <dbl> <dbl>
1 A-090 A 1997-01-20 male 0.364 1997
2 A-091 A 1997-01-20 male 0.600 1997
3 A-114 A 1997-08-23 female 7.92 1997
4 A-101 A 1998-01-18 female 5.60 1998
5 A-105 A 1998-10-15 female 5.68 1998
6 A-106 A 1998-10-15 female 15.7 1998
7 A-107 A 1999-03-17 female 17.2 1999
8 A-108 A 1999-03-17 female 3.33 1999
9 A-119 A 1999-12-03 female 13.5 1999
10 A-120 A 1999-12-03 female 0.301 1999
# … with 58 more rows
Warning
This is why we always need to be careful to check grouping!!
# A tibble: 198 × 3
# Groups: birth_year [25]
birth_year birth_clan total_cubs
<dbl> <chr> <int>
1 1997 A 9
2 1997 E 4
3 1997 F 2
4 1997 L 16
5 1997 M 18
6 1997 N 9
7 1997 T 4
8 1998 A 8
9 1998 E 4
10 1998 F 1
# … with 188 more rows
# A tibble: 198 × 3
birth_year birth_clan total_cubs
<dbl> <chr> <int>
1 1997 A 9
2 1997 E 4
3 1997 F 2
4 1997 L 16
5 1997 M 18
6 1997 N 9
7 1997 T 4
8 1998 A 8
9 1998 E 4
10 1998 F 1
# … with 188 more rows
Warning
This is why we always need to be careful to check grouping!!
cubs_wbirthyr %>%
group_by(birth_year, birth_clan) %>%
summarise(total_cubs = n()) %>%
filter(total_cubs == max(total_cubs))
# A tibble: 26 × 3
# Groups: birth_year [25]
birth_year birth_clan total_cubs
<dbl> <chr> <int>
1 1997 M 18
2 1998 M 14
3 1999 M 17
4 2000 M 20
5 2001 L 24
6 2002 A 21
7 2003 L 21
8 2004 L 33
9 2005 L 21
10 2006 M 18
# … with 16 more rows
cubs_wbirthyr_month <- cubs_wbirthyr %>%
mutate(birth_month = lubridate::month(birth_date))
cubs_wbirthyr_month
# A tibble: 2,532 × 7
ID birth_clan birth_date sex lifespan birth_year birth_month
<chr> <chr> <date> <chr> <dbl> <dbl> <dbl>
1 A-059 A 2002-10-19 male 1.67 2002 10
2 A-060 A 2002-10-20 female 1.03 2002 10
3 A-061 A 2003-07-25 <NA> 0.137 2003 7
4 A-062 A 2006-08-02 <NA> 0.131 2006 8
5 A-063 A 2006-08-02 <NA> 0.131 2006 8
6 A-090 A 1997-01-20 male 0.364 1997 1
7 A-091 A 1997-01-20 male 0.600 1997 1
8 A-094 A 1997-05-07 <NA> 0.0684 1997 5
9 A-096 A 1997-06-12 female 0.0301 1997 6
10 A-097 A 1997-06-12 male 0.0301 1997 6
# … with 2,522 more rows
group_by()
/summarise()
to show how many individuals are left or right censored.ifelse
.case_when()
function. Can you use this to make 3 groups instead of 2?