An R notebook to analyze your sleep and step data recorded by a Pebble watch
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

287 lines
8.3 KiB

---
title: "Health stats"
output:
  html_document:
    toc: yes
  pdf_document: default
  html_notebook:
    code_folding: hide
    toc: yes
---
```{r setup, include=FALSE}
# Default options for every chunk: do not echo the source code and do not
# print warnings in the rendered document.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE
)
```
# Summary
This document provides an overview of health data recorded by *Gadgetbridge* using a *Pebble* smartwatch.
# Preparation
Put the *Gadgetbridge* database (a file named `gadgetbridge`) in the current folder, then knit this `.Rmd` file.
You should also set your **timezone** in the `timezone` variable of the data-loading chunk.
The notebook reads two tables from the database:

* `PEBBLE_HEALTH_ACTIVITY_SAMPLE` is a simple table containing steps and timestamps.
* `PEBBLE_HEALTH_ACTIVITY_OVERLAY` is more complex. It tracks activities (sleep, deep sleep, nap…) with a start and an end date. Note that these date ranges may overlap: you can be both in sleep and deep sleep at the same time.

I have decided that any event happening after 8pm is registered for the next day. For instance:

* if you go to bed at 9pm on Tuesday and wake up at 7am on Wednesday, the data will return 10 hours of sleep on Wednesday
* if you go to bed at 7pm on Tuesday, a part of your sleep will be added to Tuesday. The cutoff won't be exactly 8pm: it will depend on the duration of the first sleep session recorded by your watch.
```{r include=FALSE}
# Load data ------------------------------------------------------------------
library(DBI)
library(RSQLite)
library(tidyverse)
library(lubridate)
library(scales)

# Read both tables from the SQLite file named "gadgetbridge" in the working
# directory, then close the connection.
con <- DBI::dbConnect(RSQLite::SQLite(), dbname = "gadgetbridge")
steps_data <- dbReadTable(con, "PEBBLE_HEALTH_ACTIVITY_SAMPLE")
sleep_data <- dbReadTable(con, "PEBBLE_HEALTH_ACTIVITY_OVERLAY")
dbDisconnect(con)
#str(steps_data)

# Transform data -------------------------------------------------------------
## Interpret timestamps
# Time zone used to express the recorded timestamps as local clock time.
timezone <- "Europe/Paris"

# NOTE(review): unlike the sleep columns below, the step timestamps are not
# converted with with_tz(), so `steps_data$datetime` keeps as_datetime()'s
# default time zone (UTC).
steps_data$datetime <- as_datetime(steps_data$TIMESTAMP)
sleep_data$datetime_from <- with_tz(
  as_datetime(sleep_data$TIMESTAMP_FROM),
  timezone
)
sleep_data$datetime_to <- with_tz(
  as_datetime(sleep_data$TIMESTAMP_TO),
  timezone
)

## Calculate activity duration
## Data recorded after 8pm is attached to the next day
# dhours() adds exactly 4 * 3600 seconds. A Period (hours(4)) would return NA
# when the shifted clock time falls into a daylight-saving gap.
sleep_data$day <- date(sleep_data$datetime_from + dhours(4))

# Duration of each record, in minutes.
sleep_data$duration <- as.numeric(
  difftime(sleep_data$datetime_to, sleep_data$datetime_from, units = "mins")
)

# Bed and wake-up times are only taken from RAW_KIND == 1 records (the kind
# summed as `sleep` below); every other record gets NA.
sleep_data$bed_time <- case_when(
  sleep_data$RAW_KIND == 1 ~ sleep_data$datetime_from
)
sleep_data$wakeup_time <- case_when(
  sleep_data$RAW_KIND == 1 ~ sleep_data$datetime_to
)

# min()/max() variants that return NA (same class and time zone as the input)
# instead of +/-Inf with a warning when every value is missing, which happens
# on days without any RAW_KIND == 1 record.
earliest_or_na <- function(x) {
  if (all(is.na(x))) x[NA_integer_] else min(x, na.rm = TRUE)
}
latest_or_na <- function(x) {
  if (all(is.na(x))) x[NA_integer_] else max(x, na.rm = TRUE)
}

## Convert RAW_KIND to the corresponding activity and summarize values
# One row per day, with the total minutes of each activity kind. Summing a
# subset of `duration` (rather than reshaping to one column per kind) yields 0
# for a kind that never occurs instead of failing on a missing column.
sleep_data <- sleep_data %>%
  group_by(day) %>%
  summarise(
    sleep = sum(duration[RAW_KIND %in% 1], na.rm = TRUE),
    deep_sleep = sum(duration[RAW_KIND %in% 2], na.rm = TRUE),
    nap = sum(duration[RAW_KIND %in% 3], na.rm = TRUE),
    deep_nap = sum(duration[RAW_KIND %in% 4], na.rm = TRUE),
    walk = sum(duration[RAW_KIND %in% 5], na.rm = TRUE),
    # Really not sure about this one
    run = sum(duration[RAW_KIND %in% 6], na.rm = TRUE),
    bed_time = earliest_or_na(bed_time),
    wakeup_time = latest_or_na(wakeup_time)
  )
```
# Visualisation
## Steps
### Distribution of steps
```{r}
# Total number of steps per calendar day.
# Day boundaries are taken in the configured `timezone` (defined in the
# data-loading chunk); with_tz() only changes how the instant is displayed, so
# it is harmless if `datetime` is already expressed in that zone.
data <- steps_data %>%
  mutate(date = date(with_tz(datetime, timezone))) %>%
  group_by(date) %>%
  summarise(steps = sum(STEPS))

# Histogram of the daily totals, in bins of 500 steps.
ggplot(data, aes(steps)) +
  geom_histogram(binwidth = 500) +
  theme_minimal() +
  labs(x = "Steps", y = "Number of occurrences")
```
### Distribution of steps per day of week
```{r}
# Total number of steps per calendar day, labelled with its day of the week.
# Day boundaries are taken in the configured `timezone` (defined in the
# data-loading chunk). The weekday is derived after aggregating by date, so the
# result is an ungrouped table with one row per day.
data <- steps_data %>%
  mutate(date = date(with_tz(datetime, timezone))) %>%
  group_by(date) %>%
  summarise(steps = sum(STEPS)) %>%
  mutate(
    wday = wday(
      date,
      label = TRUE,
      # Weeks start on Monday unless the lubridate option says otherwise.
      week_start = getOption("lubridate.week.start", 1)
    )
  )

# One box per day of the week, showing the spread of daily step totals.
ggplot(data, aes(x = wday, y = steps)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Day of week", y = "Number of steps")
```
### Number of steps per month
```{r}
# Total number of steps per month; each month is represented by the datetime
# of its first instant.
data <- steps_data %>%
  group_by(month = floor_date(datetime, unit = "month")) %>%
  summarise(steps = sum(STEPS))

# Monthly totals as bars, with a linear trend line drawn on top.
ggplot(data, aes(x = month, y = steps)) +
  geom_col() +
  geom_smooth(method = lm) +
  scale_x_datetime(labels = date_format("%Y-%m")) +
  scale_y_continuous(
    breaks = seq(0, 500000, by = 50000),
    # Use a space as the thousands separator.
    labels = function(x) format(x, big.mark = " ")
  ) +
  theme_minimal() +
  labs(x = "Month", y = "Number of steps")
```
### Average number of steps per hour of the day, year after year
```{r}
# Cumulative steps over the course of each day, then averaged per time of day
# and per year.
data <- steps_data %>%
  mutate(
    # Express the instants in the configured `timezone` (defined in the
    # data-loading chunk) so that hours match the local clock.
    local_time = with_tz(datetime, timezone),
    date = date(local_time),
    # Clock time of day as plain seconds since midnight; the x-axis breaks
    # below are expressed in seconds.
    time = hour(local_time) * 3600 +
      minute(local_time) * 60 +
      second(local_time),
    year = year(local_time)
  ) %>%
  # cumsum() depends on row order, so sort chronologically first.
  arrange(local_time) %>%
  group_by(date) %>%
  mutate(cumsteps = cumsum(STEPS)) %>%
  group_by(time, year) %>%
  summarise(
    min = min(cumsteps),
    max = max(cumsteps),
    average = mean(cumsteps)
  ) %>%
  ungroup()

# Step curve of the average cumulative count, one panel per year, with one
# axis tick per hour.
ggplot(data) +
  geom_step(aes(x = time, y = average)) +
  theme_minimal() +
  labs(
    title = "Average number of steps per hour of the day",
    x = "Hour",
    y = "Number of steps"
  ) +
  scale_x_continuous(
    breaks = seq(
      0,
      period_to_seconds(hours(24)),
      period_to_seconds(hours(1))
    ),
    labels = seq(0, 24, 1)
  ) +
  facet_wrap(vars(year))
```
### Active time per day of week
```{r}
# Hours of walking per day (the `walk` column is in minutes), labelled with
# the day of the week.
data <- sleep_data %>%
  mutate(
    wday = wday(
      day,
      label = TRUE,
      # Weeks start on Monday unless the lubridate option says otherwise.
      week_start = getOption("lubridate.week.start", 1)
    )
  ) %>%
  group_by(day, wday) %>%
  summarise(walk_time = sum(walk) / 60)

# One box per day of the week.
ggplot(data, aes(x = wday, y = walk_time)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Day of week", y = "Hours active")
```
## Sleep
### Distribution of sleep duration
```{r}
# Sleep duration per day, converted from minutes to hours.
data <- sleep_data %>%
  group_by(day) %>%
  summarise(sleep_duration = sum(sleep) / 60)

# Histogram of the daily sleep duration, with one axis tick per hour.
ggplot(data) +
  geom_histogram(aes(sleep_duration), bins = 50) +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  theme_minimal() +
  labs(x = "Sleep duration (hours)", y = "Number of occurrences")
```
### Distribution of deep sleep duration
```{r}
# Deep sleep duration per day, converted from minutes to hours.
data <- sleep_data %>%
  group_by(day) %>%
  summarise(deep_sleep_duration = sum(deep_sleep) / 60)

# Histogram of the daily deep sleep duration, with one axis tick per hour.
ggplot(data) +
  geom_histogram(aes(deep_sleep_duration), bins = 50) +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  theme_minimal() +
  labs(x = "Deep sleep duration (hours)", y = "Number of occurrences")
```
### Distribution of sleep duration per day of week, year after year
```{r}
# Sleep duration per day in hours, labelled with its day of the week and year.
# Weekday and year are derived after aggregating by day, so the result is an
# ungrouped table with one row per day.
data <- sleep_data %>%
  group_by(day) %>%
  summarise(sleep_duration = sum(sleep) / 60) %>%
  mutate(
    wday = wday(
      day,
      label = TRUE,
      # Weeks start on Monday unless the lubridate option says otherwise.
      week_start = getOption("lubridate.week.start", 1)
    ),
    year = year(day)
  )

# One box per day of the week, one row of panels per year.
ggplot(data, aes(x = wday, y = sleep_duration)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x = "Day of week", y = "Sleep duration (hours)") +
  facet_grid(rows = vars(year))
```
### Distribution of nap duration
```{r}
# Nap duration in minutes, keeping only the days on which a nap was recorded.
data <- sleep_data %>%
  filter(nap > 0) %>%
  group_by(day) %>%
  summarise(nap_time = sum(nap))

# Histogram of nap duration, with one axis tick every 15 minutes.
ggplot(data) +
  geom_histogram(aes(nap_time), bins = 10) +
  scale_x_continuous(breaks = seq(0, 240, 15)) +
  theme_minimal() +
  labs(x = "Nap duration (minutes)", y = "Number of occurrences")
```
### Bedtime and wake-up time by year
```{r}
# Clock time (hours and minutes, seconds dropped) of going to bed and of
# waking up for each day. Days where either value is missing are removed.
data <- sleep_data %>%
  mutate(
    year = year(day),
    # hms::hms() builds the time of day directly from its components.
    bed_time_hms = hms::hms(
      hours = hour(bed_time),
      minutes = minute(bed_time)
    ),
    wakeup_time_hms = hms::hms(
      hours = hour(wakeup_time),
      minutes = minute(wakeup_time)
    )
  ) %>%
  drop_na(bed_time_hms, wakeup_time_hms)

# Overlaid histograms: bed times in orange, wake-up times in blue, one row of
# panels per year. The x-axis is in seconds since midnight, with one tick per
# hour.
ggplot(data) +
  geom_histogram(aes(bed_time_hms), fill = "orange", alpha = 0.5, bins = 30) +
  geom_histogram(aes(wakeup_time_hms), fill = "blue", alpha = 0.5, bins = 30) +
  scale_x_continuous(
    breaks = seq(
      0,
      period_to_seconds(hours(24)),
      period_to_seconds(hours(1))
    ),
    labels = seq(0, 24, 1)
  ) +
  theme_minimal() +
  labs(x = "Bed time and wakeup time", y = "Number of occurrences") +
  facet_grid(rows = vars(year), scales = "free_y")
```