The four pillars:
Diagnostic analytics seek to explain why something happened in the data. Here, we are looking for relationships within the data.
machine_categories = tb$category %>% unique()
tb_cat_dist = lapply(machine_categories, function(c) {
tmp = tb %>%
filter(category == c) %>%
pull(dt) %>%
summary()
tmp %>%
matrix(nrow = 1) %>%
as_tibble() %>%
magrittr::set_colnames(names(tmp))
}) %>%
bind_rows() %>%
mutate(category = machine_categories)
tb_cat_dist %>%
left_join(
tb %>%
group_by(category) %>%
summarize(tot_dt = sum(dt), dt_inst = n()) %>%
mutate(p_dt = tot_dt/sum(tot_dt)),
by = "category"
) %>%
mutate(Mean = round(Mean, 2)) %>%
arrange(desc(tot_dt)) %>%
DT::datatable() %>%
DT::formatPercentage(c("p_dt"), digits = 2)
reason_codes = tb$reason_code %>% unique()
tb_reason_dist = lapply(reason_codes, function(c) {
tmp = tb %>%
filter(reason_code == c) %>%
pull(dt) %>%
summary()
tmp %>%
matrix(nrow = 1) %>%
as_tibble() %>%
magrittr::set_colnames(names(tmp))
}) %>%
bind_rows() %>%
mutate(reason_code = reason_codes)
tb_reason_dist %>%
left_join(
tb %>%
group_by(reason_code) %>%
summarize(tot_dt = sum(dt), dt_inst = n()) %>%
mutate(p_dt = tot_dt/sum(tot_dt)),
by = "reason_code"
) %>%
mutate(Mean = round(Mean, 2)) %>%
arrange(desc(tot_dt)) %>%
DT::datatable() %>%
DT::formatPercentage(c("p_dt"), digits = 2)
“No Operator” and “No Work” cause ~66% of the total downtime. When are these codes occurring?
clrs = hue_pal()(2)
tb_day_summary_tmp = tb %>%
filter(reason_code %in% c("No Operator", "No Work")) %>%
group_by(day, reason_code) %>%
summarize(
dt_instances = n(), avg_dt = mean(dt), med_dt = median(dt),
tot_dt = sum(dt)
) %>%
ungroup()
tb_day_summary = lapply(c("No Operator", "No Work"), function(i) {
tb_day_summary_tmp %>%
filter(reason_code == i) %>%
complete(day = seq(min(day), max(day), by = 'day')) %>%
mutate(wday = weekdays(day)) %>%
replace_na(list(reason_code = i))
}) %>% bind_rows()
p = tb_day_summary %>%
ggplot(aes(day, dt_instances, color = reason_code)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "loess", formula = y ~ x) +
labs(color = "") +
theme_minimal()
ggplotly(p) %>%
layout(
xaxis = list(title = "day"),
yaxis = list(title = "downtime instances"),
title = list(text = paste0(
"downtime instances per day",
"<br>",
"<sup>",
'"No Operator" and "No Work"',
"</sup>"
))
)
work_week_order = weekdays(x=as.Date(seq(7), origin="1950-01-01"))
p = tb_day_summary %>%
replace_na(list(dt_instances = 0)) %>%
mutate(wday = factor(wday, work_week_order)) %>%
group_by(wday, reason_code) %>%
summarize(mu_instances = mean(dt_instances)) %>%
ggplot(aes(reason_code, mu_instances, fill = wday)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_fill_npg() +
labs(
fill = "", x = "", y = "average # of instances",
title = "entire period"
) +
theme_minimal()
ggplotly(p)
work_week_order = weekdays(x=as.Date(seq(7), origin="1950-01-01"))
p = tb_day_summary %>%
filter(day < as.Date("2020/11/01")) %>%
replace_na(list(dt_instances = 0)) %>%
mutate(wday = factor(wday, work_week_order)) %>%
group_by(wday, reason_code) %>%
summarize(mu_instances = mean(dt_instances)) %>%
ggplot(aes(reason_code, mu_instances, fill = wday)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_fill_npg() +
labs(
fill = "", x = "", y = "average # of instances",
title = "before November 1, 2020"
) +
theme_minimal()
ggplotly(p)
work_week_order = weekdays(x=as.Date(seq(7), origin="1950-01-01"))
p = tb_day_summary %>%
filter(day >= as.Date("2020/11/01")) %>%
replace_na(list(dt_instances = 0)) %>%
mutate(wday = factor(wday, work_week_order)) %>%
group_by(wday, reason_code) %>%
summarize(mu_instances = mean(dt_instances)) %>%
ggplot(aes(reason_code, mu_instances, fill = wday)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_fill_npg() +
labs(
fill = "", x = "", y = "average # of instances",
title = "after (including) November 1, 2020"
) +
theme_minimal()
ggplotly(p)
work_week_order = weekdays(x=as.Date(seq(7), origin="1950-01-01"))
p = tb_day_summary %>%
filter(day >= as.Date("2020/11/01")) %>%
replace_na(list(dt_instances = 0)) %>%
mutate(wday = factor(wday, rev(work_week_order))) %>%
ggplot(aes(wday, dt_instances)) +
facet_wrap(~reason_code, scales = "free_x") +
geom_boxplot() +
scale_fill_npg() +
labs(fill = "", x = "", y = "# of instances") +
coord_flip() +
theme_minimal()
ggplotly(p) %>%
layout(title = "after (including) November 1, 2020")
tb_day_summary_plant_tmp = tb %>%
filter(reason_code %in% c("No Operator", "No Work")) %>%
group_by(day, reason_code, plant) %>%
summarize(
dt_instances = n(), avg_dt = mean(dt), med_dt = median(dt),
tot_dt = sum(dt)
) %>%
ungroup()
tb_day_plant_summary = lapply(c("No Operator", "No Work"), function(i) {
tb_day_summary_plant_tmp %>%
filter(reason_code == i) %>%
group_by(plant) %>%
complete(day = seq(min(tb$day), max(tb$day), by = 'day')) %>%
mutate(wday = weekdays(day)) %>%
replace_na(list(reason_code = i))
}) %>% bind_rows() %>%
ungroup()
work_week_order = weekdays(x=as.Date(seq(7), origin="1950-01-01"))
p = tb_day_plant_summary %>%
filter(day >= as.Date("2020/11/01")) %>%
replace_na(list(dt_instances = 0)) %>%
mutate(wday = factor(wday, work_week_order)) %>%
group_by(wday, reason_code, plant) %>%
summarize(mu_instances = mean(dt_instances)) %>%
ggplot(aes(reason_code, mu_instances, fill = wday)) +
geom_bar(stat = 'identity', position = 'dodge') +
facet_wrap(~plant) +
scale_fill_npg() +
labs(
fill = "", x = "", y = "average # of instances",
title = "after (including) November 1, 2020"
) +
theme_minimal()
ggplotly(p)
p = tb_day_plant_summary %>%
filter(day >= as.Date("2020/11/01")) %>%
replace_na(list(tot_dt = 0)) %>%
mutate(wday = factor(wday, work_week_order)) %>%
group_by(wday, reason_code, plant) %>%
summarize(mu_dt = mean(tot_dt)/60) %>%
ggplot(aes(reason_code, mu_dt, fill = wday)) +
geom_bar(stat = 'identity', position = 'dodge') +
facet_wrap(~plant) +
scale_fill_npg() +
labs(
fill = "", x = "", y = "average total downtime per day (hours)",
title = "after (including) November 1, 2020"
) +
theme_minimal()
ggplotly(p)
We’ll continue our analysis only looking at dates at and beyond November 1, 2020.
reason_codes = tb$reason_code %>% unique()
tb_subset = tb %>%
filter(day >= as.Date("2020/11/01"))
tb_reason_dist = lapply(reason_codes, function(c) {
tmp = tb_subset %>%
filter(reason_code == c) %>%
pull(dt) %>%
summary()
tmp %>%
matrix(nrow = 1) %>%
as_tibble() %>%
magrittr::set_colnames(names(tmp))
}) %>%
bind_rows() %>%
mutate(reason_code = reason_codes)
tb_reason_dist %>%
left_join(
tb_subset %>%
group_by(reason_code) %>%
summarize(tot_dt = sum(dt), dt_inst = n()) %>%
mutate(p_dt = tot_dt/sum(tot_dt)),
by = "reason_code"
) %>%
mutate(Mean = round(Mean, 2)) %>%
arrange(desc(tot_dt)) %>%
DT::datatable() %>%
DT::formatPercentage(c("p_dt"), digits = 2)
plants = c("all", tb$plant %>% unique())
tb_tmp = lapply(plants, function(p) {
tmp = tb %>%
filter(day >= as.Date("2020/11/01")) %>%
mutate(m_id = paste0(plant, "_", machine_id)) %>%
group_by(m_id, category, plant) %>%
summarize(tot_dt = sum(dt)/(60*24), dt_inst = n()) %>%
mutate(group = p)
# To help in redraw of visualization
dummy_data = tibble(category = tb$category %>% unique()) %>%
mutate(tot_dt = 0) %>%
mutate(dt_inst = 0) %>%
mutate(plant = p)
if (p != "all") {
tmp = tmp %>% filter(plant == p)
}
tmp %>%
select(-plant) %>%
rename(plant = group) %>%
bind_rows(dummy_data) %>%
return()
}) %>% bind_rows()
tb_tmp %>%
plot_ly(x = ~tot_dt, y = ~dt_inst, text = ~m_id) %>%
add_markers(data = tb_tmp, frame = ~plant, color = ~category, alpha = 0.7) %>%
layout(
xaxis = list(
title = "total downtime (days)"
),
yaxis = list(
title = "total downtime instances"
),
title = "after (including) November 1, 2020"
) %>%
animation_button(visible = F) %>%
animation_opts(redraw = F, transition = 0) %>%
animation_slider(currentvalue = list(font = list(size = 0, color = "white")))
tb_reason_code_day_summary_tmp = tb_subset %>%
group_by(day, reason_code) %>%
summarize(dt_instances = n(), tot_dt = sum(dt)) %>%
ungroup()
tb_reason_code_day_summary = lapply(reason_codes, function(i) {
tb_reason_code_day_summary_tmp %>%
filter(reason_code == i) %>%
complete(day = seq(min(tb_subset$day), max(tb_subset$day), by = 'day')) %>%
replace_na(list(reason_code = i, dt_instances = 0, tot_dt = 0))
}) %>% bind_rows()
M_reason_instances = tb_reason_code_day_summary %>%
select(-tot_dt) %>%
pivot_wider(names_from = reason_code, values_from = dt_instances) %>%
select(-day) %>%
as.matrix()
M_ri_cor = cor(M_reason_instances)
plot_ly(
x = M_ri_cor %>% rownames(),
y = M_ri_cor %>% rownames(),
z = M_ri_cor, type = "heatmap"
) %>%
layout(
title = "linear correlations in reason code instances"
)
p = M_reason_instances %>%
as_tibble() %>%
ggpairs(
columns = c(1:2, 4, 13),
mapping = aes(alpha = 0.5)
) +
theme_minimal()
ggplotly(p) %>%
layout(
title = list(text = paste0(
"relationship of select reason codes",
"<br>",
"<sup>",
'# downtime instances per day',
"</sup>"
))
)
M_reason_total = tb_reason_code_day_summary %>%
select(-dt_instances) %>%
pivot_wider(names_from = reason_code, values_from = tot_dt) %>%
select(-day) %>%
as.matrix()
M_rt_cor = cor(M_reason_total)
plot_ly(
x = M_rt_cor %>% rownames(),
y = M_rt_cor %>% rownames(),
z = M_rt_cor, type = "heatmap"
) %>%
layout(
title = "linear correlations in reason code daily total downtime"
)
p = M_reason_total %>%
as_tibble() %>%
ggpairs(
columns = c(1:2, 4, 13),
mapping = aes(alpha = 0.5)
) +
theme_minimal()
ggplotly(p) %>%
layout(
title = list(text = paste0(
"relationship of select reason codes",
"<br>",
"<sup>",
'daily total downtime',
"</sup>"
))
)
tb_machine_day_summary_tmp = tb_subset %>%
mutate(m_id = paste0(plant, "_", machine_id)) %>%
group_by(day, m_id) %>%
summarize(dt_instances = n(), tot_dt = sum(dt)) %>%
ungroup()
m_ids = tb_subset %>%
mutate(m_id = paste0(plant, "_", machine_id)) %>%
pull(m_id) %>%
unique()
tb_machine_day_summary = lapply(m_ids, function(i) {
tb_machine_day_summary_tmp %>%
filter(m_id == i) %>%
complete(day = seq(min(tb_subset$day), max(tb_subset$day), by = 'day')) %>%
replace_na(list(m_id = i, dt_instances = 0, tot_dt = 0))
}) %>% bind_rows()
M_machine_instances = tb_machine_day_summary %>%
select(-tot_dt) %>%
pivot_wider(names_from = m_id, values_from = dt_instances) %>%
select(-day) %>%
as.matrix()
M_mi_cor = cor(M_machine_instances)
plot_ly(
x = M_mi_cor %>% rownames(),
y = M_mi_cor %>% rownames(),
z = M_mi_cor, type = "heatmap"
) %>%
layout(
title = "linear correlations in machines daily instances"
)
p = M_machine_instances %>%
as_tibble() %>%
ggpairs(
columns = 30:33,
mapping = aes(alpha = 0.5),
lower = list(continuous=wrap("points", position=position_jitter(height=0.25, width=0.25)))
) +
theme_minimal()
ggplotly(p) %>%
layout(
title = list(text = paste0(
"relationship of select machines",
"<br>",
"<sup>",
'# downtime instances per day',
"</sup>"
))
)
tb_subset %>%
mutate(m_id = paste0(plant, "_", machine_id)) %>%
filter(
m_id %in% (M_machine_instances %>%
as_tibble() %>% names() %>% magrittr::extract(30:33))
) %>%
group_by(m_id) %>%
slice(1) %>%
ungroup() %>%
select(plant, machine_id, m_id, description, category)
## # A tibble: 4 × 5
## plant machine_id m_id description category
## <chr> <dbl> <chr> <chr> <chr>
## 1 cooking 18 cooking_18 Woodtek Sander Sander
## 2 cooking 19 cooking_19 Woodtek Sander Sander
## 3 cooking 75 cooking_75 Woodtek Sander Sander
## 4 cooking 77 cooking_77 Woodtek Sander Sander
M_machine_dt = tb_machine_day_summary %>%
select(-dt_instances) %>%
pivot_wider(names_from = m_id, values_from = tot_dt) %>%
select(-day) %>%
as.matrix()
M_mt_cor = cor(M_machine_dt)
plot_ly(
x = M_mt_cor %>% rownames(),
y = M_mt_cor %>% rownames(),
z = M_mt_cor, type = "heatmap"
) %>%
layout(
title = "linear correlations in machines daily total downtime"
)
p = M_machine_dt %>%
as_tibble() %>%
ggpairs(
columns = 121:123,
mapping = aes(alpha = 0.5)
) +
theme_minimal()
ggplotly(p) %>%
layout(
title = list(text = paste0(
"relationship of select machines",
"<br>",
"<sup>",
'daily total downtime',
"</sup>"
))
)
tb_subset %>%
mutate(m_id = paste0(plant, "_", machine_id)) %>%
filter(
m_id %in% (M_machine_instances %>%
as_tibble() %>% names() %>% magrittr::extract(121:123))
) %>%
group_by(m_id) %>%
slice(1) %>%
ungroup() %>%
select(plant, machine_id, m_id, description, category)
## # A tibble: 3 × 5
## plant machine_id m_id description category
## <chr> <dbl> <chr> <chr> <chr>
## 1 cooking 43 cooking_43 Amada FBD 8025 NT Press Brake Press Brake
## 2 cooking 54 cooking_54 Amada Vipros 255 Turret Press
## 3 cooking 7 cooking_7 Amada HDS 8025 NT Press Brake Press Brake
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GGally_2.1.2 scales_1.2.1 dendextend_1.16.0 diagram_1.6.5
## [5] shape_1.4.6 markovchain_0.9.0 plotly_4.10.1 ggsci_2.9
## [9] lubridate_1.8.0 readxl_1.4.1 forcats_0.5.2 stringr_1.5.0
## [13] dplyr_1.0.10 purrr_1.0.1 readr_2.1.3 tidyr_1.3.0
## [17] tibble_3.1.8 ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-157 fs_1.5.2 bit64_4.0.5
## [4] RColorBrewer_1.1-3 httr_1.4.4 tools_4.2.1
## [7] backports_1.4.1 bslib_0.4.1 DT_0.26
## [10] utf8_1.2.2 R6_2.5.1 mgcv_1.8-40
## [13] DBI_1.1.3 lazyeval_0.2.2 colorspace_2.0-3
## [16] withr_2.5.0 tidyselect_1.2.0 gridExtra_2.3
## [19] bit_4.0.4 compiler_4.2.1 cli_3.4.1
## [22] rvest_1.0.3 expm_0.999-6 xml2_1.3.3
## [25] labeling_0.4.2 sass_0.4.2 digest_0.6.30
## [28] rmarkdown_2.17 pkgconfig_2.0.3 htmltools_0.5.3
## [31] dbplyr_2.2.1 fastmap_1.1.0 htmlwidgets_1.5.4
## [34] rlang_1.0.6 rstudioapi_0.14 farver_2.1.1
## [37] jquerylib_0.1.4 generics_0.1.3 jsonlite_1.8.3
## [40] crosstalk_1.2.0 vroom_1.6.0 googlesheets4_1.0.1
## [43] magrittr_2.0.3 Matrix_1.5-3 Rcpp_1.0.9
## [46] munsell_0.5.0 fansi_1.0.3 viridis_0.6.2
## [49] lifecycle_1.0.3 stringi_1.7.8 yaml_2.3.6
## [52] plyr_1.8.7 grid_4.2.1 parallel_4.2.1
## [55] crayon_1.5.2 lattice_0.20-45 splines_4.2.1
## [58] haven_2.5.1 hms_1.1.2 knitr_1.40
## [61] pillar_1.8.1 igraph_1.3.5 stats4_4.2.1
## [64] reprex_2.0.2 glue_1.6.2 evaluate_0.18
## [67] data.table_1.14.4 RcppParallel_5.1.5 modelr_0.1.9
## [70] vctrs_0.5.2 tzdb_0.3.0 cellranger_1.1.0
## [73] gtable_0.3.1 reshape_0.8.9 assertthat_0.2.1
## [76] cachem_1.0.6 xfun_0.34 broom_1.0.1
## [79] googledrive_2.0.0 viridisLite_0.4.1 gargle_1.2.1
## [82] ellipsis_0.3.2