## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.height = 7, fig.width = 7, warning = FALSE, fig.align = "center" ) ## ----setup, message = FALSE, warning = FALSE---------------------------------- library(dplyr) library(ggplot2) library(theft) library(theftdlc) ## ---- message = FALSE, warning = FALSE, eval = FALSE-------------------------- # theft::simData ## ---- message = FALSE, warning = FALSE---------------------------------------- head(simData) ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_matrix <- calculate_features(data = simData, id_var = "id", time_var = "timepoint", values_var = "values", group_var = "process", feature_set = "catch22", seed = 123) ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(feature_matrix, type = "quality") ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(feature_matrix, type = "matrix", norm_method = "RobustSigmoid") ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(feature_matrix, type = "violin", feature_names = c("CO_f1ecac", "PD_PeriodicityWang_th0_01")) ## ---- eval = FALSE------------------------------------------------------------ # plot(feature_matrix, type = "violin", # feature_names = c("CO_f1ecac", "PD_PeriodicityWang_th0_01"), # size = 0.7, alpha = 0.9) ## ---- message = FALSE, warning = FALSE---------------------------------------- low_dim <- project(feature_matrix, norm_method = "RobustSigmoid", unit_int = TRUE, low_dim_method = "PCA", seed = 123) ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(low_dim) ## ---- message = FALSE, warning = FALSE---------------------------------------- low_dim2 <- project(feature_matrix, norm_method = "RobustSigmoid", unit_int = TRUE, low_dim_method = "tSNE", perplexity = 10, seed = 123) plot(low_dim2, show_covariance = FALSE) ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(feature_matrix, type = "cor") ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_classifiers <- classify(feature_matrix, by_set = FALSE, n_resamples = 5, use_null = TRUE) ## ---- message = FALSE, warning = FALSE---------------------------------------- myclassifier <- function(formula, data){ mod <- e1071::svm(formula, data = data, kernel = "radial", scale = FALSE, probability = TRUE) } feature_classifiers_radial <- classify(feature_matrix, classifier = myclassifier, by_set = FALSE, n_resamples = 5, use_null = TRUE) ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_vs_null <- compare_features(feature_classifiers, by_set = FALSE, hypothesis = "null") head(feature_vs_null) ## ---- message = FALSE, warning = FALSE---------------------------------------- pairwise_features <- compare_features(feature_classifiers, by_set = FALSE, hypothesis = "pairwise", p_adj = "holm") head(pairwise_features) ## ---- message = FALSE, warning = FALSE---------------------------------------- top_10 <- feature_vs_null %>% dplyr::slice_min(p.value, n = 10) %>% dplyr::select(c(feature_set, original_names, p.value)) feature_matrix_filt <- feature_matrix %>% dplyr::filter(feature_set %in% top_10$feature_set & names %in% top_10$original_names) feature_matrix_filt <- structure(feature_matrix_filt, class = c("feature_calculations", "data.frame")) plot(feature_matrix_filt, type = "cor") ## ---- message = FALSE, warning = FALSE---------------------------------------- plot(feature_matrix_filt, type = "violin", feature_names = top_10$original_names) ## ---- message = FALSE, warning = FALSE---------------------------------------- interval(feature_classifiers, by_set = FALSE) ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_matrix2 <- calculate_features(data = simData, group_var = "process", feature_set = "catch22", features = list("mean" = mean, "sd" = sd), seed = 123) set_classifiers <- classify(feature_matrix2, by_set = TRUE, n_resamples = 5, use_null = TRUE) head(set_classifiers) ## ---- message = FALSE, warning = FALSE---------------------------------------- interval_calcs <- interval(set_classifiers) interval_calcs %>% ggplot2::ggplot(ggplot2::aes(x = reorder(feature_set, -.mean), y = .mean, colour = feature_set)) + ggplot2::geom_errorbar(ggplot2::aes(ymin = .lower, ymax = .upper)) + ggplot2::geom_point(size = 5) + ggplot2::labs(x = "Feature set", y = "Classification accuracy") + ggplot2::scale_colour_brewer(palette = "Dark2") + ggplot2::theme_bw() + ggplot2::theme(legend.position = "none", panel.grid.minor = ggplot2::element_blank()) ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_clusters <- cluster(feature_matrix, k = 6) ## ---- message = FALSE, warning = FALSE---------------------------------------- feature_clusters$Data %>% dplyr::filter(names %in% c("CO_HistogramAMI_even_2_5", "DN_OutlierInclude_p_001_mdrmd")) %>% tidyr::pivot_wider(id_cols = c("id", "group", "cluster"), names_from = "names", values_from = "values") %>% ggplot2::ggplot(ggplot2::aes(x = CO_HistogramAMI_even_2_5, DN_OutlierInclude_p_001_mdrmd, colour = as.factor(cluster))) + ggplot2::stat_ellipse(ggplot2::aes(fill = as.factor(cluster)), geom = "polygon", alpha = 0.2) + ggplot2::geom_point() + ggplot2::labs(colour = "Cluster") + ggplot2::guides(fill = "none") + ggplot2::scale_fill_brewer(palette = "Dark2") + ggplot2::scale_colour_brewer(palette = "Dark2") + ggplot2::theme_bw() + ggplot2::theme(legend.position = "bottom", panel.grid.minor = ggplot2::element_blank())