immunaut

Ivan Tomic info@ivantomic.com

Installation

You can install the released version of immunaut from CRAN with:

install.packages("immunaut")

Or you can install immunaut directly from GitHub with use of following commands:

# install.packages("devtools")
devtools::install_github("atomiclaboratory/immunaut", subdir = 'R-package')

Initial setup

library("immunaut")

# Generate a demo dataset with 1000 subjects, 200 features, 4 clusters, and a 10% probability of missing values
dataset <- generate_demo_data(n_subjects = 1000, n_features = 200, 
                                desired_number_clusters = 4, # Approximate number of clusters
                                cluster_overlap_sd = 35, # Standard deviation for cluster overlap
                                missing_prob = 0.1) # Probability of missing values

# Generate a file header for the dataset to use in downstream analysis
file_header <- generate_file_header(dataset)

settings <- list(
    fileHeader = file_header,
    seed = 1337,
    selectedColumns = colnames(dataset),  # Columns selected for analysis
    # Exclude outcome, age, and gender columns from the analysis
    excludedColumns = c("outcome", "age", "gender"),
    removeNA = TRUE,
    
    clusterType = "Louvain",
    target_clusters_range = c(3,4),
    resolution_increments =c(0.01, 0.1, 0.2, 0.3, 0.4),
    min_modularities = c(0.5, 0.6, 0.7, 0.8),
    pickBestClusterMethod = "Modularity",
    weights = list(AUROC = 0.9, modularity = 0.05, silhouette = 0.05),
    
    preProcessDataset = c("scale", "center", "medianImpute", "corr", "zv", "nzv"),
    selectedPartitionSplit = 0.7,  # Use the current partition split
    selectedPackages = c("rf", "gcvEarth"),
    trainingTimeout = 180
)

Example 1: Perform t-SNE and Louvain Clustering and Machine Learning

# Perform t-SNE and Louvain clustering using the 'immunaut' function
result <- immunaut(dataset, settings)

# Plot the clustered t-SNE results using ggplot2
p <- plot_clustered_tsne(result$tsne_clust$info.norm, 
                                result$tsne_clust$cluster_data, 
                                result$settings) 
print(p) # Display the plot


# Extract the dataset with the applied clustering from the result
dataset_ml <- result$dataset$dataset_ml
# Run the auto_simon_ml function to train machine learning models on the dataset
model_results <- auto_simon_ml(dataset_ml, settings)

# Extract the names of the models
model_names <- names(model_results$models)

# Create a data frame to store the model names and their corresponding AUROC values
model_auroc_table <- data.frame(
  Model = character(),
  AUROC = numeric(),
  stringsAsFactors = FALSE
)

# Loop through the models and extract AUROC values (One-vs-Rest) for Multiclass Models
for (model_name in model_names) {
  auroc_value <- model_results$models[[model_name]][["predictions"]][["AUROC"]]
  # Add the model name and its AUROC to the table
  model_auroc_table <- rbind(model_auroc_table, data.frame(Model = model_name, AUROC = auroc_value))
}

library(ggplot2)
# Create a bar chart with AUROC values
ggplot(model_auroc_table, aes(x = Model, y = AUROC, fill = Model)) +
  geom_bar(stat = "identity") +  # Create bars
  geom_text(aes(label = round(AUROC, 3)), vjust = -0.5) +  # Add AUROC values above bars
  ggtitle("AUROC for Models") +
  xlab("Model") + 
  ylab("AUROC") +
  theme_minimal() +  # Use a minimal theme
  scale_fill_brewer(palette = "Set3")

Example 2: Switch to DBSCAN Clustering

# Update settings for DBSCAN clustering
settings$clusterType <- "Density"
settings$minPtsAdjustmentFactor <- 1.5
settings$epsQuantile <- 0.9

# Run t-SNE and DBSCAN clustering
dbscan_result <- immunaut(dataset, settings)
#> [1] "====> Density-based clustering"

Example 3: Perform Mclust Clustering

# Update settings for Mclust clustering
settings$clusterType <- "Mclust"
settings$clustGroups <- 3  # Specify the number of clusters for Mclust

# Run t-SNE and Mclust clustering
mclust_result <- immunaut(dataset, settings)
#> [1] "==> cluster_tsne_mclust clustGroups:  3"
#> fitting ...
#>   |                                                                                                                           |                                                                                                                   |   0%  |                                                                                                                           |========                                                                                                           |   7%  |                                                                                                                           |===============                                                                                                    |  13%  |                                                                                                                           |=======================                                                                                            |  20%  |                                                                                                                           |===============================                                                                                    |  27%  |                                                                                                                           |======================================                                                                             |  33%  |                                                                                                                           |==============================================                                                                     |  40%  |                                                                                                                           |======================================================                                                             |  47%  |                                                                                                                           |=============================================================                                                      |  53%  |                                                                                                                           |=====================================================================                                              |  60%  |                                                                                                                           |=============================================================================                                      |  67%  |                                                                                                                           |====================================================================================                               |  73%  |                                                                                                                           |============================================================================================                       |  80%  |                                                                                                                           |====================================================================================================               |  87%  |                                                                                                                           |===========================================================================================================        |  93%  |                                                                                                                           |===================================================================================================================| 100%

Example 4: Perform Hierarchical Clustering

# Update settings for Hierarchical clustering
settings$clusterType <- "Hierarchical"
settings$clustLinkage <- "ward.D2"
settings$clustGroups <- 3

# Run t-SNE and Mclust clustering
hierarchical_result <- immunaut(dataset, settings)
#> [1] "====> Noise indices:  31"
#> [1] "====> Noise indices done"

Example 5: Using Immune Response Dataset for LAIV Vaccination in Pediatric Cohorts Dataset


library(immunaut)

library(ggplot2)

data("immunautLAIV")


file_header <- generate_file_header(immunautLAIV)

# Base settings (shared across all runs unless overridden):
settings <- list()
settings$fileHeader <- file_header

settings$selectedColumns <- c(
  "H1_HAI_FC","H3_HAI_FC","B_HAI_FC","H1N1_NA_FC","H3_HA_IgA_FC",
  "B_HA_IgA_FC","B_NA_IgA_FC","H1_CD8_IFNg_FC","H3_CD8_IFNg_FC",
  "H3_CD8_IL2_FC","HAB_CD8_IFNg_FC","HAB_CD8_IL2_FC","NC99_H1_FC",
  "SWISS_H3_FC","HK14_H3_FC","KAN_H3_FC","B_PHU_FC","cH7_IgA_FC",
  "N2_FC","cH6_FC"
)
settings$groupingVariables <- c(
  "max_HAI_responder","max_iga_responder","max_mnp_cd4_responder",
  "max_mnp_cd8_responder ","year","sex","v0_resp_virus_positive",
  "h1_v2_shed","h3_v2_shed","b_v2_shed","h1_v7_shed","h3_v7_shed",
  "b_v7_shed","h1_v0_seropositive","h3_v0_seropositive","b_v0_seropositive"
)


settings$preProcessDataset <- c("medianImpute","center","scale","corr","nzv","zv")

result <- immunaut(immunautLAIV, settings)

p <- plot_clustered_tsne(result$tsne_clust$info.norm, 
                                result$tsne_clust$cluster_data, 
                                result$settings) 
# Adjust the plot: use Set2 palette and a nice minimal theme
p <- p +
  scale_color_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    title    = "Clustered t-SNE of Pediatric LAIV Data",
    subtitle = "Comprehensive Immune Response Dataset (244 Children)",
    color    = "Cluster"
  )

print(p)