Ivan Tomic info@ivantomic.com
You can install the released version of immunaut from CRAN with:
install.packages("immunaut")
Or you can install immunaut directly from GitHub using the following commands:
# Install immunaut from the 'R-package' subdirectory of the GitHub repository.
# Uncomment the next line if devtools is not installed yet.
# install.packages("devtools")
devtools::install_github("atomiclaboratory/immunaut", subdir = "R-package")
library("immunaut")
# Generate a demo dataset with 1000 subjects, 200 features, 4 clusters,
# and a 10% probability of missing values
dataset <- generate_demo_data(
  n_subjects = 1000,
  n_features = 200,
  desired_number_clusters = 4, # Approximate number of clusters
  cluster_overlap_sd = 35,     # Standard deviation for cluster overlap
  missing_prob = 0.1           # Probability of missing values
)
# Generate a file header for the dataset to use in downstream analysis
file_header <- generate_file_header(dataset)

# Settings list passed to immunaut() (clustering) and auto_simon_ml()
# (model training) further below.
settings <- list(
  fileHeader = file_header,
  seed = 1337,
  selectedColumns = colnames(dataset), # Columns selected for analysis
  # Exclude outcome, age, and gender columns from the analysis
  excludedColumns = c("outcome", "age", "gender"),
  removeNA = TRUE,
  clusterType = "Louvain",
  target_clusters_range = c(3, 4),
  resolution_increments = c(0.01, 0.1, 0.2, 0.3, 0.4),
  min_modularities = c(0.5, 0.6, 0.7, 0.8),
  pickBestClusterMethod = "Modularity",
  weights = list(AUROC = 0.9, modularity = 0.05, silhouette = 0.05),
  preProcessDataset = c("scale", "center", "medianImpute", "corr", "zv", "nzv"),
  selectedPartitionSplit = 0.7, # Use the current partition split
  selectedPackages = c("rf", "gcvEarth"),
  trainingTimeout = 180
)
# Perform t-SNE and Louvain clustering using the 'immunaut' function
result <- immunaut(dataset, settings)

# Plot the clustered t-SNE results using ggplot2
p <- plot_clustered_tsne(
  result$tsne_clust$info.norm,
  result$tsne_clust$cluster_data,
  result$settings
)
print(p) # Display the plot
# Extract the dataset with the applied clustering from the result
dataset_ml <- result$dataset$dataset_ml

# Run the auto_simon_ml function to train machine learning models on the dataset
model_results <- auto_simon_ml(dataset_ml, settings)
# Extract the names of the models
model_names <- names(model_results$models)

# Create a data frame to store the model names and their corresponding AUROC values
model_auroc_table <- data.frame(
  Model = character(),
  AUROC = numeric(),
  stringsAsFactors = FALSE
)

# Loop through the models and extract AUROC values (One-vs-Rest) for Multiclass Models
for (model_name in model_names) {
  auroc_value <- model_results$models[[model_name]][["predictions"]][["AUROC"]]
  # Add the model name and its AUROC to the table
  model_auroc_table <- rbind(
    model_auroc_table,
    data.frame(Model = model_name, AUROC = auroc_value)
  )
}
library(ggplot2)
# Bar chart of per-model AUROC: one bar per model, each labelled with its
# AUROC value rounded to three decimals.
ggplot(model_auroc_table, aes(x = Model, y = AUROC, fill = Model)) +
  geom_col() +                                 # Bar height taken directly from AUROC
  geom_text(aes(label = round(AUROC, 3)), vjust = -0.5) + # Value above each bar
  labs(title = "AUROC for Models", x = "Model", y = "AUROC") +
  theme_minimal() +                            # Minimal theme
  scale_fill_brewer(palette = "Set3")
# Update settings for DBSCAN clustering
settings$clusterType <- "Density"
settings$minPtsAdjustmentFactor <- 1.5
settings$epsQuantile <- 0.9

# Run t-SNE and DBSCAN clustering
dbscan_result <- immunaut(dataset, settings)
#> [1] "====> Density-based clustering"
# Update settings for Mclust clustering
settings$clusterType <- "Mclust"
settings$clustGroups <- 3 # Specify the number of clusters for Mclust

# Run t-SNE and Mclust clustering
mclust_result <- immunaut(dataset, settings)
#> [1] "==> cluster_tsne_mclust clustGroups: 3"
#> fitting ...
#> | | | 0% | |========= | 7% | |================== | 13% | |========================== | 20% | |=================================== | 27% | |============================================ | 33% | |===================================================== | 40% | |============================================================== | 47% | |====================================================================== | 53% | |=============================================================================== | 60% | |======================================================================================== | 67% | |================================================================================================= | 73% | |========================================================================================================== | 80% | |================================================================================================================== | 87% | |=========================================================================================================================== | 93% | |====================================================================================================================================| 100%
# Update settings for Hierarchical clustering
settings$clusterType <- "Hierarchical"
settings$clustLinkage <- "ward.D2"
settings$clustGroups <- 3

# Run t-SNE and Hierarchical clustering
hierarchical_result <- immunaut(dataset, settings)
#> [1] "====> Noise indices: 25"
#> [1] "====> Noise indices done"