## ----eval=FALSE---------------------------------------------------------------
# # install.packages("remotes")
# remotes::install_github("EuropeanIFCBGroup/iRfcb")

## ----eval=FALSE---------------------------------------------------------------
# library(iRfcb)
# library(dplyr) # For data wrangling
# library(readr) # For creating .tsv files
# library(lubridate) # For handling dates

## ----include=FALSE------------------------------------------------------------
library(iRfcb)
library(dplyr) # For data wrangling
library(readr) # For creating .tsv files
library(lubridate) # For handling dates

## ----eval=FALSE---------------------------------------------------------------
# # Define data directory
# data_dir <- "data"
#
# # Download and extract test data in the data folder
# ifcb_download_test_data(dest_dir = data_dir,
#                         max_retries = 10,
#                         sleep_time = 30,
#                         verbose = FALSE)

## ----include=FALSE------------------------------------------------------------
# Define data directory
data_dir <- "data"

# Download and extract test data only if the folder does not exist yet
if (!dir.exists(data_dir)) {
  ifcb_download_test_data(dest_dir = data_dir,
                          max_retries = 10,
                          sleep_time = 30,
                          verbose = FALSE)
}

## -----------------------------------------------------------------------------
# Define path to sample that you wish to prepare for an EcoTaxa submission
sample_path <- "data/data/2023/D20230314/D20230314T003836_IFCB134"

# Extract .png images
ifcb_extract_pngs(paste0(sample_path, ".roi"))

## -----------------------------------------------------------------------------
# Extract image metadata
metadata_sample <- ifcb_summarize_png_metadata(sample_path)

## -----------------------------------------------------------------------------
# Get the minimal EcoTaxa metadata header names
ecotaxa_minimal_headers <- ifcb_get_ecotaxa_example("minimal")[0, ]

# Create a data frame with empty rows matching the length of data.
# seq_len() is used instead of 1:nrow() so that zero images yield zero rows
# (1:0 would silently create one spurious NA row).
ecotaxa_minimal_headers[seq_len(nrow(metadata_sample)), ] <- NA

# Map metadata to EcoTaxa headers
ecotaxa_minimal <- ecotaxa_minimal_headers %>%
  mutate(img_file_name = metadata_sample$image,
         object_id = tools::file_path_sans_ext(metadata_sample$image))

## -----------------------------------------------------------------------------
# Write metadata tsv file; the file name is derived from the sample name
write_tsv(ecotaxa_minimal,
          file.path(sample_path,
                    paste0("ecotaxa_", basename(sample_path), ".tsv")),
          na = "")

# Create zip-archive
ifcb_zip_pngs(png_folder = "data/data/2023/D20230314/",
              zip_filename = "data/zip/D20230314T003836_IFCB134_ecotaxa.zip",
              include_txt = TRUE, # To include the metadata text-files in the archive
              split_zip = TRUE,
              max_size = 500,
              print_progress = FALSE)

## -----------------------------------------------------------------------------
# Extract .png images
ifcb_extract_annotated_images(manual_folder = "data/manual",
                              class2use_file = "data/config/class2use.mat",
                              roi_folders = "data/data",
                              out_folder = "data/extracted_images",
                              skip_class = 1, # or "unclassified"
                              verbose = FALSE) # Do not print messages

## -----------------------------------------------------------------------------
# Summarize image metadata from feature and hdr files
metadata <- ifcb_summarize_png_metadata(png_folder = "data/extracted_images",
                                        feature_folder = "data/features",
                                        hdr_folder = "data/data")

# List the manual annotation files. The dot is escaped and the pattern is
# anchored so that only files ending in ".mat" are matched.
manual_files <- list.files("data/manual",
                           pattern = "\\.mat$",
                           full.names = TRUE)

# Get file info of the .mat files
file_info <- file.info(manual_files)

# Extract analysis date and time based on file timestamps.
# NOTE(review): `ctime` is the last status change on Unix-alikes and the
# creation time on Windows, so it is only a proxy for the annotation time.
analysis_date <- data.frame(
  sample = sub("\\.mat$", "", basename(manual_files)),
  analysis_date = as.Date(file_info$ctime),
  analysis_time = format(ymd_hms(file_info$ctime), "%H:%M:%S")
)

# Merge with metadata
metadata <- metadata %>%
  left_join(analysis_date, by = "sample")

## -----------------------------------------------------------------------------
# Get taxa names
taxa_names <- unique(metadata$subfolder)

# Clean taxa_names by substituting specific patterns with spaces or empty strings
taxa_names_clean <- iRfcb:::truncate_folder_name(taxa_names) # Remove numerics from folder name
taxa_names_clean <- gsub("_", " ", taxa_names_clean)

# Remove morphology/size descriptors and species flags from class names.
# The patterns are applied in this order.
patterns_to_drop <- c(" single cell",
                      " chain",
                      "-like",
                      " larger than 30unidentified",
                      " smaller than 30unidentified",
                      "\\<spp\\>")

for (pattern in patterns_to_drop) {
  taxa_names_clean <- gsub(pattern, "", taxa_names_clean)
}

# Collapse runs of spaces left behind by the removals above
taxa_names_clean <- gsub(" +", " ", taxa_names_clean)

# Turn f to f. for forma
taxa_names_clean <- gsub("\\bf\\b", "f.", taxa_names_clean)

# Add "/" for multiple names with capital letters
# e.g. Heterocapsa_Azadinium to Heterocapsa/Azadinium
# (gsub() already replaces every match, so a single pass is sufficient)
taxa_names_clean <- gsub(" ([A-Z])", "/\\1", taxa_names_clean)

# Remove leading and trailing whitespace
taxa_names_clean <- trimws(taxa_names_clean)

# Retrieve worms records
worms_records <- ifcb_match_taxa_names(taxa_names_clean,
                                       marine_only = FALSE,
                                       verbose = FALSE)

# Create data frame with taxa information and class names
class_names <- worms_records %>%
  mutate(subfolder = taxa_names,
         class_clean = taxa_names_clean)

# Merge with metadata
metadata <- metadata %>%
  left_join(class_names, by = "subfolder")

## -----------------------------------------------------------------------------
# Get EcoTaxa metadata header names
ecotaxa_headers <- ifcb_get_ecotaxa_example()[0, ]

# Create a data frame with empty rows matching the length of data
# (seq_len() keeps an empty metadata table empty)
ecotaxa_headers[seq_len(nrow(metadata)), ] <- NA

# Build a named list that maps the zero-padded, numbered feature columns of
# `data` (e.g. Ring01 ... Ring50) onto EcoTaxa field names
# (e.g. object_ring01 ... object_ring50). A column that is missing from `data`
# yields NULL, exactly like `data$column` does.
numbered_features <- function(data, source_prefix, target_prefix, n) {
  index <- sprintf("%02d", seq_len(n))
  values <- lapply(paste0(source_prefix, index),
                   function(column) data[[column]])
  stats::setNames(values, paste0(target_prefix, index))
}

# Read the clock once so that process_date and process_time always agree
process_timestamp <- Sys.time()

# Map metadata to populate the empty dataframe
ecotaxa_metadata <- ecotaxa_headers %>%
  mutate(
    # Image fields
    img_file_name = metadata$image,

    # Static information
    object_link = "https://doi.org/10.17044/scilifelab.25883455",
    object_annotation_status = "validated",
    acq_resolution_pixels_per_micron = 3.4,
    acq_instrument = "IFCB",
    sample_source = "flowthrough",

    # Software
    process_soft = "MATLAB, R",
    process_soft_version = paste0("R2022a, ", version$version.string),
    process_library = "ifcb-analysis",
    process_library_version = 2,
    process_script = "iRfcb",
    process_script_version = as.character(packageVersion("iRfcb")),
    process_date = format(process_timestamp, "%Y%m%d"),
    process_time = format(process_timestamp, "%H%M%S"),

    # Object-related fields
    object_id = tools::file_path_sans_ext(metadata$image),
    object_roi_number = metadata$roi,
    object_lat = metadata$gpsLatitude,
    object_lon = metadata$gpsLongitude,
    object_date = format(metadata$date, "%Y%m%d"),
    object_time = gsub(":", "", metadata$time),
    object_annotation_hierarchy = metadata$subfolder,
    object_annotation_category = metadata$class_clean,
    object_aphiaid = metadata$AphiaID,
    object_annotation_date = format(metadata$analysis_date, "%Y%m%d"),
    object_annotation_time = gsub(":", "", metadata$analysis_time),
    object_annotation_person_name = "John Doe",
    object_annotation_person_email = "john.doe@email.com",

    # Depth fields
    object_depth_min = 4, # Sampled at 4 m depth
    object_depth_max = 4, # Sampled at 4 m depth

    # Sample fields
    sample_vessel = "RV Svea",
    sample_id = metadata$sample,
    sample_station = NA,
    sample_cruise = NA,

    ### Features fields
    # PMT
    object_pmt_scattering = NA,
    object_pmt_fluorescence = NA,

    # Morphological metrics
    object_area = metadata$Area,
    object_biovolume = metadata$Biovolume,
    object_perimeter = metadata$Perimeter,
    object_bounding_box_xwidth = metadata$BoundingBox_xwidth,
    object_bounding_box_ywidth = metadata$BoundingBox_ywidth,
    object_convex_area = metadata$ConvexArea,
    object_convex_perimeter = metadata$ConvexPerimeter,
    object_feret_diameter = metadata$FeretDiameter,
    object_major_axis_length = metadata$MajorAxisLength,
    object_minor_axis_length = metadata$MinorAxisLength,
    object_orientation = metadata$Orientation,
    object_eccentricity = metadata$Eccentricity,
    object_equiv_diameter = metadata$EquivDiameter,
    object_extent = metadata$Extent,
    object_r_wcenter2total_powerratio = metadata$RWcenter2total_powerratio,
    object_r_whalfpowerintegral = metadata$RWhalfpowerintegral,

    # Miscellaneous fields
    object_solidity = metadata$Solidity,
    object_num_blobs = metadata$numBlobs,
    object_h180 = metadata$H180,
    object_h90 = metadata$H90,
    object_hflip = metadata$Hflip,
    object_summed_area = metadata$summedArea,
    object_summed_biovolume = metadata$summedBiovolume,
    object_summed_convex_area = metadata$summedConvexArea,
    object_summed_convex_perimeter = metadata$summedConvexPerimeter,
    object_summed_feret_diameter = metadata$summedFeretDiameter,
    object_summed_major_axis_length = metadata$summedMajorAxisLength,
    object_summed_minor_axis_length = metadata$summedMinorAxisLength,
    object_summed_perimeter = metadata$summedPerimeter,
    object_shapehist_kurtosis_norm_eq_d = metadata$shapehist_kurtosis_normEqD,
    object_shapehist_mean_norm_eq_d = metadata$shapehist_mean_normEqD,
    object_shapehist_median_norm_eq_d = metadata$shapehist_median_normEqD,
    object_shapehist_mode_norm_eq_d = metadata$shapehist_mode_normEqD,
    object_shapehist_skewness_norm_eq_d = metadata$shapehist_skewness_normEqD,
    object_area_over_perimeter_squared = metadata$Area_over_PerimeterSquared,
    object_area_over_perimeter = metadata$Area_over_Perimeter,
    object_h90_over_hflip = metadata$H90_over_Hflip,
    object_h90_over_h180 = metadata$H90_over_H180,
    object_hflip_over_h180 = metadata$Hflip_over_H180,
    object_summed_convex_perimeter_over_perimeter =
      metadata$summedConvexPerimeter_over_Perimeter,
    object_rotated_bounding_box_solidity = metadata$rotated_BoundingBox_solidity,
    object_rotated_area = metadata$RotatedArea,
    object_rotated_bounding_box_xwidth = metadata$RotatedBoundingBox_xwidth,
    object_rotated_bounding_box_ywidth = metadata$RotatedBoundingBox_ywidth,

    # Texture-related fields
    object_texture_average_contrast = metadata$texture_average_contrast,
    object_texture_average_gray_level = metadata$texture_average_gray_level,
    object_texture_entropy = metadata$texture_entropy,
    object_texture_smoothness = metadata$texture_smoothness,
    object_texture_third_moment = metadata$texture_third_moment,
    object_texture_uniformity = metadata$texture_uniformity,

    # Moment invariants
    object_moment_invariant1 = metadata$moment_invariant1,
    object_moment_invariant2 = metadata$moment_invariant2,
    object_moment_invariant3 = metadata$moment_invariant3,
    object_moment_invariant4 = metadata$moment_invariant4,
    object_moment_invariant5 = metadata$moment_invariant5,
    object_moment_invariant6 = metadata$moment_invariant6,
    object_moment_invariant7 = metadata$moment_invariant7,

    # Ring fields: object_ring01 ... object_ring50 <- Ring01 ... Ring50
    !!!numbered_features(metadata, "Ring", "object_ring", 50),

    # HOG fields: object_hog01 ... object_hog81 <- HOG01 ... HOG81
    !!!numbered_features(metadata, "HOG", "object_hog", 81),

    # Wedge fields: object_wedge01 ... object_wedge48 <- Wedge01 ... Wedge48
    !!!numbered_features(metadata, "Wedge", "object_wedge", 48)
  )

## ----write_tsvs---------------------------------------------------------------
# Class subfolders present in the metadata (computed once, outside the loop)
class_folders <- unique(ecotaxa_metadata$object_annotation_hierarchy)

# Data format codes (text[t], float[f] etc.) taken from the first row of the
# EcoTaxa example; identical for every class, so fetched only once
ecotaxa_format_codes <- ifcb_get_ecotaxa_example()[1, ] %>%
  mutate(across(everything(), as.character))

# Loop .tsv creation for each class
for (class_folder in class_folders) {

  # Define path to subfolder
  subfolder_path <- file.path("data/extracted_images", class_folder)

  # Class label is derived from the current folder itself. Indexing into the
  # de-duplicated truncated names would pick the wrong label whenever two
  # folders truncate to the same name.
  class_label <- iRfcb:::truncate_folder_name(class_folder)

  # Filter metadata for each class
  ecotaxa_metadata_ix <- ecotaxa_metadata %>%
    filter(object_annotation_hierarchy == class_folder) %>%
    mutate(object_annotation_hierarchy =
             iRfcb:::truncate_folder_name(object_annotation_hierarchy))

  # Add data format codes as the first row; everything is converted to
  # character so the rows can be bound together
  ecotaxa_metadata_ix <- bind_rows(
    ecotaxa_format_codes,
    ecotaxa_metadata_ix %>%
      mutate(across(everything(), as.character))
  )

  # Write one metadata file per class subfolder
  write_tsv(ecotaxa_metadata_ix,
            file.path(subfolder_path,
                      paste0("ecotaxa_", class_label, ".tsv")),
            na = "")
}

## -----------------------------------------------------------------------------
# Create zip-archive
ifcb_zip_pngs(png_folder = "data/extracted_images",
              zip_filename = "data/zip/iRfcb_ecotaxa.zip",
              readme_file = system.file("exdata/README-template.md",
                                        package = "iRfcb"), # Template included in `iRfcb`
              email_address = "tutorial@test.com",
              version = "1.1",
              include_txt = TRUE, # To include the metadata text-files in the archive
              split_zip = TRUE,
              max_size = 500,
              print_progress = FALSE)

## ----echo=FALSE---------------------------------------------------------------
# Print citation
citation("iRfcb")

## ----include=FALSE------------------------------------------------------------
# Clean up
unlink(file.path(data_dir, "extracted_images"), recursive = TRUE)
unlink(sample_path, recursive = TRUE)