library(here)
library(cogeqc)
library(tidyverse)
library(ggpubr)
library(rstatix)
library(clusterProfiler)
library(enrichplot)
library(patchwork)
library(dplyr)
source(here("code", "utils.R"))
4 Assessing orthogroup inference for Brassicaceae genomes
Here, we will compare the protein domain-based approach in cogeqc
to assess the impact of multiple combinations of parameters in OrthoFinder (Emms and Kelly 2019) in the accuracy of orthogroup inference. The data set used here will be a collection of Brassicaceae genomes. The parameters we will change are:
- Program (
-S
option)
- DIAMOND
- DIAMOND ultrasensitive
- MCL inflation parameter (
-I
)
- 1
- 1.5 (default)
- 2
- 3
4.1 Orthogroup inference
To start, we will load the proteome data and export each proteome as a FASTA file in the data
directory, so we can pass it to OrthoFinder.
# Load proteomes
load(here("data", "brassicaceae_proteomes.rda"))
# Write files to data/
lapply(seq_along(brassicaceae_proteomes), function(x) {
<- here("data", paste0(names(brassicaceae_proteomes)[x], ".fasta"))
outfile ::writeXStringSet(
Biostrings
brassicaceae_proteomes[[x]], outfile
) })
Now, we can run OrthoFinder for each combination of parameters. Here, we created 2 different bash scripts for each DIAMOND mode. They are:
of_diamond.sh
: code to run DIAMOND (default mode) for different inflation parameters;of_diamond_ultra.sh
: code to run DIAMOND in ultrasensitive mode for different inflation parameters
The 2 files can be run with:
bash of_diamond.sh
bash of_diamond_ultra.sh
The Orthogroups.tsv files were all moved to the directory products/result_files
.
4.2 Exploratory analysis of orthogroup inference results
Now that we have the Orthogroups.tsv files from OrthoFinder, let’s load them.
# Extract tar.xz file
<- here("products", "result_files", "Orthogroups.tar.xz")
tarfile <- tempdir()
outdir
system2("tar", args = c("-xf", tarfile, "--directory", outdir))
# Get path to OrthoFinder output
<- list.files(
og_files path = outdir,
pattern = "Orthogroups.*", full.names = TRUE
)
# Read and parse files
<- lapply(og_files, function(x) {
ogs <- read_orthogroups(x)
og <- og %>%
og mutate(Species = stringr::str_replace_all(Species, "\\.", "")) %>%
mutate(Gene = str_replace_all(
c(
Gene, "\\.[0-9]$" = "",
"\\.[0-9]\\.p$" = "",
"\\.t[0-9]$" = "",
"\\.g$" = ""
)
))return(og)
})<- gsub("\\.tsv", "", basename(og_files))
og_names <- gsub("Orthogroups_", "", og_names)
og_names
names(ogs) <- og_names
Let’s explore OG sizes for each combination of parameters and filter orthogroups by size to remove orthogroups that are artificially large.
# Visualize OG sizes
<- patchwork::wrap_plots(
og_sizes_plot plot_og_sizes(ogs$default_1) + ggtitle("Default, mcl = 1"),
plot_og_sizes(ogs$default_1_5) + ggtitle("Default, mcl = 1.5") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$default_2) + ggtitle("Default, mcl = 2") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$default_3) + ggtitle("Default, mcl = 3") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$ultra_1) + ggtitle("Ultra, mcl = 1") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$ultra_1_5) + ggtitle("Ultra, mcl = 1.5") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$ultra_2) + ggtitle("Ultra, mcl = 2") +
theme(axis.text.y = element_blank()),
plot_og_sizes(ogs$ultra_3) + ggtitle("Ultra, mcl = 3") +
theme(axis.text.y = element_blank()),
nrow = 1, ncol = 8
)
og_sizes_plot
Expectedly, OrthoFinder runs with mcl inflation parameters of 1 lead to very large orthogroups, including some orthogroups with thousands of genes.
Now, let’s explore the percentage of orthogroups with >200, >100, and >50 genes in each OrthoFinder run.
# Calculate OG sizes for each run
<- lapply(ogs, function(x) {
og_sizes <- as.matrix(table(x$Orthogroup, x$Species))
sizes <- rowSums(sizes)
total
<- data.frame(unclass(sizes))
sizes_df $Total <- total
sizes_dfreturn(sizes_df)
})
# What is the percentage of OGs with >=100 genes? And with >50 genes?
<- function(size_df, n = 100) {
percentage_size return(sum(size_df$Total > n) / nrow(size_df) * 100)
}
<- data.frame(
percentages Mode = names(og_sizes),
P200 = unlist(lapply(og_sizes, percentage_size, n = 200)),
P100 = unlist(lapply(og_sizes, percentage_size, n = 100)),
P50 = unlist(lapply(og_sizes, percentage_size, n = 50)),
OGs = unlist(lapply(og_sizes, nrow))
)
# Reorder rows from lowest to highest mcl inflation
<- c(
orders "default_1", "default_1_5", "default_2", "default_3",
"ultra_1", "ultra_1_5", "ultra_2", "ultra_3"
)<- percentages[orders, ]
percentages
# Visual exploration
<- percentages %>%
percentage_plot ::pivot_longer(cols = !Mode) %>%
tidyrmutate(name = str_replace_all(
name,c(
"OGs" = "Number of OGs",
"P200" = "% OGs with >200 genes",
"P100" = "% OGs with >100 genes",
"P50" = "% OGs with >50 genes"
)%>%
)) ggplot(., aes(y = Mode, x = value)) +
geom_col(aes(fill = Mode), show.legend = "none") +
scale_fill_manual(
values = c("ultra_3" = "#08519C", "ultra_2" = "#3182BD",
"ultra_1_5" = "#6BAED6", "ultra_1" = "#BDD7E7",
"default_3" = "#006D2C", "default_2" = "#31A354",
"default_1_5" = "#74C476", "default_1" = "#BAE4B3")
+
) facet_wrap(~name, ncol = 4, scales = "free_x") +
theme_bw() +
labs(
x = "", y = "OrthoFinder mode",
title = "Relationship between the number of orthogroups and orthogroup size per OrthoFinder mode"
)
percentage_plot
It is very clear that increasing the mcl inflation increases the number of orthogroups, but decreases the percentage of OGs with more than 100 and 50 genes.
Finally, let’s remove OGs with >=200 genes to remove noise.
# Filter OGs
<- lapply(seq_along(ogs), function(x) {
ogs_filtered
# Which OGs less than 200 genes?
<- rownames(og_sizes[[x]][og_sizes[[x]]$Total < 200, ])
og_keep
<- ogs[[x]][ogs[[x]]$Orthogroup %in% og_keep, ]
fogs return(fogs)
})names(ogs_filtered) <- names(ogs)
4.3 Orthogroup assessment
Now, let’s get InterPro domain annotation for the following species to assess orthogroups:
- A. thaliana
- A. arabicum
- A. lyrata
- B. carinata
- C. rubella
- C. hirsuta
- S. parvula
# Define function to read functional annotation from PLAZA 5.0
<- function(url, cols = c(1, 3)) {
read_annotation <- readr::read_tsv(url, show_col_types = FALSE, skip = 8) %>%
annot select(cols)
names(annot)[1:2] <- c("Gene", "Annotation")
return(annot)
}
# Get Interpro annotation
<- "https://ftp.psb.ugent.be/pub/plaza/plaza_public_dicots_05/InterPro/"
base <- list(
interpro Athaliana = read_annotation(paste0(base, "interpro.ath.csv.gz")),
Aarabicum = read_annotation(paste0(base, "interpro.aar.csv.gz")),
Alyrata_cvMN47 = read_annotation(paste0(base, "interpro.aly.csv.gz")),
Bcarinata_cvzd1 = read_annotation(paste0(base, "interpro.bca.csv.gz")),
Crubella_cvMonteGargano = read_annotation(paste0(base, "interpro.cru.csv.gz")),
Chirsuta = read_annotation(paste0(base, "interpro.chi.csv.gz")),
Sparvula = read_annotation(paste0(base, "interpro.spa.csv.gz"))
)<- lapply(interpro, as.data.frame)
interpro
# Calculate homogeneity scores
<- names(interpro)
species_annotation <- lapply(seq_along(ogs_filtered), function(x) {
og_assessment
message("Working on mode ", names(ogs_filtered)[x])
<- ogs_filtered[[x]]
orthogroups <- orthogroups[orthogroups$Species %in% species_annotation, ]
orthogroups
<- assess_orthogroups(orthogroups, interpro)
res $Mode <- factor(
resnames(ogs_filtered)[x],
levels = c(
"ultra_3", "ultra_2", "ultra_1_5", "ultra_1",
"default_3", "default_2", "default_1_5", "default_1"
)
)return(res)
})<- Reduce(rbind, og_assessment)
og_assessment
# Save homogeneity stats
save(
compress = "xz",
og_assessment, file = here("products", "result_files", "og_assessment_brassicaceae.rda")
)
4.4 Comparing and visualizing homogeneity statistics
Here, we will compare and visualize how the homogeneity scores are affected by:
- different species choice
- different mcl inflation values
- different DIAMOND modes (default and ultra)
Quick exploration of median and mean homogeneity:
load(here("products", "result_files", "og_assessment_brassicaceae.rda"))
# Scale value to the maximum so that values range from 0 to 1
$Median_score <- og_assessment$Median_score /
og_assessmentmax(og_assessment$Median_score)
# Mean
<- og_assessment %>%
mean_og group_by(Mode) %>%
summarise(mean = mean(Median_score))
# Median
<- og_assessment %>%
median_og group_by(Mode) %>%
summarise(median = median(Median_score))
<- inner_join(mean_og, median_og) |>
mean_and_median_og ::rename(Mean = mean, Median = median)
dplyr
::kable(mean_and_median_og, caption = "Mean and median OG scores.", digits = 3) knitr
Mode | Mean | Median |
---|---|---|
ultra_3 | 0.640 | 0.640 |
ultra_2 | 0.631 | 0.635 |
ultra_1_5 | 0.620 | 0.628 |
ultra_1 | 0.425 | 0.424 |
default_3 | 0.639 | 0.640 |
default_2 | 0.631 | 0.635 |
default_1_5 | 0.620 | 0.628 |
default_1 | 0.425 | 0.423 |
4.4.1 Global distributions
Here, we will compare and visualize all distros considering different DIAMOND modes and mcl inflation values. To start, let’s perform Wilcoxon tests for all combinations of modes and obtain effect sizes.
# Relevel 'Mode' factor
$Mode <- factor(
og_assessment$Mode,
og_assessmentlevels = c(
"ultra_3", "ultra_2", "ultra_1_5", "ultra_1",
"default_3", "default_2", "default_1_5", "default_1"
)
)
# Comparing all vs all
<- compare(og_assessment, "Median_score ~ Mode")
comp_global |>
comp_global filter_comparison() |>
::kable(
knitrcaption = "Mann-Whitney U test for differences in orthogroup scores with Wilcoxon effect sizes.",
digits = 10
)
group1 | group2 | n1 | n2 | padj | effsize | magnitude |
---|---|---|---|---|---|---|
ultra_3 | ultra_2 | 19738 | 18575 | 0.00e+00 | 0.04120347 | small |
ultra_3 | ultra_1_5 | 19738 | 16898 | 0.00e+00 | 0.06125964 | small |
ultra_3 | ultra_1 | 19738 | 5534 | 0.00e+00 | 0.34087185 | moderate |
ultra_3 | default_3 | 19738 | 19765 | 5.00e-10 | 0.03113134 | small |
ultra_3 | default_2 | 19738 | 18633 | 0.00e+00 | 0.04197169 | small |
ultra_3 | default_1_5 | 19738 | 16975 | 0.00e+00 | 0.06233198 | small |
ultra_3 | default_1 | 19738 | 5587 | 0.00e+00 | 0.34258513 | moderate |
ultra_2 | ultra_1_5 | 18575 | 16898 | 0.00e+00 | 0.04340346 | small |
ultra_2 | ultra_1 | 18575 | 5534 | 0.00e+00 | 0.33536590 | moderate |
ultra_2 | default_3 | 18575 | 19765 | 0.00e+00 | 0.04018176 | small |
ultra_2 | default_2 | 18575 | 18633 | 2.55e-08 | 0.02855053 | small |
ultra_2 | default_1_5 | 18575 | 16975 | 0.00e+00 | 0.04451653 | small |
ultra_2 | default_1 | 18575 | 5587 | 0.00e+00 | 0.33742024 | moderate |
ultra_1_5 | ultra_1 | 16898 | 5534 | 0.00e+00 | 0.32401575 | moderate |
ultra_1_5 | default_3 | 16898 | 19765 | 0.00e+00 | 0.05737302 | small |
ultra_1_5 | default_2 | 16898 | 18633 | 0.00e+00 | 0.04259062 | small |
ultra_1_5 | default_1_5 | 16898 | 16975 | 1.72e-06 | 0.02554732 | small |
ultra_1_5 | default_1 | 16898 | 5587 | 0.00e+00 | 0.32541386 | moderate |
ultra_1 | default_3 | 5534 | 19765 | 0.00e+00 | 0.34014551 | moderate |
ultra_1 | default_2 | 5534 | 18633 | 0.00e+00 | 0.33456485 | moderate |
ultra_1 | default_1_5 | 5534 | 16975 | 0.00e+00 | 0.32260079 | moderate |
ultra_1 | default_1 | 5534 | 5587 | 2.70e-02 | 0.01928759 | small |
default_3 | default_2 | 19765 | 18633 | 0.00e+00 | 0.04084430 | small |
default_3 | default_1_5 | 19765 | 16975 | 0.00e+00 | 0.06124558 | small |
default_3 | default_1 | 19765 | 5587 | 0.00e+00 | 0.34147823 | moderate |
default_2 | default_1_5 | 18633 | 16975 | 0.00e+00 | 0.04366051 | small |
default_2 | default_1 | 18633 | 5587 | 0.00e+00 | 0.33616829 | moderate |
default_1_5 | default_1 | 16975 | 5587 | 0.00e+00 | 0.32420710 | moderate |
As we can see, using mcl = 1 leads to much smaller homogeneity scores as compared to every other mcl value. For mcl values >=1.5, there are differences, but they are likely due to large sample sizes, as indicated by small effect sizes.
The default OrthoFinder mode (default DIAMOND, mcl = 1.5) leads to higher homogeneity as compared to runs using mcl = 1, both in default and ultrasensitive DIAMOND modes. The difference between the default mode and runs with higher mcl values are negligible.
Now, let’s visualize the distributions and compare the default OrthoFinder mode with every other mode, highlighting significant differences (P < 0.05) with effect size > 0.1.
# Visualize
<- list(
global_comps c("default_1_5", "ultra_1"),
c("default_1_5", "default_1")
)
<- ggviolin(
p_distros_global y = "Median_score", x = "Mode",
og_assessment, orientation = "horiz", trim = TRUE,
add = c("boxplot", "mean"),
fill = "Mode", add.params = list(fill = "white")
+
) scale_fill_manual(
values = c("ultra_3" = "#08519C", "ultra_2" = "#3182BD",
"ultra_1_5" = "#6BAED6", "ultra_1" = "#BDD7E7",
"default_3" = "#006D2C", "default_2" = "#31A354",
"default_1_5" = "#74C476", "default_1" = "#BAE4B3")
+
) stat_compare_means(
comparisons = global_comps, label = "p.signif",
method = "wilcox.test"
+
) theme(legend.position = "none") +
labs(y = "Scaled homogeneity scores", x = "OrthoFinder modes",
title = "Distribution of mean homogeneity scores for orthogroups") +
theme(plot.subtitle = ggtext::element_markdown())
p_distros_global
4.4.2 The effect of species choice
Here, we will compare the distributions of orthogroups scores using each species individually to see if the species choice has an impact on the conclusions.
<- Reduce(rbind, lapply(2:8, function(x) {
og_species_long
<- names(og_assessment)[x]
var <- gsub("_.*", "", var)
species_name
<- og_assessment[, c("Orthogroups", var, "Mode")]
long_df names(long_df) <- c("OGs", "Score", "Mode")
$Score <- long_df$Score / max(long_df$Score, na.rm = TRUE)
long_df$Species <- species_name
long_df
return(long_df)
}))
<- og_species_long[!is.na(og_species_long$Score), ]
og_species_long <- og_species_long |>
og_species_long mutate(
Species = str_replace_all(
Species, c(
"Aarabicum" = "A. arabicum",
"Alyrata" = "A. lyrata",
"Athaliana" = "A. thaliana",
"Bcarinata" = "B. carinata",
"Chirsuta" = "C. hirsuta",
"Crubella" = "C. rubella",
"Sparvula" = "S. parvula"
)
)
)
<- ggviolin(
p_distros_by_species
og_species_long, y = "Score", x = "Mode",
orientation = "horiz", trim = TRUE,
add = c("boxplot", "mean"), facet.by = "Species", nrow = 1,
fill = "Mode", add.params = list(fill = "white")
+
) scale_fill_manual(
values = c(
"ultra_3" = "#08519C", "ultra_2" = "#3182BD",
"ultra_1_5" = "#6BAED6", "ultra_1" = "#BDD7E7",
"default_3" = "#006D2C", "default_2" = "#31A354",
"default_1_5" = "#74C476", "default_1" = "#BAE4B3"
)+
) theme(legend.position = "none") +
labs(
y = "Scaled homogeneity scores", x = "OrthoFinder modes",
title = "Distribution of OG scores for each species"
+
) scale_x_discrete(
labels = c(
"default_1" = "Default, 1",
"default_1_5" = "Default, 1.5",
"default_2" = "Default, 2",
"default_3" = "Default, 3",
"ultra_1" = "Ultra, 1",
"ultra_1_5" = "Ultra, 1.5",
"ultra_2" = "Ultra, 2",
"ultra_3" = "Ultra, 3"
)+
) theme(axis.text.x = element_text(angle = 60, vjust = 0.5))
p_distros_by_species
We conclude that the species choice does not affect the comparisons of orthogroup scores among OrthoFinder runs.
4.4.3 The effect of mcl inflation parameters
Here, we will explore the impact of changing mcl inflation parameters in the homogeneity of orthogroups.
# Process data to include information on DIAMOND mode and mcl
<- og_assessment %>%
og_modes mutate(diamond = str_replace_all(Mode, "_.*", "")) %>%
mutate(mcl = str_replace_all(Mode, c("default_" = "", "ultra_" = ""))) %>%
mutate(mcl = str_replace_all(mcl, "_", ".")) %>%
mutate(mcl = as.numeric(mcl))
# Obtain P-values from Wilcoxon tests and effect sizes
<- og_modes %>%
comp_mcl_default filter(diamond == "default") %>%
compare(., "Median_score ~ mcl")
|>
comp_mcl_default filter_comparison() |>
::kable(
knitrcaption = "Mann-Whitney U test for differences in orthogroup scores between runs with different mcl parameters and standard DIAMOND mode. Effect sizes represent Wilcoxon effect sizes.",
digits = 10
)
group1 | group2 | n1 | n2 | padj | effsize | magnitude |
---|---|---|---|---|---|---|
1 | 1.5 | 5587 | 16975 | 0 | 0.32420710 | moderate |
1 | 2 | 5587 | 18633 | 0 | 0.33616829 | moderate |
1 | 3 | 5587 | 19765 | 0 | 0.34147823 | moderate |
1.5 | 2 | 16975 | 18633 | 0 | 0.04366051 | small |
1.5 | 3 | 16975 | 19765 | 0 | 0.06124558 | small |
2 | 3 | 18633 | 19765 | 0 | 0.04084430 | small |
<- og_modes %>%
comp_mcl_ultra filter(diamond == "ultra") %>%
compare(., "Median_score ~ mcl")
|>
comp_mcl_ultra filter_comparison() |>
::kable(
knitrcaption = "Mann-Whitney U test for differences in orthogroup scores between runs with different mcl parameters and ultra-sensitive DIAMOND mode. Effect sizes represent Wilcoxon effect sizes.",
digits = 10
)
group1 | group2 | n1 | n2 | padj | effsize | magnitude |
---|---|---|---|---|---|---|
1 | 1.5 | 5534 | 16898 | 0 | 0.32401575 | moderate |
1 | 2 | 5534 | 18575 | 0 | 0.33536590 | moderate |
1 | 3 | 5534 | 19738 | 0 | 0.34087185 | moderate |
1.5 | 2 | 16898 | 18575 | 0 | 0.04340346 | small |
1.5 | 3 | 16898 | 19738 | 0 | 0.06125964 | small |
2 | 3 | 18575 | 19738 | 0 | 0.04120347 | small |
In line with what we demonstrated in the global distributions, the Wilcoxon tests show that mcl = 1 leads to much lower homogeneity scores than all other mcl values, regardless of the DIAMOND mode. Additionally, increasing mcl values leads to increased homogeneity scores (i.e., homogeneity scores follow the order of mcl 3 > 2 > 1.5 > 1), but differences among mcl values >=1.5 are negligible, as indicated by small effect sizes. Thus, low P-values could be due to large sample sizes.
Now, let’s visualize the distributions.
# List of comparisons to be made
<- list(
mcl_comp c("1", "1.5"), c("1", "2"), c("1", "3"), c("1.5", "3")
)
# Plot
<- og_assessment %>%
p_distros_mcl mutate(diamond = str_replace_all(Mode, "_.*", "")) %>%
mutate(mcl = str_replace_all(Mode, c("default_" = "", "ultra_" = ""))) %>%
mutate(mcl = str_replace_all(mcl, "_", ".")) %>%
mutate(mcl = as.numeric(mcl)) %>%
ggviolin(., x = "mcl", y = "Median_score", trim = TRUE,
add = c("boxplot", "mean"), facet.by = "diamond",
fill = "Mode", add.params = list(fill = "white")) +
theme(legend.position = "none") +
scale_fill_manual(
values = c("ultra_3" = "#08519C", "ultra_2" = "#3182BD",
"ultra_1_5" = "#6BAED6", "ultra_1" = "#BDD7E7",
"default_3" = "#006D2C", "default_2" = "#31A354",
"default_1_5" = "#74C476", "default_1" = "#BAE4B3")
+
) stat_compare_means(
comparisons = mcl_comp, label = "p.signif",
method = "wilcox.test"
+
) labs(
y = "Scaled homogeneity scores", x = "MCL inflation parameters",
title = "Effect of MCL inflation values on orthogroup inference",
subtitle = "Panels represent DIAMOND sensitivity modes"
)
p_distros_mcl
4.4.4 The effect of DIAMOND mode (default vs ultra)
Here, we will investigate whether changing the DIAMOND mode (default vs ultrasensitive) in OrthoFinder affects orthogroup homogeneity.
# Compare median scores
<- og_modes %>%
mcl1 filter(mcl == 1) %>%
compare(., "Median_score ~ diamond") |>
filter_comparison()
<- og_modes %>%
mcl1_5 filter(mcl == 1.5) %>%
compare(., "Median_score ~ diamond") |>
filter_comparison()
<- og_modes %>%
mcl2 filter(mcl == 2) %>%
compare(., "Median_score ~ diamond") |>
filter_comparison()
<- og_modes %>%
mcl3 filter(mcl == 3) %>%
compare(., "Median_score ~ diamond") |>
filter_comparison()
bind_rows(
|> mutate(mcl = 1),
mcl1 |> mutate(mcl = 1.5),
mcl1_5 |> mutate(mcl = 2),
mcl2 |> mutate(mcl = 3)
mcl3 |>
) ::kable(
knitrcaption = "Mann-Whitney U test for differences in orthogroup scores between runs with different DIAMOND modes for each mcl value. Effect sizes represent Wilcoxon effect sizes.",
digits = 10
)
group1 | group2 | n1 | n2 | padj | effsize | magnitude | mcl |
---|---|---|---|---|---|---|---|
default | ultra | 5587 | 5534 | 2.10e-02 | 0.01928759 | small | 1.0 |
default | ultra | 16975 | 16898 | 1.29e-06 | 0.02554732 | small | 1.5 |
default | ultra | 18633 | 18575 | 1.82e-08 | 0.02855053 | small | 2.0 |
default | ultra | 19765 | 19738 | 3.00e-10 | 0.03113134 | small | 3.0 |
Again, we can see that there are significant P-values, but very small effect sizes, indicating no difference resulting from the DIAMOND mode. Thus, users can run the default mode of DIAMOND, which is way faster, without any loss of biological signal for orthogroup inference.
Let’s visualize the distributions.
# Plot
<- og_modes %>%
p_distros_diamond ggviolin(., x = "diamond", y = "Median_score", trim = TRUE,
add = c("boxplot", "mean"), facet.by = "mcl", ncol = 4,
fill = "Mode", add.params = list(fill = "white")) +
theme(legend.position = "none") +
scale_fill_manual(
values = c("ultra_3" = "#08519C", "ultra_2" = "#3182BD",
"ultra_1_5" = "#6BAED6", "ultra_1" = "#BDD7E7",
"default_3" = "#006D2C", "default_2" = "#31A354",
"default_1_5" = "#74C476", "default_1" = "#BAE4B3")
+
) labs(y = "Scaled homogeneity scores", x = "DIAMOND mode",
title = "Effect of DIAMOND sensitivity mode on orthogroup inference",
subtitle = "Panels represent MCL inflation parameters") +
theme(plot.subtitle = ggtext::element_markdown())
p_distros_diamond
4.5 Functional analysis of homogeneous and heterogeneous gene families
By looking at the global distributions of homogeneity scores, we can see that all distributions have a similar shape. This pattern suggests that some gene families tend to be more homogeneous (scores close to 1), while others tend to include domains that are not shared by all members. The latter can be, for instance, rapidly evolving families that gain or lose domains at faster rates.
To explore what these groups of families contain, we will perform a functional enrichment analysis each group. First of anything, let’s plot the distribution for the default OrthoFinder mode and highlight the groups.
# Plot distro with groups
<- og_assessment %>%
p_distros_groups filter(Mode == "default_1_5") %>%
ggplot(aes(x = Median_score)) +
geom_density(fill = "grey80", color = "black") +
::theme_pubr() +
ggpubrlabs(
y = "Density", x = "Orthogroup scores",
title = "Distribution of mean homogeneity scores for orthogroups",
subtitle = "Scores for the default OrthoFinder mode"
+
) geom_vline(xintercept = 0.56, color = "firebrick", linetype = 2) +
geom_vline(xintercept = 0.87, color = "firebrick", linetype = 2)
p_distros_groups
Now, let’s get vectors of genes in orthogroups from each of the groups highlighted in the figure above.
<- c(
species "Athaliana", "Aarabicum", "Alyrata_cvMN47", "Bcarinata_cvzd1",
"Crubella_cvMonteGargano", "Chirsuta", "Sparvula"
)
# Get genes and orthogroups (default mode)
<- ogs_filtered$default_1_5
genes_ogs
# Keep only species for which we have functional annotation info
<- genes_ogs[genes_ogs$Species %in% species, c(1, 3)]
genes_ogs
# Get background genes (all genes in OGs)
<- genes_ogs$Gene
background
# Find orthogroups for each group
## G1: 0 - 0.56
<- og_assessment %>%
g1 filter(Mode == "default_1_5") %>%
mutate(Median_score = Median_score / max(Median_score)) %>%
filter(Median_score <= 0.56) %>%
select(Orthogroups) %>%
inner_join(., genes_ogs, by = c("Orthogroups" = "Orthogroup")) %>%
pull(Gene)
## G2: 0.56 - 0.87
<- og_assessment %>%
g2 filter(Mode == "default_1_5") %>%
mutate(Median_score = Median_score / max(Median_score)) %>%
filter(Median_score > 0.56 & Median_score <= 0.87) %>%
select(Orthogroups) %>%
inner_join(., genes_ogs, by = c("Orthogroups" = "Orthogroup")) %>%
pull(Gene)
## G3: 0.87 - 1
<- og_assessment %>%
g3 filter(Mode == "default_1_5") %>%
mutate(Median_score = Median_score / max(Median_score)) %>%
filter(Median_score > 0.87) %>%
select(Orthogroups) %>%
inner_join(., genes_ogs, by = c("Orthogroups" = "Orthogroup")) %>%
pull(Gene)
Next, we need to get functional annotation from PLAZA.
options(timeout = 6000)
<- c("ath", "aar", "aly", "bca", "cru", "chi", "spa")
plaza_species
# GO annotation
<- "https://ftp.psb.ugent.be/pub/plaza/plaza_public_dicots_05/GO/"
bgo <- lapply(plaza_species, function(x) {
go <- read_annotation(paste0(bgo, "go.", x, ".csv.gz"), c(1, 3, 8))
y <- y[, c(2, 1)] %>% distinct(., .keep_all = TRUE)
term2gene <- y[, c(2, 3)] %>% distinct(., .keep_all = TRUE)
term2name <- list(
res TERM2GENE = as.data.frame(term2gene),
TERM2NAME = as.data.frame(term2name)
)return(res)
})<- Reduce(rbind, lapply(go, function(x) return(x$TERM2GENE)))
go_gene <- Reduce(rbind, lapply(go, function(x) return(x$TERM2NAME)))
go_des
## Remove non-BP terms
<- file.path(tempdir(), "ath_bp.rds")
ath_bp download.file(
"https://jokergoo.github.io/rGREAT_genesets/genesets/bp_athaliana_eg_gene_go_genesets.rds",
destfile = ath_bp
)<- readRDS(ath_bp)
gobp <- names(gobp)
gobp <- go_gene[go_gene$Annotation %in% gobp, ]
go_gene <- go_des[go_des$Annotation %in% gobp, ]
go_des rm(gobp)
# MapMan annotation
<- "https://ftp.psb.ugent.be/pub/plaza/plaza_public_dicots_05/MapMan/"
bmm <- lapply(plaza_species, function(x) {
mm <- read_annotation(paste0(bmm, "mapman.", x, ".csv.gz"), c(3:5))
y <- y[, c(2, 1)] %>% distinct(., .keep_all = TRUE)
term2gene <- y[, c(2, 3)] %>% distinct(., .keep_all = TRUE)
term2name <- list(
res TERM2GENE = as.data.frame(term2gene),
TERM2NAME = as.data.frame(term2name)
)return(res)
})<- Reduce(rbind, lapply(mm, function(x) return(x$TERM2GENE)))
mm_gene <- Reduce(rbind, lapply(mm, function(x) return(x$TERM2NAME))) %>%
mm_des mutate(desc = str_replace_all(desc, ".*\\.", ""))
# InterPro
<- "https://ftp.psb.ugent.be/pub/plaza/plaza_public_dicots_05/InterPro/"
bi <- lapply(plaza_species, function(x) {
ip <- read_annotation(paste0(bi, "interpro.", x, ".csv.gz"), c(1, 3, 4))
y <- y[, c(2, 1)] %>% distinct(., .keep_all = TRUE)
term2gene <- y[, c(2, 3)] %>% distinct(., .keep_all = TRUE)
term2name <- list(
res TERM2GENE = as.data.frame(term2gene),
TERM2NAME = as.data.frame(term2name)
)return(res)
})<- Reduce(rbind, lapply(ip, function(x) return(x$TERM2GENE)))
ip_gene <- Reduce(rbind, lapply(ip, function(x) return(x$TERM2NAME))) ip_des
Now, we can finally perform the enrichment analyses.
# Perform enrichment analyses
library(clusterProfiler)
<- list(
tgene GO = go_gene,
MapMan = mm_gene,
InterPro = ip_gene
)<- list(
tname GO = go_des,
MapMan = mm_des,
InterPro = ip_des
)
## G1
<- Reduce(rbind, lapply(seq_along(tgene), function(x) {
g1_sea return(as.data.frame(enricher(
universe = background,
g1, TERM2GENE = tgene[[x]], TERM2NAME = tname[[x]]
1:6])
))[,
}))
## G2
<- Reduce(rbind, lapply(seq_along(tgene), function(x) {
g2_sea return(as.data.frame(enricher(
universe = background,
g2, TERM2GENE = tgene[[x]], TERM2NAME = tname[[x]]
1:6])
))[,
}))
## G3
<- Reduce(rbind, lapply(seq_along(tgene), function(x) {
g3_sea return(as.data.frame(enricher(
universe = background,
g3, TERM2GENE = tgene[[x]], TERM2NAME = tname[[x]]
1:6])
))[,
}))
# Combine SEA results in a single data frame and export it as a .tsv file
## Combine data frames
<- rbind(
sea_res %>% mutate(group = "G1"),
g1_sea %>% mutate(group = "G2"),
g2_sea %>% mutate(group = "G3")
g3_sea
)
## Export .tsv
write_tsv(
sea_res,file = here("products", "tables", "enrichment_bygroup.tsv")
)
The complete enrichment results are stored in the table enrichment_bygroup.tsv
. To make visualization and interpretation easier, we will perform semantic similarity analysis to group redundant terms and get a global view of processes associated with each cluster.
Here, we will only use GO terms from the category “Biological Process”.
# Semantic similarity analysis for GO-BP terms
## G1
<- pairwise_termsim(enricher(
g1_summary universe = background,
g1, TERM2GENE = go_gene, TERM2NAME = go_des
))
## G2
<- pairwise_termsim(enricher(
g2_summary universe = background,
g2, TERM2GENE = go_gene, TERM2NAME = go_des
))
## G3
<- pairwise_termsim(enricher(
g3_summary universe = background,
g3, TERM2GENE = go_gene, TERM2NAME = go_des
))
# Save objects
save(
compress = "xz",
g1_summary, file = here("products", "result_files", "g1_summary.rda")
)
save(
compress = "xz",
g2_summary, file = here("products", "result_files", "g2_summary.rda")
)
save(
compress = "xz",
g3_summary, file = here("products", "result_files", "g3_summary.rda")
)
Now, let’s plot the results.
# Tree plot
<- treeplot(g1_summary, nWords = 0) +
p_tree_g1 ::scale_fill_jama() +
ggsciggtitle("Group 1")
$layers[[4]] <- NULL
p_tree_g1
<- treeplot(g2_summary, nCluster = 7, nWords = 0) +
p_tree_g2 ::scale_fill_jama() +
ggsciggtitle("Group 2")
$layers[[4]] <- NULL
p_tree_g2
<- treeplot(g3_summary, nWords = 0) +
p_tree_g3 ::scale_fill_jama() +
ggsciggtitle("Group 3")
$layers[[4]] <- NULL
p_tree_g3
# Replace P.adj with -log10(P.adj)
$data$color <- -log10(p_tree_g1$data$color)
p_tree_g1$data$color <- -log10(p_tree_g2$data$color)
p_tree_g2$data$color <- -log10(p_tree_g3$data$color)
p_tree_g3
# Combine plots in one, with shared legends
<- range(
rcol c(
$data$color, p_tree_g2$data$color, p_tree_g3$data$color
p_tree_g1
),na.rm = TRUE
)<- range(
rsize c(
$data$count, p_tree_g2$data$count, p_tree_g2$data$count
p_tree_g1
),na.rm = TRUE
)
wrap_plots(p_tree_g1, p_tree_g2, p_tree_g3) +
plot_layout(guides = "collect") &
scale_color_continuous(name = "-Log10(P)", limits = signif(rcol, 2)) &
scale_size_continuous(name = "Gene count", limits = rsize) &
theme(legend.position = "bottom")
# Dot plot
<- dotplot(g1_summary, showCategory = 20) + ggtitle("Group 1")
p_dot_g1 <- dotplot(g2_summary, showCategory = 20) + ggtitle("Group 2")
p_dot_g2 <- dotplot(g3_summary, showCategory = 20) + ggtitle("Group 3")
p_dot_g3
# Replace P.adj with -log10(P.adj)
$data$p.adjust <- -log10(p_dot_g1$data$p.adjust)
p_dot_g1$data$p.adjust <- -log10(p_dot_g2$data$p.adjust)
p_dot_g2$data$p.adjust <- -log10(p_dot_g3$data$p.adjust)
p_dot_g3
# Combine plots in one, keep shared legend
<- range(
rcol c(
$data$p.adjust, p_dot_g2$data$p.adjust,
p_dot_g1$data$p.adjust
p_dot_g3
),na.rm = TRUE
)<- range(
rsize c(
$data$Count, p_dot_g2$data$Count, p_dot_g3$data$Count
p_dot_g1
),na.rm = TRUE
)
wrap_plots(p_dot_g1, p_dot_g2, p_dot_g3) +
plot_layout(guides = "collect") &
scale_color_continuous(name = "-Log10(P)", limits = signif(rcol, 2)) &
scale_size_continuous(name = "Gene count", limits = rsize) &
theme(legend.position = "bottom")
The plots show that genes associated to particular biological processes tend to be clustered in the same orthogroup (group 3, scores closer to 1), while genes associated to other biological processes tend to be more dispersed across orthogroups (groups 1 and 2, scores closer to 1), possibly because they are evolving faster and, hence, have lower sequence similarity among themselves. In details, these genes and processes are:
Group 1: ATP production, water and K+ transport, seed oilbody biogenesis, and response to nitrate and ethylene.
Group 2: sulfur amino acid metabolsm, spliceosome biogenesis, beta-1,3-glucan biosynthesis, response to brassinosteroids, xylem development, exocytosis, and calcium and sulfate transport.
Group 3: photosynthesis, zinc and amino acid transport, DNA replication, endocytosis, cell-cell junction assembly, and toxin catabolism.
4.6 Is there an association between OG score and OG gene length?
Emms and Kelly (2015) have demonstrated a gene length bias that influences the accuracy of orthogroup detection. This is because short sequences cannot produce large bit scores or low e-values, and long sequences produce many hits with scores better than those for the best hits of short sequences (Emms and Kelly 2015). OrthoFinder implements a score transform that claims to eliminate such bias. But does it remove the bias completely?
To answer this question, we will use homogeneity scores for the default OrthoFinder run (default DIAMOND mode, mcl = 1.5).
First of all, let’s calculate the mean and median gene length for each orthogroup.
# Combine proteomes into a single AAStringSet object and clean gene names
names(brassicaceae_proteomes) <- NULL
<- do.call(c, brassicaceae_proteomes)
proteomes rm(brassicaceae_proteomes)
names(proteomes) <- gsub("\\\t.*", "", names(proteomes))
names(proteomes) <- gsub(" .*", "", names(proteomes))
names(proteomes) <- gsub("\\.[0-9]$", "", names(proteomes))
names(proteomes) <- gsub("\\.[0-9]\\.p$", "", names(proteomes))
names(proteomes) <- gsub("\\.t[0-9]$", "", names(proteomes))
names(proteomes) <- gsub("\\.g$", "", names(proteomes))
# Load only orthogroups from the default OrthoFinder run
<- read_orthogroups(file.path(tempdir(), "Orthogroups_default_1_5.tsv")) %>%
og mutate(Gene = str_replace_all(
c(
Gene, "\\\t.*" = "",
"\\.[0-9]$" = "",
"\\.[0-9]\\.p$" = "",
"\\.t[0-9]$" = "",
"\\.g$" = ""
)%>%
)) ::select(Orthogroup, Gene)
dplyr
# Calculate mean gene lengths for each orthogroup
<- data.frame(
gene_lengths Gene = names(proteomes),
Length = Biostrings::width(proteomes)
)
<- og %>%
og_gene_lengths inner_join(., gene_lengths) %>%
group_by(Orthogroup) %>%
summarise(
mean_length = mean(Length),
median_length = median(Length)
)
# Add homogeneity scores to data frame of mean gene length per orthogroup
<- og_assessment %>%
og_length_and_scores ::filter(Mode == "default_1_5") %>%
dplyr::select(Orthogroups, Mean_H, Median_H) %>%
dplyrinner_join(., og_gene_lengths, by = c("Orthogroups" = "Orthogroup"))
Now, since the number of domains in a protein correlates with its length, let’s also calculate the median number of domains in an orthogroup.
# Calculate median number of domains for each orthogroup
<- Reduce(rbind, interpro) |>
og_domain_count ::count(Gene) |>
dplyrinner_join(og, by = "Gene") |>
group_by(Orthogroup) |>
summarise(
median_ndomains = median(n)
)
<- left_join(
og_length_and_scores by = c("Orthogroups" = "Orthogroup")
og_length_and_scores, og_domain_count,
)
# Save data
save(
og_length_and_scores,file = here("products", "result_files", "og_length_and_scores.rda"),
compress = "xz"
)
Next, we will investigate if the number of domains can be a confounder in associations between the orthogroup score and gene length.
# Explore associations between the number of domains and gene length
<- ggplot(
p_length_domains
og_length_and_scores, aes(y = log2(median_ndomains + 1), x = log2(median_length + 1))
+
) geom_point(alpha = 0.3) +
theme_bw() +
labs(
title = "Number of domains and gene length",
x = expression(Log[2] ~ "median gene length"),
y = expression(Log[2] ~ "median number of domains")
)
<- cor.test(
cor_length_domains log2(og_length_and_scores$median_length + 1),
log2(og_length_and_scores$median_ndomains + 1),
method = "spearman",
exact = FALSE
)
# Show plot and correlation test statistics
p_length_domains
cor_length_domains
Spearman's rank correlation rho
data: log2(og_length_and_scores$median_length + 1) and log2(og_length_and_scores$median_ndomains + 1)
S = 4.7564e+11, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.4165488
The figure and test above show that there is indeed a moderate correlation (\(\rho \approx 0.4, P <0.001\)) between gene length and number of domains. Because of that, we will use partial Spearman’s correlation to measure the association between orthogroup scores and gene length while controlling for the number of domains.
# Calculate partial Spearman's correlations
## Without accounting for the number of domains
<- ppcor::pcor(
cor1 data.frame(
Length = log2(og_length_and_scores$median_length + 1),
Score = log2(og_length_and_scores$Median_H + 1)
),method = "spearman"
) cor1
$estimate
Length Score
Length 1.0000000 -0.1903208
Score -0.1903208 1.0000000
$p.value
Length Score
Length 0.000000e+00 3.370622e-138
Score 3.370622e-138 0.000000e+00
$statistic
Length Score
Length 0.00000 -25.25673
Score -25.25673 0.00000
$n
[1] 16975
$gp
[1] 0
$method
[1] "spearman"
## Accounting for the number of domains
<- ppcor::pcor.test(
cor2 log2(og_length_and_scores$median_length + 1),
log2(og_length_and_scores$Median_H + 1),
log2(og_length_and_scores$median_ndomains + 1),
method = "spearman"
) cor2
estimate p.value statistic n gp Method
1 0.08593287 3.417431e-29 11.23661 16975 1 spearman
The tests show a weak correlation between orthogroup scores and gene length. When the number of domains is included as a covariate, we find no correlation at all, indicating that OrthoFinder’s normalization score is effective.
Finally, let’s plot the data and add the test statistics.
<- og_length_and_scores %>%
p_association_length_homogeneity mutate(
logH = log10(Median_H + 1),
logLength = log10(median_length + 1)
%>%
) ggscatter(
x = "logLength", y = "logH", alpha = 0.3,
., color = "black", size = 1
+
) annotate(
"text",
x = 1.71, y = 0.055,
label = paste(
"rho", "==", signif(cor1$estimate[1, 2], 2)
),parse = TRUE
+
) annotate(
"text",
x = 1.71, y = 0.035,
label = paste(
"rho[partial]", "==", signif(cor2$estimate, 2)
),parse = TRUE
+
) annotate(
"text",
x = 1.71, y = 0.015,
label = paste("P", "<", 2.2e-16), parse = TRUE
+
) labs(
title = "Relationship between OG score and gene length",
x = expression(Log[10] ~ "median gene length"),
y = expression(Log[10] ~ "median homogeneity score")
)
p_association_length_homogeneity
Session info
This document was created under the following conditions:
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.3.0 (2023-04-21)
os Ubuntu 20.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Brussels
date 2023-08-08
pandoc 3.1.1 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-5 2016-07-21 [1] CRAN (R 4.3.0)
AnnotationDbi 1.62.0 2023-04-25 [1] Bioconductor
ape 5.7-1 2023-03-13 [1] CRAN (R 4.3.0)
aplot 0.1.10 2023-03-08 [1] CRAN (R 4.3.0)
backports 1.4.1 2021-12-13 [1] CRAN (R 4.3.0)
beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.3.0)
Biobase 2.60.0 2023-04-25 [1] Bioconductor
BiocGenerics 0.46.0 2023-04-25 [1] Bioconductor
BiocParallel 1.34.0 2023-04-25 [1] Bioconductor
Biostrings 2.68.0 2023-04-25 [1] Bioconductor
bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.3.0)
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.3.0)
blob 1.2.4 2023-03-17 [1] CRAN (R 4.3.0)
broom 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
car 3.1-2 2023-03-30 [1] CRAN (R 4.3.0)
carData 3.0-5 2022-01-06 [1] CRAN (R 4.3.0)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0)
clusterProfiler * 4.8.1 2023-05-03 [1] Bioconductor
codetools 0.2-19 2023-02-01 [4] CRAN (R 4.2.2)
cogeqc * 1.4.0 2023-04-25 [1] Bioconductor
coin 1.4-2 2021-10-08 [1] CRAN (R 4.3.0)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
commonmark 1.9.0 2023-03-17 [1] CRAN (R 4.3.0)
cowplot 1.1.1 2020-12-30 [1] CRAN (R 4.3.0)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.0)
data.table 1.14.8 2023-02-17 [1] CRAN (R 4.3.0)
DBI 1.1.3 2022-06-18 [1] CRAN (R 4.3.0)
digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.0)
DOSE 3.26.1 2023-05-03 [1] Bioconductor
downloader 0.4 2015-07-09 [1] CRAN (R 4.3.0)
dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)
enrichplot * 1.20.0 2023-04-25 [1] Bioconductor
evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.0)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.0)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
fastmatch 1.1-3 2021-07-23 [1] CRAN (R 4.3.0)
fgsea 1.26.0 2023-04-25 [1] Bioconductor
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
GenomeInfoDb 1.36.0 2023-04-25 [1] Bioconductor
GenomeInfoDbData 1.2.10 2023-04-28 [1] Bioconductor
ggbeeswarm 0.7.2 2023-04-29 [1] CRAN (R 4.3.0)
ggforce 0.4.1 2022-10-04 [1] CRAN (R 4.3.0)
ggfun 0.0.9 2022-11-21 [1] CRAN (R 4.3.0)
ggnewscale 0.4.8 2022-10-06 [1] CRAN (R 4.3.0)
ggplot2 * 3.4.1 2023-02-10 [1] CRAN (R 4.3.0)
ggplotify 0.1.0 2021-09-02 [1] CRAN (R 4.3.0)
ggpubr * 0.6.0 2023-02-10 [1] CRAN (R 4.3.0)
ggraph 2.1.0 2022-10-09 [1] CRAN (R 4.3.0)
ggrepel 0.9.3 2023-02-03 [1] CRAN (R 4.3.0)
ggsci 3.0.0 2023-03-08 [1] CRAN (R 4.3.0)
ggsignif 0.6.4 2022-10-13 [1] CRAN (R 4.3.0)
ggtext 0.1.2 2022-09-16 [1] CRAN (R 4.3.0)
ggtree 3.8.0 2023-04-25 [1] Bioconductor
glue 1.6.2 2022-02-24 [1] CRAN (R 4.3.0)
GO.db 3.17.0 2023-05-02 [1] Bioconductor
GOSemSim 2.26.0 2023-04-25 [1] Bioconductor
graphlayouts 1.0.0 2023-05-01 [1] CRAN (R 4.3.0)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.3.0)
gridGraphics 0.5-1 2020-12-13 [1] CRAN (R 4.3.0)
gridtext 0.1.5 2022-09-16 [1] CRAN (R 4.3.0)
gson 0.1.0 2023-03-07 [1] CRAN (R 4.3.0)
gtable 0.3.3 2023-03-21 [1] CRAN (R 4.3.0)
HDO.db 0.99.1 2023-06-20 [1] Bioconductor
here * 1.0.1 2020-12-13 [1] CRAN (R 4.3.0)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.3.0)
htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.0)
httr 1.4.5 2023-02-24 [1] CRAN (R 4.3.0)
igraph 1.4.2 2023-04-07 [1] CRAN (R 4.3.0)
IRanges 2.34.0 2023-04-25 [1] Bioconductor
jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.3.0)
KEGGREST 1.40.0 2023-04-25 [1] Bioconductor
knitr 1.43 2023-05-25 [1] CRAN (R 4.3.0)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.3.0)
lattice 0.20-45 2021-09-22 [4] CRAN (R 4.2.0)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.3.0)
libcoin 1.0-9 2021-09-27 [1] CRAN (R 4.3.0)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.0)
lubridate * 1.9.2 2023-02-10 [1] CRAN (R 4.3.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
markdown 1.6 2023-04-07 [1] CRAN (R 4.3.0)
MASS 7.3-58.2 2023-01-23 [4] CRAN (R 4.2.2)
Matrix 1.5-1 2022-09-13 [4] CRAN (R 4.2.1)
matrixStats 1.0.0 2023-06-02 [1] CRAN (R 4.3.0)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
modeltools 0.2-23 2020-03-05 [1] CRAN (R 4.3.0)
multcomp 1.4-25 2023-06-20 [1] CRAN (R 4.3.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0)
mvtnorm 1.1-3 2021-10-08 [1] CRAN (R 4.3.0)
nlme 3.1-162 2023-01-31 [4] CRAN (R 4.2.2)
patchwork * 1.1.2 2022-08-19 [1] CRAN (R 4.3.0)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
plyr 1.8.8 2022-11-11 [1] CRAN (R 4.3.0)
png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0)
polyclip 1.10-4 2022-10-20 [1] CRAN (R 4.3.0)
ppcor 1.1 2015-12-03 [1] CRAN (R 4.3.0)
purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)
qvalue 2.32.0 2023-04-25 [1] Bioconductor
R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.0)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.3.0)
Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.3.0)
RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.3.0)
readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.3.0)
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)
rmarkdown 2.23 2023-07-01 [1] CRAN (R 4.3.0)
rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.3.0)
RSQLite 2.3.1 2023-04-03 [1] CRAN (R 4.3.0)
rstatix * 0.7.2 2023-02-01 [1] CRAN (R 4.3.0)
rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.3.0)
S4Vectors 0.38.0 2023-04-25 [1] Bioconductor
sandwich 3.0-2 2022-06-15 [1] CRAN (R 4.3.0)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)
scatterpie 0.2.1 2023-06-07 [1] CRAN (R 4.3.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
shadowtext 0.1.2 2022-04-22 [1] CRAN (R 4.3.0)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.3.0)
survival 3.5-3 2023-02-12 [4] CRAN (R 4.2.2)
TH.data 1.1-2 2023-04-17 [1] CRAN (R 4.3.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
tidygraph 1.2.3 2023-02-01 [1] CRAN (R 4.3.0)
tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.3.0)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.0)
tidytree 0.4.2 2022-12-18 [1] CRAN (R 4.3.0)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0)
treeio 1.24.1 2023-05-31 [1] Bioconductor
tweenr 2.0.2 2022-09-06 [1] CRAN (R 4.3.0)
tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.3.0)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0)
vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.3.0)
vipor 0.4.5 2017-03-22 [1] CRAN (R 4.3.0)
viridis 0.6.2 2021-10-13 [1] CRAN (R 4.3.0)
viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.3.0)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.0)
xfun 0.39 2023-04-20 [1] CRAN (R 4.3.0)
xml2 1.3.4 2023-04-27 [1] CRAN (R 4.3.0)
XVector 0.40.0 2023-04-25 [1] Bioconductor
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
yulab.utils 0.0.6 2022-12-20 [1] CRAN (R 4.3.0)
zlibbioc 1.46.0 2023-04-25 [1] Bioconductor
zoo 1.8-12 2023-04-13 [1] CRAN (R 4.3.0)
[1] /home/faalm/R/x86_64-pc-linux-gnu-library/4.3
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
──────────────────────────────────────────────────────────────────────────────