From ebede8885267c2608ee4c0bdf1e72ba8828272fd Mon Sep 17 00:00:00 2001
From: Chun-Hui Gao <gaospecial@gmail.com>
Date: Wed, 27 Jul 2022 21:26:43 +0800
Subject: [PATCH 1/2] add GO mapper

---
 DESCRIPTION |  2 +-
 NAMESPACE   |  1 +
 R/gson.R    | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2f246209..9637b9a9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -50,4 +50,4 @@ Packaged: NA
 biocViews: Annotation, Clustering, GeneSetEnrichment, GO, KEGG,
     MultipleComparison, Pathways, Reactome, Visualization
 Encoding: UTF-8
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
diff --git a/NAMESPACE b/NAMESPACE
index 688f27d5..f2e88a13 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -70,6 +70,7 @@ export(gseMKEGG)
 export(gseWP)
 export(gseaplot)
 export(gsfilter)
+export(gson_GO_mapper)
 export(gson_KEGG)
 export(heatplot)
 export(idType)
diff --git a/R/gson.R b/R/gson.R
index 802787c9..251d1145 100644
--- a/R/gson.R
+++ b/R/gson.R
@@ -54,3 +54,87 @@ gson_GO <- function(OrgDb, keytype = 'ENTREZID', ont = "BP") {
         accessed_date = as.character(Sys.Date())
     )
 }
+
+
+#' Build a gson object that annotates Gene Ontology terms
+#'
+#' @param data a two-column data.frame of original GO annotation. The columns are "gene_id" and "go_id".
+#' @param ont type of GO annotation, which is "ALL", "BP", "MF", or "CC". default: "ALL".
+#' @param species name of species. Default: NULL.
+#' @param ... pass to `gson::gson()` constructor.
+#'
+#' @return a `gson` instance
+#' @export
+#'
+#' @examples
+#'  data = data.frame(gene_id = "gene1", 
+#'                    go_id = c("GO:0035492", "GO:0009764", "GO:0031063", "GO:0033714", "GO:0036349"))
+#'  gson_GO_mapper(data, species = "E. coli")
+gson_GO_mapper <- function(data, ont = c("ALL", "BP", "CC", "MF"),
+                           species = NULL, ...) {
+  ont <- match.arg(ont)
+
+  # Validate the input early: both mapping columns must be present.
+  if (!all(c("gene_id", "go_id") %in% colnames(data))) {
+    stop("`data` must contain the columns \"gene_id\" and \"go_id\".")
+  }
+  data <- unique(data) # drop duplicated annotation rows
+  if (nrow(data) == 0) {
+    # `simpleError()` only builds a condition object; `stop()` signals it.
+    stop("Data is empty in this call.")
+  }
+  # Index by character so factor columns are not used as integer positions.
+  data$gene_id <- as.character(data$gene_id)
+  data$go_id <- as.character(data$go_id)
+
+  # resources from `GO.db`
+  goterms <- AnnotationDbi::Ontology(GO.db::GOTERM)
+  termname <- AnnotationDbi::Term(GO.db::GOTERM)
+  go.db_info <- GO.db::GO_dbInfo()
+  go.db_source_date <- go.db_info[go.db_info$name == "GOSOURCEDATE", "value"]
+  ancestor_maps <- c(GO.db::GOBPANCESTOR, GO.db::GOCCANCESTOR,
+                     GO.db::GOMFANCESTOR)
+  ancestor_map <- unlist(lapply(ancestor_maps, as.list), recursive = FALSE)
+
+  # Drop GO terms unknown to the installed `GO.db`, as the warning announces.
+  is_unknown <- is.na(unname(goterms[data$go_id]))
+  if (any(is_unknown)) {
+    warning(sprintf("%s GO term(s) are too new for current `GO.db` [source date: %s],\n  and are to be dropped. Consider to update `GO.db` if possible.",
+                    sum(is_unknown), go.db_source_date))
+    data <- data[!is_unknown, , drop = FALSE]
+  }
+  if (nrow(data) == 0) {
+    stop("No GO term in `data` is known to the installed `GO.db`.")
+  }
+
+  # Map every annotated term to its ancestors, keyed by the annotated gene.
+  ancestor_list <- ancestor_map[data$go_id]
+  names(ancestor_list) <- data$gene_id
+  ancestor_go <- AnnotationDbi::unlist2(ancestor_list)
+
+  # gsid2gene: direct annotations plus inherited ancestors, minus the "all" id.
+  all_gsid <- c(ancestor_go, data$go_id)
+  gsid2gene <- data.frame(
+    gsid = all_gsid,
+    gene = c(names(ancestor_go), data$gene_id),
+    ontology = goterms[all_gsid]
+  ) %>%
+    dplyr::filter(.data$gsid != "all") %>%
+    unique()
+  if (ont != "ALL") {
+    gsid2gene <- dplyr::filter(gsid2gene, .data$ontology == ont)
+  }
+
+  # gsid2name: one row per retained GO id with its term name from `GO.db`
+  uniq_gsid <- as.character(unique(gsid2gene$gsid))
+  gsid2name <- data.frame(gsid = uniq_gsid,
+                          name = as.character(termname[uniq_gsid]))
+
+  # construct `gson` object
+  gson::gson(
+    gsid2gene = gsid2gene, gsid2name = gsid2name, species = species,
+    gsname = paste0("Gene Ontology: ", ont),
+    version = sprintf("[GO.db source date: %s]", go.db_source_date),
+    accessed_date = as.character(Sys.Date()), ...
+  )
+}
\ No newline at end of file

From 8a233504c4074941931bd35de4a93142a4b59ff1 Mon Sep 17 00:00:00 2001
From: Chun-Hui Gao <gaospecial@gmail.com>
Date: Wed, 27 Jul 2022 21:32:21 +0800
Subject: [PATCH 2/2] update author and version

---
 DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9637b9a9..84744caa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: clusterProfiler
 Type: Package
 Title: A universal enrichment tool for interpreting omics data
-Version: 4.5.1.902
+Version: 4.5.1.904
 Authors@R: c(
     person(given = "Guangchuang", family = "Yu",        email = "guangchuangyu@gmail.com",   role  = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-6485-8781")),
     person(given = "Li-Gen",      family = "Wang",      email = "reeganwang020@gmail.com",   role  = "ctb"),
@@ -9,7 +9,8 @@ Authors@R: c(
     person(given = "Xiao",        family = "Luo",       email = "l77880853349@163.com",      role  = "ctb"),
     person(given = "Meijun",      family = "Chen",      email = "mjchen1996@outlook.com",    role  = "ctb"),
     person(given = "Giovanni",    family = "Dall'Olio", email = "giovanni.dallolio@upf.edu", role = "ctb"),
-    person(given = "Wanqian",     family = "Wei",       email = "altair_wei@outlook.com",    role = "ctb")
+    person(given = "Wanqian",     family = "Wei",       email = "altair_wei@outlook.com",    role = "ctb"),
+    person(given = "Chun-Hui",    family = "Gao",       email = "gaospecial@gmail.com",      role = "ctb", comment = c(ORCID = "0000-0002-1445-7939"))
 	)
 Maintainer: Guangchuang Yu <guangchuangyu@gmail.com>
 Description: This package supports functional characteristics of both coding and non-coding genomics data for thousands of species with up-to-date gene annotation. It provides a univeral interface for gene functional annotation from a variety of sources and thus can be applied in diverse scenarios. It provides a tidy interface to access, manipulate, and visualize enrichment results to help users achieve efficient data interpretation. Datasets obtained from multiple treatments and time points can be analyzed and compared in a single run, easily revealing functional consensus and differences among distinct conditions.