diff --git a/goatools/gosubdag/plot/go_name_shorten.py b/goatools/gosubdag/plot/go_name_shorten.py index 308cba9..37c63f3 100644 --- a/goatools/gosubdag/plot/go_name_shorten.py +++ b/goatools/gosubdag/plot/go_name_shorten.py @@ -4,146 +4,149 @@ __author__ = "DV Klopfenstein" -class ShortenText(object): - """Shorten text for concise display.""" - - greek2hex = {'alpha':0x03b1, 'beta':0x03b2, 'gamma':0x03b3, 'delta':0x03b4} - greek2tex = {'alpha':r'$\alpha$', 'beta':r'$\beta$', 'gamma':r'$\gamma$', 'delta':r'$\delta$'} - - def __init__(self): - self.greek2uni = {g:unichr(h).encode('utf-8') for g, h in self.greek2hex.items()} - self.keep = [ - "defense response to protozoan", - "defense response to bacterium", - "cellular response to interferon-beta", - "defense response to virus", - "response to interferon-gamma", - "innate immune response", - "inflammatory response", - "response to virus", - "immune response"] - - def get_short_plot_name(self, goobj): - """Shorten some GO names so plots are smaller.""" - name = goobj.name - if self._keep_this(name): - return self.replace_greek(name) - name = name.replace("cellular response to chemical stimulus", - "cellular rsp. to chemical stim.") - depth = goobj.depth - if depth > 1: - name = name.replace("regulation of ", "reg. of ") - name = name.replace("positive reg", "+reg") - name = name.replace("negative reg", "-reg") - name = name.replace("involved in", "in") - if depth > 2: - name = name.replace("antigen processing and presentation", "a.p.p") - name = name.replace("MHC class I", "MHC-I") - if depth == 4: - if goobj.id == "GO:0002460": - before = " ".join([ +greek2uni = { + "alpha": "α", + "beta": "β", + "gamma": "γ", + "delta": "δ", +} +greek2tex = { + "alpha": r"$\alpha$", + "beta": r"$\beta$", + "gamma": r"$\gamma$", + "delta": r"$\delta$", +} +keep = [ + "defense response to protozoan", + "defense response to bacterium", + "cellular response to interferon-beta", + "defense response to virus", + "response to interferon-gamma", + "innate immune response", + "inflammatory response", + "response to virus", + "immune response", +] + + +def get_short_plot_name(goobj): + """Shorten some GO names so plots are smaller.""" + name = goobj.name + if _keep_this(name): + return replace_greek(name) + name = name.replace( + "cellular response to chemical stimulus", "cellular rsp. to chemical stim." + ) + depth = goobj.depth + if depth > 1: + name = name.replace("regulation of", "reg. of") + name = name.replace("positive reg", "+reg") + name = name.replace("negative reg", "-reg") + name = name.replace("involved in", "in") + if depth > 2: + name = name.replace("antigen processing and presentation", "a.p.p") + name = name.replace("MHC class I", "MHC-I") + if depth == 4: + if goobj.id == "GO:0002460": + before = " ".join( + [ "adaptive immune response based on somatic recombination of", - "immune receptors built from immunoglobulin superfamily domains"]) - name = name.replace( - before, - "rsp. based on somatic recombination of Ig immune receptors") - if depth > 3: - name = name.replace("signaling pathway", "sig. pw.") - name = name.replace("response", "rsp.") - name = name.replace("immunoglobulin superfamily domains", "Ig domains") - name = name.replace("immunoglobulin", "Ig") - if depth > 4: - name = name.replace("production", "prod.") - if depth == 6 or depth == 5: - name = name.replace("tumor necrosis factor", "TNF") - name = self.replace_greek(name) - return name + "immune receptors built from immunoglobulin superfamily domains", + ] + ) + name = name.replace( + before, + "rsp. based on somatic recombination of Ig immune receptors", + ) + if depth > 3: + name = name.replace("signaling pathway", "sig. pw.") + name = name.replace("response", "rsp.") + name = name.replace("immunoglobulin superfamily domains", "Ig domains") + name = name.replace("immunoglobulin", "Ig") + if depth > 4: + name = name.replace("production", "prod.") + if depth == 6 or depth == 5: + name = name.replace("tumor necrosis factor", "TNF") + name = replace_greek(name) + return name - def shorten_go_name_ptbl1(self, name): - """Shorten GO name for tables in paper.""" - if self._keep_this(name): - return name - name = name.replace("negative", "neg.") - name = name.replace("positive", "pos.") - name = name.replace("response", "rsp.") - name = name.replace("regulation", "reg.") - name = name.replace("antigen processing and presentation", "app.") - return name - def shorten_go_name_ptbl3(self, name, dcnt): - """Shorten GO description for Table 3 in manuscript.""" - if self._keep_this(name): - return name - name = name.replace("positive regulation of immune system process", - "+ reg. of immune sys. process") - name = name.replace("positive regulation of immune response", - "+ reg. of immune response") - name = name.replace("positive regulation of cytokine production", - "+ reg. of cytokine production") - if dcnt < 40: - name = name.replace("antigen processing and presentation", "a.p.p.") - if dcnt < 10: - name = name.replace("negative", "-") - name = name.replace("positive", "+") - #name = name.replace("tumor necrosis factor production", "tumor necrosis factor prod.") - name = name.replace("tumor necrosis factor production", "TNF production") - if dcnt < 4: - name = name.replace("regulation", "reg.") - name = name.replace("exogenous ", "") - name = name.replace(" via ", " w/") - name = name.replace("T cell mediated cytotoxicity", "cytotoxicity via T cell") - name = name.replace('involved in', 'in') - name = name.replace('-positive', '+') +def shorten_go_name_ptbl1(name): + """Shorten GO name for tables in paper.""" + if _keep_this(name): return name + name = name.replace("negative", "neg.") + name = name.replace("positive", "pos.") + name = name.replace("response", "rsp.") + name = name.replace("regulation", "reg.") + name = name.replace("antigen processing and presentation", "app.") + return name - def replace_greek(self, name): - """Replace text representing greek letters with greek letters.""" - name = name.replace('gamma-delta', 'gammadelta') - name = name.replace('interleukin-1 beta', 'interleukin-1beta') - greek_present = False - for greek_txt, uni in self.greek2uni.items(): - if greek_txt in name: - greek_present = True - name = name.replace(greek_txt, "{B}".format(B=uni)) - if greek_present is True: - name = unicode(name, 'utf-8') # For writing to xlsx - return name - def replace_greek_tex(self, name): - """Replace text representing greek letters with greek letters.""" - name = name.replace('gamma-delta', 'gammadelta') - name = name.replace('interleukin-1 beta', 'interleukin-1beta') - # greek_present = False - for greek_txt, tex in self.greek2tex.items(): - if greek_txt in name: - # greek_present = True - name = name.replace(greek_txt, "{B}".format(B=tex)) - # if greek_present is True: - # name = texcode(name, 'utf-8') # For writing to xlsx +def shorten_go_name_ptbl3(name, dcnt): + """Shorten GO description for Table 3 in manuscript.""" + if _keep_this(name): return name + name = name.replace( + "positive regulation of immune system process", + "+ reg. of immune sys. process", + ) + name = name.replace( + "positive regulation of immune response", "+ reg. of immune response" + ) + name = name.replace( + "positive regulation of cytokine production", + "+ reg. of cytokine production", + ) + if dcnt < 40: + name = name.replace("antigen processing and presentation", "a.p.p.") + if dcnt < 10: + name = name.replace("negative", "-") + name = name.replace("positive", "+") + name = name.replace("tumor necrosis factor production", "TNF production") + if dcnt < 4: + name = name.replace("regulation", "reg.") + name = name.replace("exogenous ", "") + name = name.replace(" via ", " w/") + name = name.replace("T cell mediated cytotoxicity", "cytotoxicity via T cell") + name = name.replace("involved in", "in") + name = name.replace("-positive", "+") + return name - def shorten_go_name_all(self, name): - """Shorten GO name for tables in paper, supplemental materials, and plots.""" - name = self.replace_greek(name) - name = name.replace("MHC class I", "MHC-I") - return name - def _keep_this(self, name): - """Return True if there are to be no modifications to name.""" - for keep_name in self.keep: - if name == keep_name: - return True - return False - - #@staticmethod - #def _shorten_go_name(name): - # name = name.replace(' and ', ' & ') - # if gont.dcnt < 15: - # name = name.replace('MHC class I', 'MHC-I') - # name = name.replace('antigen processing & presentation', 'a. p. p.') - # name = name.replace('positive regulation', 'pos. reg.') - # elif gont.depth > 2: - # name = name.replace(' & presentation', ' & pres.') - # return name +def replace_greek(name): + """Replace text representing greek letters with greek letters.""" + name = name.replace("gamma-delta", "gammadelta") + name = name.replace("interleukin-1 beta", "interleukin-1beta") + for greek_txt, uni in greek2uni.items(): + if greek_txt in name: + name = name.replace(greek_txt, uni) + return name + + +def replace_greek_tex(name): + """Replace text representing greek letters with greek letters.""" + name = name.replace("gamma-delta", "gammadelta") + name = name.replace("interleukin-1 beta", "interleukin-1beta") + for greek_txt, tex in greek2tex.items(): + if greek_txt in name: + name = name.replace(greek_txt, tex) + return name + + +def shorten_go_name_all(name): + """Shorten GO name for tables in paper, supplemental materials, and plots.""" + name = replace_greek(name) + name = name.replace("MHC class I", "MHC-I") + return name + + +def _keep_this(name): + """Return True if there are to be no modifications to name.""" + for keep_name in keep: + if name == keep_name: + return True + return False + # Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved. diff --git a/goatools/gosubdag/plot/go_node.py b/goatools/gosubdag/plot/go_node.py index cd0f22c..ef9948c 100644 --- a/goatools/gosubdag/plot/go_node.py +++ b/goatools/gosubdag/plot/go_node.py @@ -1,26 +1,32 @@ """Create a pydot Node for a GO Term.""" -__copyright__ = "Copyright (C) 2016-present, DV Klopfenstein, H Tang, All rights reserved." +__copyright__ = ( + "Copyright (C) 2016-present, DV Klopfenstein, H Tang, All rights reserved." +) __author__ = "DV Klopfenstein" import pydot -from goatools.gosubdag.plot.go_name_shorten import ShortenText -from goatools.gosubdag.utils import extract_kwargs + +from ..utils import extract_kwargs +from .go_name_shorten import get_short_plot_name + class GoNodeOpts: """Processes GO Node plot args.""" - exp_keys = set(['goobj2fncname', 'go2txt', 'objgoea', 'prt_flds']) + exp_keys = set(["goobj2fncname", "go2txt", "objgoea", "prt_flds"]) - exp_elems = set([ - 'c2ps', # Count of an object's Parent - 'prt_pcnt', # Always print parent count: pN - 'parentcnt', # Print parent count only if not all parents are shown - 'childcnt', # Always print child count: cN - 'mark_alt_id', # Put an 'a' after GO:NNNNNNN if it is an alternate GO ID - 'shorten', # Shorten GO description - 'no_name', # Do not print GO description - ]) + exp_elems = set( + [ + "c2ps", # Count of an object's Parent + "prt_pcnt", # Always print parent count: pN + "parentcnt", # Print parent count only if not all parents are shown + "childcnt", # Always print child count: cN + "mark_alt_id", # Put an 'a' after GO:NNNNNNN if it is an alternate GO ID + "shorten", # Shorten GO description + "no_name", # Do not print GO description + ] + ) def __init__(self, gosubdag, **kws): self.gosubdag = gosubdag @@ -29,18 +35,18 @@ def __init__(self, gosubdag, **kws): def get_kws(self): """Only load keywords if they are specified by the user.""" - ret = self.kws['dict'].copy() - act_set = self.kws['set'] - if 'shorten' in act_set and 'goobj2fncname' not in ret: - ret['goobj2fncname'] = ShortenText().get_short_plot_name - if 'dict' in self.kws and 'go2txt' in self.kws['dict']: - self._init_go2txt_altgos(self.kws['dict']['go2txt']) + ret = self.kws["dict"].copy() + act_set = self.kws["set"] + if "shorten" in act_set and "goobj2fncname" not in ret: + ret["goobj2fncname"] = get_short_plot_name + if "dict" in self.kws and "go2txt" in self.kws["dict"]: + self._init_go2txt_altgos(self.kws["dict"]["go2txt"]) return ret def get_present(self): """Only store keywords if they are specified by the user.""" # The presence of c2ps marks that the user specified parentcnt=True - return self.kws['set'].difference(['parentcnt']) + return self.kws["set"].difference(["parentcnt"]) def _init_go2txt_altgos(self, go2txt): """If user provided GO.alt_id, add the corressponding main GO ID, if needed""" @@ -55,11 +61,13 @@ def _init_go2txt_altgos(self, go2txt): class GoNode: """Creates pydot Node containing a GO term.""" - exclude = {'tfreq',} + exclude = { + "tfreq", + } def __init__(self, gosubdag, objcolor, optobj): - self.gosubdag = gosubdag # GoSubDag - self.objcolor = objcolor # Go2Color -> color options + self.gosubdag = gosubdag # GoSubDag + self.objcolor = objcolor # Go2Color -> color options self.kws = optobj.get_kws() # GoNodeOpts -> text options self.present = optobj.get_present() self.go2color = objcolor.go2color @@ -72,14 +80,15 @@ def get_node(self, goid, goobj): shape="box", style="rounded, filled", fillcolor=self.go2color.get(goid, "white"), - color=self.objcolor.get_bordercolor(goid)) + color=self.objcolor.get_bordercolor(goid), + ) def str_fmthdr(self, goid, goobj): """Return hdr line seen inside a GO Term box.""" # Shorten: Ex: GO:0007608 -> G0007608 go_txt = goid.replace("GO:", "G") - if 'mark_alt_id' in self.present and goid != goobj.id: - go_txt += 'a' + if "mark_alt_id" in self.present and goid != goobj.id: + go_txt += "a" return go_txt # ---------------------------------------------------------------------------------- @@ -89,27 +98,27 @@ def get_node_text(self, goid, goobj): txt = [] # Header line: "GO:0036464 L04 D06" hdr = self.get_hdr(goid, goobj) - if hdr != '': + if hdr != "": txt.append(hdr) # GO name line: "cytoplamic ribonucleoprotein" - if 'no_name' not in self.present: + if "no_name" not in self.present: txt.append(self._get_go_name(goobj)) # study info line: "24 genes" - if 'objgoea' in self.kws: - study_txt = self.kws['objgoea'].get_study_txt(goid) + if "objgoea" in self.kws: + study_txt = self.kws["objgoea"].get_study_txt(goid) if study_txt is not None: txt.append(study_txt) # Add user-specified text, if needed - if 'go2txt' in self.kws and goid in self.kws['go2txt']: - txt.append(self.kws['go2txt'][goid]) + if "go2txt" in self.kws and goid in self.kws["go2txt"]: + txt.append(self.kws["go2txt"][goid]) return "\n".join(txt) def _get_go_name(self, goobj): """Return GO name/description, as is or edited by a user function.""" - if 'goobj2fncname' not in self.kws: + if "goobj2fncname" not in self.kws: return goobj.name.replace(",", "\n") # Return GO Term name edited by user-provided function - return self.kws['goobj2fncname'](goobj) + return self.kws["goobj2fncname"](goobj) def get_hdr(self, goid, goobj): """Header for GO Term box. Ex: 'G0001719 L6 D9 d3.'""" @@ -117,50 +126,50 @@ def get_hdr(self, goid, goobj): ntgo = self.gosubdag.go2nt.get(goid) prt_flds = self._get_prtflds() # Add letter to depth-01 GO Node. - if 'D1' in prt_flds and goobj.depth == 1: + if "D1" in prt_flds and goobj.depth == 1: hdr.append("{ABC} ".format(ABC=ntgo.D1)) - if 'GO' in prt_flds: + if "GO" in prt_flds: hdr.append(self.str_fmthdr(goid, goobj)) - if 'level' in prt_flds: + if "level" in prt_flds: hdr.append("L{level}".format(level=goobj.level)) - if 'depth' in prt_flds: + if "depth" in prt_flds: hdr.append("D{depth}".format(depth=goobj.depth)) - if 'reldepth' in prt_flds: + if "reldepth" in prt_flds: hdr.append("R{reldepth}".format(reldepth=ntgo.reldepth)) # Print count of parents for this GO term - if 'c2ps' in self.kws: - self._add_parent_cnt(hdr, goobj, self.kws['c2ps']) + if "c2ps" in self.kws: + self._add_parent_cnt(hdr, goobj, self.kws["c2ps"]) # Print count of children for this GO term childcnt_str = self._get_hdr_childcnt(goobj, ntgo) if childcnt_str: hdr.append(childcnt_str) # Print count of all descendants down to the leaf-level for this GO term - if 'dcnt' in prt_flds: + if "dcnt" in prt_flds: hdr.append("d{N}".format(N=ntgo.dcnt)) - if 'tinfo' in prt_flds: + if "tinfo" in prt_flds: hdr.append("i{I:4.02f}".format(I=ntgo.tinfo)) - if 'tfreq' in prt_flds: + if "tfreq" in prt_flds: hdr.append("f{I:4.03f}".format(I=ntgo.tfreq)) - if 'REL' in prt_flds: + if "REL" in prt_flds: hdr.append("{R}".format(R=ntgo.REL_short)) - return " ".join(hdr) if hdr else '' + return " ".join(hdr) if hdr else "" def _get_prtflds(self): """Get print fields for GO header.""" # User-specified print fields - ntflds = self.gosubdag.prt_attr['flds'] - prt_flds = self.kws.get('prt_flds') + ntflds = self.gosubdag.prt_attr["flds"] + prt_flds = self.kws.get("prt_flds") if prt_flds: return prt_flds.intersection(ntflds) # Default print fields exclude = set(self.exclude) if self.gosubdag.relationships: - exclude.add('level') + exclude.add("level") return set(f for f in ntflds if f not in exclude) def _get_hdr_childcnt(self, goobj, ntgo): """Get string representing count of children for this GO term.""" - if 'childcnt' in self.present: + if "childcnt" in self.present: return "c{N}".format(N=len(goobj.children)) if self.gosubdag.relationships and not goobj.children and ntgo.dcnt != 0: return "c0" @@ -170,7 +179,11 @@ def _add_parent_cnt(self, hdr, goobj, c2ps): """Add the parent count to the GO term box for if not all parents are plotted.""" if goobj.id in c2ps: parents = c2ps[goobj.id] - if 'prt_pcnt' in self.present or parents and len(goobj.parents) != len(parents): + if ( + "prt_pcnt" in self.present + or parents + and len(goobj.parents) != len(parents) + ): assert len(goobj.parents) == len(set(goobj.parents)) hdr.append("p{N}".format(N=len(set(goobj.parents)))) diff --git a/makefile b/makefile index b468fc3..2f5db14 100755 --- a/makefile +++ b/makefile @@ -362,6 +362,7 @@ NOSETESTS := \ tests/test_get_unique_fields.py \ tests/test_go_depth1.py \ tests/test_go_draw.py \ + tests/test_go_name_shorten.py \ tests/test_go_print.py \ tests/test_goea_errors.py \ tests/test_goea_local.py \ diff --git a/tests/test_go_name_shorten.py b/tests/test_go_name_shorten.py new file mode 100644 index 0000000..387cc4d --- /dev/null +++ b/tests/test_go_name_shorten.py @@ -0,0 +1,30 @@ +import pytest + +from goatools.gosubdag.plot.go_name_shorten import get_short_plot_name + + +@pytest.mark.parametrize( + "inp, outp", + [ + ("inflammatory response", "inflammatory response"), + ("response to virus", "response to virus"), + ("immune response", "immune response"), + ("cellular response to chemical stimulus", "cellular rsp. to chemical stim."), + ("regulation of", "reg. of"), + ("positive reg", "+reg"), + ("negative reg", "-reg"), + ("involved in", "in"), + ("antigen processing and presentation", "a.p.p"), + ("MHC class I", "MHC-I"), + ("signaling pathway", "sig. pw."), + ("response", "rsp."), + ("immunoglobulin superfamily domains", "Ig domains"), + ("immunoglobulin", "Ig"), + ("production", "prod."), + ("tumor necrosis factor", "TNF"), + ("alpha beta T cell activation", "α β T cell activation"), + ], +) +def test_get_short_plot_name(inp: str, outp: str): + goobj = type("goobj", (object,), {"name": inp, "depth": 6, "id": "GO:0000000"}) + assert get_short_plot_name(goobj) == outp