erikyao/ProntoOntologyReader.py Secret

## ProntoOntologyReader.py
from pronto import Ontology
from pronto.entity.attributes import Relationships


class ProntoOntologyReader:
    """
    Read an OBO ontology file with pronto and build an ontology document (a plain dict) for each term.

    The file is parsed once in the constructor; `get_all_term_ids()` lists the ids of the non-obsolete terms and
    `read_ontology()` converts a single term into its document.
    """

    # Some super-hub nodes in the ontology graph (e.g. CHEBI:50860, "organic molecular entity") have up to
    # 141,138 descendants and 7,336 successors.
    #
    # The 99.9% quantiles of the numbers of successors/predecessors/descendants/ancestors are 224.92, 8, 2187.12
    # and 86.
    #
    # To keep documents bounded, at most this many children/parents/descendants/ancestors ids are included in
    # any ontology document (the `num_*` counters still report the untruncated sizes).
    NODE_FAMILY_CAPACITY = 2000

    def __init__(self, obo_file_path):
        """
        Parse the ontology file located at `obo_file_path` and keep the resulting pronto `Ontology` object.
        """
        self.obo_file_path = obo_file_path

        self.ontology_object = Ontology(obo_file_path)

    def get_all_term_ids(self):
        """
        Return the set of ids of all terms in the ontology that are not flagged as obsolete.
        """
        return {term.id for term in self.ontology_object.terms() if not term.obsolete}

    @classmethod
    def convert_subsets(cls, subsets: set):
        """
        Convert the 'subsets' attribute of a pronto term to its star rating.

        The 'subsets' attribute has 1 of the 3 unique values, {'1_STAR'}, {'2_STAR'}, {'3_STAR'}. This method
        converts such a 1-element set to the corresponding integer, i.e. 1, 2 or 3, by reading the leading digit of
        the element. An empty (or None) `subsets` yields None.
        """
        if not subsets:
            return None

        return int(next(iter(subsets))[0])

    @classmethod
    def convert_relationships(cls, relationships: "Relationships"):
        """
        Convert the 'relationships' attribute of a pronto term to a dict of relationship id -> list of term ids.

        The attribute is a 'pronto.entity.attributes.Relationships' object. Its 'items()' method returns an
        'ItemsView' which iterates over pairs of

            <class 'pronto.relationship.Relationship'>, <class 'pronto.term.TermSet'>

        Example of a returned dict:

            {'has_role': ['CHEBI:68495', 'CHEBI:38637', 'CHEBI:35610'], 'has_functional_parent': ['CHEBI:28179']}

        Currently there are 9 types of relationship in the ChEBI obo file:

          {'has_functional_parent', 'has_parent_hydride', 'has_part', 'has_role',
           'is_conjugate_acid_of', 'is_conjugate_base_of', 'is_enantiomer_of', 'is_substituent_group_from',
           'is_tautomer_of'}
        """
        return {
            relationship.id: [term.id for term in term_set]
            for relationship, term_set in relationships.items()
        }

    def read_ontology(self, term_id):
        """
        Read the term in the ontology object given `term_id` and convert it to an ontology document.

        Returns None when the lookup of `term_id` yields nothing or the term is obsolete; otherwise returns a dict.

        Key mapping:

          term.alternate_ids -> ontology_dict['secondary_chebi_id']
          term.definition    -> ontology_dict['definition']  (None when the term has no definition)
          term.name          -> ontology_dict['name']
          term.relationships -> ontology_dict['relationship']
          term.subsets       -> ontology_dict['star']

        The 'children', 'parents', 'descendants' and 'ancestors' lists are truncated to NODE_FAMILY_CAPACITY ids,
        while the matching 'num_*' entries hold the full counts.
        """
        term = self.ontology_object.get(term_id)
        if term is None or term.obsolete:
            return None

        # distance=1 restricts the traversal to direct neighbours; distance=None walks the whole hierarchy
        parents = [_term.id for _term in term.superclasses(distance=1, with_self=False)]
        children = [_term.id for _term in term.subclasses(distance=1, with_self=False)]
        ancestors = [_term.id for _term in term.superclasses(distance=None, with_self=False)]
        descendants = [_term.id for _term in term.subclasses(distance=None, with_self=False)]

        # pronto.definition.Definition is a subclass of 'str', so str() yields the plain text. A term without a
        # definition must stay None: str(None) would otherwise store the literal text "None" in the document.
        definition = term.definition
        capacity = self.NODE_FAMILY_CAPACITY

        # The num_* keys use the same naming convention as in the Mondo parser
        #   See https://github.com/biothings/mydisease.info/blob/master/src/plugins/mondo/parser.py
        return {
            "id": term_id,
            "secondary_chebi_id": list(term.alternate_ids),
            "definition": None if definition is None else str(definition),
            "name": term.name,
            "relationship": self.convert_relationships(term.relationships),
            "star": self.convert_subsets(term.subsets),
            "num_children": len(children),
            "num_parents": len(parents),
            "num_descendants": len(descendants),
            "num_ancestors": len(ancestors),
            "children": children[:capacity],
            "parents": parents[:capacity],
            "descendants": descendants[:capacity],
            "ancestors": ancestors[:capacity],
        }
	from pronto import Ontology
	from pronto.entity.attributes import Relationships


	class ProntoOntologyReader:
	    """
	    Read an OBO ontology file with pronto and build an ontology document (a plain dict) for each term.

	    The file is parsed once in the constructor; `get_all_term_ids()` lists the ids of the non-obsolete terms and
	    `read_ontology()` converts a single term into its document.
	    """

	    # Some super-hub nodes in the ontology graph (e.g. CHEBI:50860, "organic molecular entity") have up to
	    # 141,138 descendants and 7,336 successors.
	    #
	    # The 99.9% quantiles of the numbers of successors/predecessors/descendants/ancestors are 224.92, 8, 2187.12
	    # and 86.
	    #
	    # To keep documents bounded, at most this many children/parents/descendants/ancestors ids are included in
	    # any ontology document (the `num_*` counters still report the untruncated sizes).
	    NODE_FAMILY_CAPACITY = 2000

	    def __init__(self, obo_file_path):
	        """
	        Parse the ontology file located at `obo_file_path` and keep the resulting pronto `Ontology` object.
	        """
	        self.obo_file_path = obo_file_path

	        self.ontology_object = Ontology(obo_file_path)

	    def get_all_term_ids(self):
	        """
	        Return the set of ids of all terms in the ontology that are not flagged as obsolete.
	        """
	        return {term.id for term in self.ontology_object.terms() if not term.obsolete}

	    @classmethod
	    def convert_subsets(cls, subsets: set):
	        """
	        Convert the 'subsets' attribute of a pronto term to its star rating.

	        The 'subsets' attribute has 1 of the 3 unique values, {'1_STAR'}, {'2_STAR'}, {'3_STAR'}. This method
	        converts such a 1-element set to the corresponding integer, i.e. 1, 2 or 3, by reading the leading digit of
	        the element. An empty (or None) `subsets` yields None.
	        """
	        if not subsets:
	            return None

	        return int(next(iter(subsets))[0])

	    @classmethod
	    def convert_relationships(cls, relationships: "Relationships"):
	        """
	        Convert the 'relationships' attribute of a pronto term to a dict of relationship id -> list of term ids.

	        The attribute is a 'pronto.entity.attributes.Relationships' object. Its 'items()' method returns an
	        'ItemsView' which iterates over pairs of

	            <class 'pronto.relationship.Relationship'>, <class 'pronto.term.TermSet'>

	        Example of a returned dict:

	            {'has_role': ['CHEBI:68495', 'CHEBI:38637', 'CHEBI:35610'], 'has_functional_parent': ['CHEBI:28179']}

	        Currently there are 9 types of relationship in the ChEBI obo file:

	          {'has_functional_parent', 'has_parent_hydride', 'has_part', 'has_role',
	           'is_conjugate_acid_of', 'is_conjugate_base_of', 'is_enantiomer_of', 'is_substituent_group_from',
	           'is_tautomer_of'}
	        """
	        return {
	            relationship.id: [term.id for term in term_set]
	            for relationship, term_set in relationships.items()
	        }

	    def read_ontology(self, term_id):
	        """
	        Read the term in the ontology object given `term_id` and convert it to an ontology document.

	        Returns None when the lookup of `term_id` yields nothing or the term is obsolete; otherwise returns a dict.

	        Key mapping:

	          term.alternate_ids -> ontology_dict['secondary_chebi_id']
	          term.definition    -> ontology_dict['definition']  (None when the term has no definition)
	          term.name          -> ontology_dict['name']
	          term.relationships -> ontology_dict['relationship']
	          term.subsets       -> ontology_dict['star']

	        The 'children', 'parents', 'descendants' and 'ancestors' lists are truncated to NODE_FAMILY_CAPACITY ids,
	        while the matching 'num_*' entries hold the full counts.
	        """
	        term = self.ontology_object.get(term_id)
	        if term is None or term.obsolete:
	            return None

	        # distance=1 restricts the traversal to direct neighbours; distance=None walks the whole hierarchy
	        parents = [_term.id for _term in term.superclasses(distance=1, with_self=False)]
	        children = [_term.id for _term in term.subclasses(distance=1, with_self=False)]
	        ancestors = [_term.id for _term in term.superclasses(distance=None, with_self=False)]
	        descendants = [_term.id for _term in term.subclasses(distance=None, with_self=False)]

	        # pronto.definition.Definition is a subclass of 'str', so str() yields the plain text. A term without a
	        # definition must stay None: str(None) would otherwise store the literal text "None" in the document.
	        definition = term.definition
	        capacity = self.NODE_FAMILY_CAPACITY

	        # The num_* keys use the same naming convention as in the Mondo parser
	        #   See https://github.com/biothings/mydisease.info/blob/master/src/plugins/mondo/parser.py
	        return {
	            "id": term_id,
	            "secondary_chebi_id": list(term.alternate_ids),
	            "definition": None if definition is None else str(definition),
	            "name": term.name,
	            "relationship": self.convert_relationships(term.relationships),
	            "star": self.convert_subsets(term.subsets),
	            "num_children": len(children),
	            "num_parents": len(parents),
	            "num_descendants": len(descendants),
	            "num_ancestors": len(ancestors),
	            "children": children[:capacity],
	            "parents": parents[:capacity],
	            "descendants": descendants[:capacity],
	            "ancestors": ancestors[:capacity],
	        }