In [1]:
from chembl_webresource_client.unichem import unichem_client as unichem

Get src_compound_ids from src_compound_id

Obtain a list of all src_compound_ids from all sources which are CURRENTLY assigned to the same structure as a currently assigned query src_compound_id. The output will include query src_compound_id if it is a valid src_compound_id with a current assignment.

In [2]:
# Query by src_compound_id: 'CHEMBL12' is the identifier and 1 is the src_id of
# the source it comes from (the recorded output lists CHEMBL12 under src_id u'1').
ret = unichem.get('CHEMBL12',1)
In [3]:
len(ret)
Out[3]:
25
In [4]:
ret
Out[4]:
[{u'src_compound_id': u'SAM001246536', u'src_id': u'8'},
 {u'src_compound_id': u'diazepam', u'src_id': u'12'},
 {u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},
 {u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},
 {u'src_compound_id': u'LSM-2359', u'src_id': u'25'},
 {u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},
 {u'src_compound_id': u'PA449283', u'src_id': u'17'},
 {u'src_compound_id': u'HMDB14967', u'src_id': u'18'},
 {u'src_compound_id': u'10016206', u'src_id': u'24'},
 {u'src_compound_id': u'49575', u'src_id': u'7'},
 {u'src_compound_id': u'14799843', u'src_id': u'21'},
 {u'src_compound_id': u'3016', u'src_id': u'22'},
 {u'src_compound_id': u'3364', u'src_id': u'4'},
 {u'src_compound_id': u'C06948', u'src_id': u'6'},
 {u'src_compound_id': u'DB00829', u'src_id': u'2'},
 {u'src_compound_id': u'MCULE-8990989144', u'src_id': u'23'},
 {u'src_compound_id': u'50000766', u'src_id': u'31'},
 {u'src_compound_id': u'DZP', u'src_id': u'3'},
 {u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51', u'src_id': u'11'},
 {u'src_compound_id': u'C06948', u'src_id': u'27'},
 {u'src_compound_id': u'439-14-5', u'src_id': u'26'},
 {u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'},
 {u'src_compound_id': u'536763', u'src_id': u'10'},
 {u'src_compound_id': u'11100-37-1', u'src_id': u'26'},
 {u'src_compound_id': u'J2.044C', u'src_id': u'29'}]

Note also that by adding an additional (optional) argument (a valid src_id), the results will be restricted to only the source specified by that argument.

In [5]:
# The third positional argument (2) restricts the results to a single target
# source; the recorded output contains only that source's identifier (DB00829).
ret = unichem.get('CHEMBL12',1,2)
In [6]:
len(ret)
Out[6]:
1
In [7]:
ret
Out[7]:
[{u'src_compound_id': u'DB00829'}]

Get all src_compound_ids from src_compound_id

Obtain a list of all src_compound_ids from all sources (including BOTH current AND obsolete assignments) to the same structure as a currently assigned query src_compound_id. The output will include query src_compound_id if it is a valid src_compound_id with a current assignment.

In [8]:
# all=True adds obsolete assignments to the result. Each record gains an
# 'assignment' field; in the recorded output the one extra row compared with the
# current-only query (DB07699) carries assignment u'0', all others u'1'.
ret = unichem.get('CHEMBL12', 1, all=True)
In [9]:
len(ret)
Out[9]:
26
In [10]:
ret
Out[10]:
[{u'assignment': u'1', u'src_compound_id': u'SAM001246536', u'src_id': u'8'},
 {u'assignment': u'1', u'src_compound_id': u'diazepam', u'src_id': u'12'},
 {u'assignment': u'1', u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},
 {u'assignment': u'1', u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},
 {u'assignment': u'1', u'src_compound_id': u'LSM-2359', u'src_id': u'25'},
 {u'assignment': u'1', u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},
 {u'assignment': u'1', u'src_compound_id': u'PA449283', u'src_id': u'17'},
 {u'assignment': u'1', u'src_compound_id': u'HMDB14967', u'src_id': u'18'},
 {u'assignment': u'1', u'src_compound_id': u'10016206', u'src_id': u'24'},
 {u'assignment': u'1', u'src_compound_id': u'49575', u'src_id': u'7'},
 {u'assignment': u'1', u'src_compound_id': u'14799843', u'src_id': u'21'},
 {u'assignment': u'1', u'src_compound_id': u'3016', u'src_id': u'22'},
 {u'assignment': u'1', u'src_compound_id': u'3364', u'src_id': u'4'},
 {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'6'},
 {u'assignment': u'1', u'src_compound_id': u'DB00829', u'src_id': u'2'},
 {u'assignment': u'0', u'src_compound_id': u'DB07699', u'src_id': u'2'},
 {u'assignment': u'1',
  u'src_compound_id': u'MCULE-8990989144',
  u'src_id': u'23'},
 {u'assignment': u'1', u'src_compound_id': u'50000766', u'src_id': u'31'},
 {u'assignment': u'1', u'src_compound_id': u'DZP', u'src_id': u'3'},
 {u'assignment': u'1',
  u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51',
  u'src_id': u'11'},
 {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'27'},
 {u'assignment': u'1', u'src_compound_id': u'439-14-5', u'src_id': u'26'},
 {u'assignment': u'1', u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'},
 {u'assignment': u'1', u'src_compound_id': u'536763', u'src_id': u'10'},
 {u'assignment': u'1', u'src_compound_id': u'11100-37-1', u'src_id': u'26'},
 {u'assignment': u'1', u'src_compound_id': u'J2.044C', u'src_id': u'29'}]

Note also that by adding an additional (optional) argument (a valid src_id), the results will be restricted to only the source specified by that argument.

In [11]:
# Same query restricted to target source 2, still including obsolete
# assignments (all=True).
ret = unichem.get('CHEMBL12', 1, 2, all=True)
In [12]:
len(ret)
Out[12]:
2
In [13]:
ret
Out[13]:
[{u'assignment': u'1', u'src_compound_id': u'DB00829'},
 {u'assignment': u'0', u'src_compound_id': u'DB07699'}]

Get mapping

Obtain a full mapping between two sources. Uses only currently assigned src_compound_ids from both sources.

In [14]:
# Full mapping between source 4 and source 1. Each returned record is a dict
# keyed by the two src_ids as strings (the recorded ret[0] has keys u'1' and u'4').
ret = unichem.map(4,1)
In [15]:
len(ret)
Out[15]:
4872
In [16]:
ret[0]
Out[16]:
{u'1': u'CHEMBL493', u'4': u'35'}

Get src_compound_ids from InChIKey

Obtain a list of all src_compound_ids (from all sources) which are CURRENTLY assigned to a query InChIKey

In [17]:
# Query by InChIKey: no src_id is passed. This key is the standardinchikey that
# the structure query for CHEMBL12 reports elsewhere in this notebook.
ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N')
In [18]:
len(ret)
Out[18]:
25
In [19]:
ret
Out[19]:
[{u'src_compound_id': u'SAM001246536', u'src_id': u'8'},
 {u'src_compound_id': u'diazepam', u'src_id': u'12'},
 {u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},
 {u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},
 {u'src_compound_id': u'LSM-2359', u'src_id': u'25'},
 {u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},
 {u'src_compound_id': u'PA449283', u'src_id': u'17'},
 {u'src_compound_id': u'HMDB14967', u'src_id': u'18'},
 {u'src_compound_id': u'10016206', u'src_id': u'24'},
 {u'src_compound_id': u'49575', u'src_id': u'7'},
 {u'src_compound_id': u'14799843', u'src_id': u'21'},
 {u'src_compound_id': u'3016', u'src_id': u'22'},
 {u'src_compound_id': u'3364', u'src_id': u'4'},
 {u'src_compound_id': u'C06948', u'src_id': u'6'},
 {u'src_compound_id': u'DB00829', u'src_id': u'2'},
 {u'src_compound_id': u'MCULE-8990989144', u'src_id': u'23'},
 {u'src_compound_id': u'50000766', u'src_id': u'31'},
 {u'src_compound_id': u'DZP', u'src_id': u'3'},
 {u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51', u'src_id': u'11'},
 {u'src_compound_id': u'C06948', u'src_id': u'27'},
 {u'src_compound_id': u'439-14-5', u'src_id': u'26'},
 {u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'},
 {u'src_compound_id': u'536763', u'src_id': u'10'},
 {u'src_compound_id': u'11100-37-1', u'src_id': u'26'},
 {u'src_compound_id': u'J2.044C', u'src_id': u'29'}]

Get src_compound_ids all from InChIKey

Obtain a list of all src_compound_ids (from all sources) which have current AND obsolete assignments to a query InChIKey

In [20]:
# InChIKey query with all=True: obsolete assignments are returned alongside
# current ones, and each record carries an 'assignment' field.
ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N', all=True)
In [21]:
len(ret)
Out[21]:
26
In [22]:
ret
Out[22]:
[{u'assignment': u'1', u'src_compound_id': u'SAM001246536', u'src_id': u'8'},
 {u'assignment': u'1', u'src_compound_id': u'diazepam', u'src_id': u'12'},
 {u'assignment': u'1', u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},
 {u'assignment': u'1', u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},
 {u'assignment': u'1', u'src_compound_id': u'LSM-2359', u'src_id': u'25'},
 {u'assignment': u'1', u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},
 {u'assignment': u'1', u'src_compound_id': u'PA449283', u'src_id': u'17'},
 {u'assignment': u'1', u'src_compound_id': u'HMDB14967', u'src_id': u'18'},
 {u'assignment': u'1', u'src_compound_id': u'10016206', u'src_id': u'24'},
 {u'assignment': u'1', u'src_compound_id': u'49575', u'src_id': u'7'},
 {u'assignment': u'1', u'src_compound_id': u'14799843', u'src_id': u'21'},
 {u'assignment': u'1', u'src_compound_id': u'3016', u'src_id': u'22'},
 {u'assignment': u'1', u'src_compound_id': u'3364', u'src_id': u'4'},
 {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'6'},
 {u'assignment': u'1', u'src_compound_id': u'DB00829', u'src_id': u'2'},
 {u'assignment': u'0', u'src_compound_id': u'DB07699', u'src_id': u'2'},
 {u'assignment': u'1',
  u'src_compound_id': u'MCULE-8990989144',
  u'src_id': u'23'},
 {u'assignment': u'1', u'src_compound_id': u'50000766', u'src_id': u'31'},
 {u'assignment': u'1', u'src_compound_id': u'DZP', u'src_id': u'3'},
 {u'assignment': u'1',
  u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51',
  u'src_id': u'11'},
 {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'27'},
 {u'assignment': u'1', u'src_compound_id': u'439-14-5', u'src_id': u'26'},
 {u'assignment': u'1', u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'},
 {u'assignment': u'1', u'src_compound_id': u'536763', u'src_id': u'10'},
 {u'assignment': u'1', u'src_compound_id': u'11100-37-1', u'src_id': u'26'},
 {u'assignment': u'1', u'src_compound_id': u'J2.044C', u'src_id': u'29'}]

Get all src_ids

Obtain all src_ids currently in UniChem

In [23]:
# With no argument, src() returns one {'src_id': ...} dict per source.
ret = unichem.src()
In [24]:
len(ret)
Out[24]:
27
In [25]:
ret
Out[25]:
[{u'src_id': u'1'},
 {u'src_id': u'2'},
 {u'src_id': u'3'},
 {u'src_id': u'4'},
 {u'src_id': u'5'},
 {u'src_id': u'6'},
 {u'src_id': u'7'},
 {u'src_id': u'8'},
 {u'src_id': u'9'},
 {u'src_id': u'10'},
 {u'src_id': u'11'},
 {u'src_id': u'12'},
 {u'src_id': u'14'},
 {u'src_id': u'15'},
 {u'src_id': u'17'},
 {u'src_id': u'18'},
 {u'src_id': u'20'},
 {u'src_id': u'21'},
 {u'src_id': u'22'},
 {u'src_id': u'23'},
 {u'src_id': u'24'},
 {u'src_id': u'25'},
 {u'src_id': u'26'},
 {u'src_id': u'27'},
 {u'src_id': u'28'},
 {u'src_id': u'29'},
 {u'src_id': u'31'}]

Get source information

Obtain all information on a source by querying with a source id (src_id).

In [26]:
# Passing a src_id returns that source's metadata (name, description, URLs);
# here the result is a one-element list.
ret = unichem.src(1)
In [27]:
len(ret)
Out[27]:
1
In [28]:
ret
Out[28]:
[{u'aux_for_url': u'0',
  u'base_id_url': u'https://www.ebi.ac.uk/chembldb/compound/inspect/',
  u'base_id_url_available': u'1',
  u'description': u'A database of bioactive drug-like small molecules and bioactivities abstracted from the scientific literature.',
  u'name': u'chembl',
  u'name_label': u'ChEMBL',
  u'name_long': u'ChEMBL',
  u'src_id': u'1',
  u'src_url': u'https://www.ebi.ac.uk/chembl/'}]

Get structure

Obtain structure(s) CURRENTLY assigned to a query src_compound_id.

In [29]:
# Fetch the structure currently assigned to CHEMBL12 in source 1; the record
# holds 'standardinchi' and 'standardinchikey'.
ret = unichem.structure('CHEMBL12',1)
In [30]:
len(ret)
Out[30]:
1
In [31]:
ret
Out[31]:
[{u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3',
  u'standardinchikey': u'AAOVKJBEBIDNHE-UHFFFAOYSA-N'}]

Get all structures

Obtain structure(s) with current AND obsolete assignments to a query src_compound_id.

In [32]:
# all=True also includes obsolete structure assignments; each record gains an
# 'assignment' field.
ret = unichem.structure('CHEMBL12',1, all=True)
In [33]:
len(ret)
Out[33]:
1
In [34]:
ret
Out[34]:
[{u'assignment': u'1',
  u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3',
  u'standardinchikey': u'AAOVKJBEBIDNHE-UHFFFAOYSA-N'}]

Get URL for src_compound_ids from src_compound_id

Obtain a list of URLs for all src_compound_ids, from a specified source (the to_src_id), which are CURRENTLY assigned to the same structure as a currently assigned query src_compound_id. Method only applicable for sources which support direct URLs to src_compound_id pages.

In [35]:
# url=True returns {'url': ...} records for the matching identifiers in the
# target source (src_id 2) instead of the identifiers themselves.
ret = unichem.get('CHEMBL12',1, 2, url=True)
In [36]:
len(ret)
Out[36]:
1
In [37]:
ret
Out[37]:
[{u'url': u'http://www.drugbank.ca/drugs/DB00829'}]

Method also applicable for to_src_id's where the hyperlink is constructed from auxiliary data [and not from the src_compound_id] as per example below.

In [38]:
# Same URL lookup against target source 15; the recorded result is a
# surechembl.org link.
ret = unichem.get('CHEMBL490',1, 15, url=True)
In [39]:
len(ret)
Out[39]:
1
In [40]:
ret
Out[40]:
[{u'url': u'https://www.surechembl.org/chemical/SCHEMBL27799'}]

Get src_compound_ids all from obsolete src_compound_id

Obtain a list of all src_compound_ids from all sources with BOTH current AND obsolete assignments to the same structure that has an obsolete assignment to the query src_compound_id. The output will include the query src_compound_id if it is a valid src_compound_id with an obsolete assignment.

# NOTE(review): this statement has no In [n] prompt, and the Out[41]/Out[42]
# values recorded after it are identical to Out[39]/Out[40] from the CHEMBL490
# URL query. The cell appears never to have been executed, so `ret` is stale at
# that point. Re-run this cell and refresh the outputs that follow before
# relying on them.
# DB07699 is the src_id 2 identifier that the all=True results for CHEMBL12
# list with assignment u'0'.
ret = unichem.get('DB07699',2)

In [41]:
len(ret)
Out[41]:
1
In [42]:
ret
Out[42]:
[{u'url': u'https://www.surechembl.org/chemical/SCHEMBL27799'}]

Note also that by adding an additional (optional) argument (a valid src_id), the results will be restricted to only the source specified by that argument.

In [43]:
# Same DB07699 query restricted to target source 1; the recorded record carries
# 'UCI' and 'assignment' fields in addition to 'src_compound_id'.
ret = unichem.get('DB07699',2,1)
In [44]:
len(ret)
Out[44]:
1
In [45]:
ret
Out[45]:
[{u'UCI': u'304698', u'assignment': u'1', u'src_compound_id': u'CHEMBL12'}]

Get verbose src_compound_ids from InChIKey

Obtain all src_compound_ids (from all sources) which are CURRENTLY assigned to a query InChIKey. However, these are returned as part of the following data structure: A list of sources containing these src_compound_ids, including source description, base_id_url, etc. One element in this list is a list of the src_compound_ids currently assigned to the query InChIKey.

In [46]:
# verbose=True groups the result by source: one dict of source metadata per
# source, whose 'src_compound_id' value is a list of that source's matching
# identifiers (a source can contribute more than one, as src_id 26 does).
ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N', verbose=True)
In [47]:
len(ret)
Out[47]:
24
In [48]:
ret
Out[48]:
[{u'aux_for_url': u'0',
  u'base_id_url': u'https://www.ebi.ac.uk/chembldb/compound/inspect/',
  u'base_id_url_available': u'1',
  u'description': u'A database of bioactive drug-like small molecules and bioactivities abstracted from the scientific literature.',
  u'name': u'chembl',
  u'name_label': u'ChEMBL',
  u'name_long': u'ChEMBL',
  u'src_compound_id': [u'CHEMBL12'],
  u'src_id': u'1',
  u'src_url': u'https://www.ebi.ac.uk/chembl/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.drugbank.ca/drugs/',
  u'base_id_url_available': u'1',
  u'description': u'A database that combines drug (i.e. chemical, pharmacological and pharmaceutical) data with drug target (i.e. sequence, structure, and pathway) information.',
  u'name': u'drugbank',
  u'name_label': u'DrugBank',
  u'name_long': u'DrugBank',
  u'src_compound_id': [u'DB00829'],
  u'src_id': u'2',
  u'src_url': u'http://drugbank.ca/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/',
  u'base_id_url_available': u'1',
  u'description': u'The European resource for the collection, organisation and dissemination of data on biological macromolecular structures, including structures of small molecule ligands for proteins.',
  u'name': u'pdb',
  u'name_label': u'PDBe',
  u'name_long': u'PDBe (Protein Data Bank Europe)',
  u'src_compound_id': [u'DZP'],
  u'src_id': u'3',
  u'src_url': u'http://www.ebi.ac.uk/pdbe/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.guidetopharmacology.org/GRAC/LigandDisplayForward?ligandId=',
  u'base_id_url_available': u'1',
  u'description': u'The IUPHAR (International Union of Basic and Clinical Pharmacology)/BPS (British Pharmacological Society) Guide to PHARMACOLOGY database contains structures of small molecule ligands, peptides and antibodies, with their affinities at protein targets.',
  u'name': u'gtopdb',
  u'name_label': u'Guide to Pharmacology',
  u'name_long': u'Guide to Pharmacology',
  u'src_compound_id': [u'3364'],
  u'src_id': u'4',
  u'src_url': u'http://www.guidetopharmacology.org'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.genome.jp/dbget-bin/www_bget?',
  u'base_id_url_available': u'1',
  u'description': u'KEGG LIGAND is a composite DB consisting of COMPOUND, GLYCAN, REACTION, RPAIR, RCLASS, and ENZYME DBs, whose entries are identified by C, G, R, RP, RC, and EC numbers, respectively.',
  u'name': u'kegg_ligand',
  u'name_label': u'KEGG Ligand',
  u'name_long': u'KEGG (Kyoto Encyclopedia of Genes and Genomes) Ligand',
  u'src_compound_id': [u'C06948'],
  u'src_id': u'6',
  u'src_url': u'http://www.genome.jp/kegg/ligand.html'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A',
  u'base_id_url_available': u'1',
  u'description': u"ChEBI is a freely available dictionary of molecular entities focused on 'small' chemical compounds",
  u'name': u'chebi',
  u'name_label': u'ChEBI',
  u'name_long': u'ChEBI (Chemical Entities of Biological Interest).',
  u'src_compound_id': [u'49575'],
  u'src_id': u'7',
  u'src_url': u'http://www.ebi.ac.uk/chebi/downloadsForward.do'},
 {u'aux_for_url': u'0',
  u'base_id_url': None,
  u'base_id_url_available': u'0',
  u'description': u'Collections of plated arrays of small molecules that have a history of use in human clinical trials. Assembled by the National Institutes of Health (NIH) through the Molecular Libraries Roadmap Initiative',
  u'name': u'nih_ncc',
  u'name_label': u'NIH Clinical Collection',
  u'name_long': u'NIH Clinical Collection',
  u'src_compound_id': [u'SAM001246536'],
  u'src_id': u'8',
  u'src_url': u'http://www.nihclinicalcollection.com/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://zinc.docking.org/substance/',
  u'base_id_url_available': u'1',
  u'description': u'A free database of commercially-available compounds for virtual screening, provided by the Shoichet Laboratory in the Department of Pharmaceutical Chemistry at the University of California, San Francisco (UCSF).  [Irwin and Shoichet, J. Chem. Inf. Model. 2005;45(1):177-82]',
  u'name': u'zinc',
  u'name_label': u'ZINC',
  u'name_long': u'ZINC',
  u'src_compound_id': [u'ZINC00006427'],
  u'src_id': u'9',
  u'src_url': u'http://zinc.docking.org/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.emolecules.com/cgi-bin/more?vid=',
  u'base_id_url_available': u'1',
  u'description': u'A free chemical structure search engine containing millions of public domain structures. Pricing, availabilities, and vendor information requires an eMolecules Plus subscription.',
  u'name': u'emolecules',
  u'name_label': u'eMolecules',
  u'name_long': u'eMolecules',
  u'src_compound_id': [u'536763'],
  u'src_id': u'10',
  u'src_url': u'http://www.emolecules.com/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www-935.ibm.com/services/us/gbs/bao/siip/nih/?sid=',
  u'base_id_url_available': u'1',
  u'description': u'The data are provided by IBM-NIH and include all chemistry extracted by means of text and image mining from the patent corpus (USPTO, WIPO and EPO) for patent documents published through 31-12-2010. Identifiers in UniChem are IBM compound identifiers.',
  u'name': u'ibm',
  u'name_label': u'IBM Patent System',
  u'name_long': u'IBM strategic IP insight platform and the National Institutes of Health',
  u'src_compound_id': [u'10C2DCBD238615091B9B54A475769A51'],
  u'src_id': u'11',
  u'src_url': u'http://www-935.ibm.com/services/us/gbs/bao/siip/nih/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.ebi.ac.uk/gxa/query?geneQuery=&exactMatch=true&_exactMatch=on&organism=Any&condition=%22',
  u'base_id_url_available': u'1',
  u'description': u'The Gene Expression Atlas is a semantically enriched database of meta-analysis based summary statistics over a curated subset of ArrayExpress Archive, servicing queries for condition-specific gene expression patterns as well as broader exploratory searches for biologically interesting genes/samples.',
  u'name': u'atlas',
  u'name_label': u'Atlas',
  u'name_long': u'Gene Expression Atlas',
  u'src_compound_id': [u'diazepam'],
  u'src_id': u'12',
  u'src_url': u'http://www.ebi.ac.uk/gxa/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://fdasis.nlm.nih.gov/srs/ProxyServlet?mergeData=true&objectHandle=DBMaint&APPLICATION_NAME=fdasrs&actionHandle=default&nextPage=jsp/srs/ResultScreen.jsp&TXTSUPERLISTID=',
  u'base_id_url_available': u'1',
  u'description': u'The primary goal of the FDA/USP Substance Registration System (SRS) is to unambiguously define all substances present in regulated products. Once a substance has been defined, the SRS assigns a strong identifier that is permanently associated with the substance: a UNII (Unique Ingredient Identifier). This is a a non-proprietary, free, unique, unambiguous, nonsemantic, alphanumeric identifier based on a substances molecular structure and/or descriptive information.',
  u'name': u'fdasrs',
  u'name_label': u'FDA SRS',
  u'name_long': u'FDA/USP Substance Registration System (SRS)',
  u'src_compound_id': [u'Q3JTX2Q7TU'],
  u'src_id': u'14',
  u'src_url': u'http://fdasis.nlm.nih.gov/srs/srs.jsp'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'https://www.surechembl.org/chemical/',
  u'base_id_url_available': u'1',
  u'description': u'SureChEMBL automatically extracts chemistry from the full text of all major patent authorities. Compounds are derived from either chemical names found in text or in chemical depictions. All SureChEMBL compounds are included, except those failing UniChem loading rules.',
  u'name': u'surechembl',
  u'name_label': u'SureChEMBL',
  u'name_long': u'SureChEMBL',
  u'src_compound_id': [u'SCHEMBL21442'],
  u'src_id': u'15',
  u'src_url': u'https://www.surechembl.org'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.pharmgkb.org/drug/',
  u'base_id_url_available': u'1',
  u'description': u'PharmGKB (Pharmacogenomics Knowledgebase) is a comprehensive resource that curates knowledge about the impact of genetic variation on drug response for clinicians and researchers.',
  u'name': u'pharmgkb',
  u'name_label': u'PharmGKB',
  u'name_long': u'PharmGKB',
  u'src_compound_id': [u'PA449283'],
  u'src_id': u'17',
  u'src_url': u'http://www.pharmgkb.org'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.hmdb.ca/metabolites/',
  u'base_id_url_available': u'1',
  u'description': u'The Human Metabolome Database (HMDB) is a freely available electronic database containing detailed information about small molecule metabolites found in the human body. It is intended to be used for applications in metabolomics, clinical chemistry, biomarker discovery and general education. The database is designed to contain or link three kinds of data: 1) chemical data, 2) clinical data, and 3) molecular biology/biochemistry data',
  u'name': u'hmdb',
  u'name_label': u'Human Metabolome Database',
  u'name_long': u'Human Metabolome Database (HMDB)',
  u'src_compound_id': [u'HMDB14967'],
  u'src_id': u'18',
  u'src_url': u'http://www.hmdb.ca'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?sid=',
  u'base_id_url_available': u'1',
  u'description': u"A subset of the PubChem DB: from the original depositor 'Thomson Pharma'.",
  u'name': u'pubchem_tpharma',
  u'name_label': u'PubChem: Thomson Pharma ',
  u'name_long': u"PubChem ('Thomson Pharma' subset)",
  u'src_compound_id': [u'14799843'],
  u'src_id': u'21',
  u'src_url': u'http://www.thomson-pharma.com/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?cid=',
  u'base_id_url_available': u'1',
  u'description': u'A database of normalized PubChem compounds (CIDs) from the PubChem Database.',
  u'name': u'pubchem',
  u'name_label': u'PubChem',
  u'name_long': u'PubChem Compounds',
  u'src_compound_id': [u'3016'],
  u'src_id': u'22',
  u'src_url': u'http://pubchem.ncbi.nlm.nih.gov'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'https://mcule.com/',
  u'base_id_url_available': u'1',
  u'description': u'An online drug discovery platform with virtual screening and molecular modelling services.',
  u'name': u'mcule',
  u'name_label': u'Mcule',
  u'name_long': u'Mcule',
  u'src_compound_id': [u'MCULE-8990989144'],
  u'src_id': u'23',
  u'src_url': u'https://mcule.com'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://nmrshiftdb.org/molecule/',
  u'base_id_url_available': u'1',
  u'description': u'An NMR database (web database) for organic structures and their nuclear magnetic resonance (nmr) spectra. It allows for spectrum prediction (13C, 1H and other nuclei) as well as for searching spectra, structures and other properties. Last not least, it features peer-reviewed submission of datasets by its users.',
  u'name': u'nmrshiftdb2',
  u'name_label': u'NMRShiftDB',
  u'name_long': u'NMRShiftDB',
  u'src_compound_id': [u'10016206'],
  u'src_id': u'24',
  u'src_url': u'http://nmrshiftdb.nmr.uni-koeln.de/portal/media-type/html/user/anon/page/default.psml/js_pane/P-Home'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://lincsportal.ccs.miami.edu/entities/#/view/',
  u'base_id_url_available': u'1',
  u'description': u'The LINCS DCIC facilitates and standardized the information relevant to LINCS assays as described in http://www.lincsproject.org/data/data-standards/',
  u'name': u'lincs',
  u'name_label': u'LINCS',
  u'name_long': u'Library of Integrated Network-based Cellular Signatures',
  u'src_compound_id': [u'LSM-2359'],
  u'src_id': u'25',
  u'src_url': u'http://www.lincsproject.org/'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://actor.epa.gov/actor/GenericChemical?casrn=',
  u'base_id_url_available': u'1',
  u'description': u'ACToR (Aggregated Computational Toxicology Resource)',
  u'name': u'actor',
  u'name_label': u'ACToR',
  u'name_long': u'ACToR',
  u'src_compound_id': [u'439-14-5', u'11100-37-1'],
  u'src_id': u'26',
  u'src_url': u'http://actor.epa.gov/actor/faces/ACToRHome.jsp'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://humanmetabolism.org/?page_id=7&Abbreviation=',
  u'base_id_url_available': u'1',
  u'description': u'A biochemical knowledge-base on human metabolism',
  u'name': u'recon',
  u'name_label': u'Recon',
  u'name_long': u'Recon',
  u'src_compound_id': [u'C06948'],
  u'src_id': u'27',
  u'src_url': u'http://humanmetabolism.org'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://nikkajiweb.jst.go.jp/nikkaji_web/pages/top_e.jsp?CONTENT=syosai&SN=',
  u'base_id_url_available': u'1',
  u'description': u' Nakkaji (The Japan Chemical Substance Dictionary) is an organic compound dictionary database prepared by the Japan Science and Technology Agency (JST).',
  u'name': u'nikkaji',
  u'name_label': u'Nikkaji',
  u'name_long': u'Nikkaji',
  u'src_compound_id': [u'J2.044C'],
  u'src_id': u'29',
  u'src_url': u' http://nikkajiweb.jst.go.jp/nikkaji_web/pages/top_e.jsp'},
 {u'aux_for_url': u'0',
  u'base_id_url': u'http://www.bindingdb.org/bind/chemsearch/marvin/MolStructure.jsp?monomerid=',
  u'base_id_url_available': u'1',
  u'description': u'A public, web-accessible database of measured binding affinities, focusing chiefly on the interactions of proteins considered to be drug-targets with small, drug-like molecules',
  u'name': u'bindingdb',
  u'name_label': u'BindingDB',
  u'name_long': u'BindingDB',
  u'src_compound_id': [u'50000766'],
  u'src_id': u'31',
  u'src_url': None}]

Get auxiliary mappings

For a single source, obtain a mapping between all current src_compound_ids to their corresponding auxiliary data. See FAQ for an explanation of auxiliary data.

In [49]:
# map() with a single src_id returns that source's records pairing each
# 'src_compound_id' with its 'auxiliary data'.
ret = unichem.map(20)
In [50]:
len(ret)
Out[50]:
1890
In [51]:
ret[0]
Out[51]:
{u'auxiliary data': u'Odanacatib-(MK0822).html',
 u'src_compound_id': u'Odanacatib-(MK0822)'}

Get Connectivity data from InChIKey

For the explanation of a-h arguments, please refer to the documentation.

In [52]:
# Connectivity search by full InChIKey. In the recorded outputs, the calls made
# with h=1 yield flat lists under ret['1'], whereas the calls without h yield
# nested dicts with a 'src_matches' list.
ret = unichem.connectivity('QJVHTELASVOWBE-YBABNSIOSA-N', c=4, h=1)
In [53]:
len(ret['1'])
Out[53]:
897
In [54]:
ret['1'][1]
Out[54]:
[u'BCD9A6D200C14A6D5A11BD4B8F6E002D',
 u'11',
 None,
 u'1',
 u'',
 u'2',
 u'InChI=1S/C16H19N3O5S.C8H9NO5/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7;10-2-1-4-7(8(12)13)9-5(11)3-6(9)14-4/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24);1,6-7,10H,2-3H2,(H,12,13)/b;4-1-/t9?,10-,11+,14-;6-,7-/m11/s1',
 u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11?,14-/m1/s1',
 u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11+,14-/m1/s1',
 u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11?,14-/m1/s1',
 u'',
 u'0',
 u'1',
 u'0',
 u'0',
 u'0',
 u'QJVHTELASVOWBE-YBABNSIOSA-N',
 u'LSQZJLSUYDQPKJ-JDUQTJRRSA-N']
In [55]:
# Same InChIKey with a=1, c=3 and no h argument; ret['1'] then holds dicts, each
# with a 'src_matches' list of per-compound match records.
ret = unichem.connectivity('QJVHTELASVOWBE-YBABNSIOSA-N',a=1,c=3)
In [56]:
len(ret['1'])
Out[56]:
1
In [57]:
len(ret['1'][0]['src_matches'])
Out[57]:
23
In [58]:
ret['1'][0]['src_matches'][3]
Out[58]:
{u'B': 0,
 u'CpdId_InChIKey': u'MGOVGYGPKVRDAH-MZUBRJIZSA-L',
 u'Full_CpdId_InChI': u'InChI=1S/C18H21N2O6P.C16H19N3O5S.2Li/c21-17(22)16(11-14-7-3-1-4-8-14)20-27(24,25)13-19-18(23)26-12-15-9-5-2-6-10-15;1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7;;/h1-10,16H,11-13H2,(H,19,23)(H,21,22)(H2,20,24,25);3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24);;/q;;2*+1/p-2/t16-;9?,10-,11+,14-;;/m11../s1',
 u'assignment': u'1',
 u'aux_src': None,
 u'match_compare': [{u'C': u'3',
   u'Matching_CpdId_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11+,14-/m1/s1',
   u'Matching_Query_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11+,14-/m1/s1',
   u'b': u'0',
   u'i': u'0',
   u'm': u'0',
   u'p': u'',
   u's': u'0',
   u't': u'0'}],
 u'src_compound_id': u'CHEMBL46868'}
In [59]:
# Only the first block of the InChIKey (the part before the first hyphen) is
# passed here, with the same a=1, c=3 options as the previous query.
ret = unichem.connectivity('QJVHTELASVOWBE',a=1,c=3)
In [60]:
len(ret['1'])
Out[60]:
1
In [61]:
len(ret['1'][0]['src_matches'])
Out[61]:
23
In [62]:
ret['1'][0]['src_matches'][5]
Out[62]:
{u'B': 0,
 u'CpdId_InChIKey': u'PGLMJJICTRGFMO-LFJTZUKFSA-N',
 u'Full_CpdId_InChI': u'InChI=1S/C17H19N2O6P.C16H19N3O5S/c20-16(21)15(14-9-5-2-6-10-14)19-26(23,24)12-18-17(22)25-11-13-7-3-1-4-8-13;1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h1-10,15H,11-12H2,(H,18,22)(H,20,21)(H2,19,23,24);3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t;9?,10-,11+,14-/m.1/s1',
 u'assignment': u'1',
 u'aux_src': None,
 u'match_compare': [{u'C': u'3',
   u'Matching_CpdId_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11+,14-/m1/s1',
   u'Matching_Query_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)',
   u'b': u'0',
   u'i': u'0',
   u'm': u'1',
   u'p': u'',
   u's': u'1',
   u't': u'1'}],
 u'src_compound_id': u'CHEMBL1204583'}

Get Connectivity data from src_compound_id

For the explanation of a-h arguments, please refer to the documentation.

In [63]:
# Connectivity search starting from a src_compound_id: the identifier, its
# src_id (1), then the c/h keyword options.
ret = unichem.connectivity('CHEMBL121',1,c=4,h=1)
In [64]:
len(ret['1'])
Out[64]:
459
In [65]:
ret['1'][1]
Out[65]:
[u'F1F775C72156A155F020A0B41C640E2F',
 u'11',
 None,
 u'1',
 u'',
 u'1',
 u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
 u'InChI=1S/C18H19N3O3S.C4H6O2/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15;1-2-3-4(5)6/h2-9,15H,10-12H2,1H3,(H,20,22,23);2-3H,1H3,(H,5,6)/b;3-2-',
 u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
 u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
 u'',
 u'0',
 u'0',
 u'0',
 u'0',
 u'0',
 u'YASAKCUCGLMORW-UHFFFAOYSA-N',
 u'BAGQSUMXPOLHSI-AHNKWOMYSA-N']
In [66]:
# No optional a-h keyword arguments are passed; the result under ret['1'] is a
# list of dicts, each with a 'src_matches' list.
ret = unichem.connectivity('CHEMBL121',1)
In [67]:
len(ret['1'])
Out[67]:
18
In [68]:
len(ret['1'][0]['src_matches'])
Out[68]:
3
In [69]:
ret['1'][0]['src_matches']
Out[69]:
[{u'B': 0,
  u'CpdId_InChIKey': u'YASAKCUCGLMORW-UHFFFAOYSA-N',
  u'Full_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
  u'assignment': u'1',
  u'aux_src': None,
  u'match_compare': [{u'C': u'0',
    u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
    u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
    u'b': u'0',
    u'i': u'0',
    u'm': u'0',
    u'p': u'0',
    u's': u'0',
    u't': u'0'}],
  u'src_compound_id': u'CHEMBL121'},
 {u'B': 0,
  u'CpdId_InChIKey': u'YASAKCUCGLMORW-HNNXBMFYSA-N',
  u'Full_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m0/s1',
  u'assignment': u'1',
  u'aux_src': None,
  u'match_compare': [{u'C': u'0',
    u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m0/s1',
    u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
    u'b': u'0',
    u'i': u'0',
    u'm': u'1',
    u'p': u'0',
    u's': u'1',
    u't': u'1'}],
  u'src_compound_id': u'CHEMBL121106'},
 {u'B': 0,
  u'CpdId_InChIKey': u'YASAKCUCGLMORW-OAHLLOKOSA-N',
  u'Full_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m1/s1',
  u'assignment': u'1',
  u'aux_src': None,
  u'match_compare': [{u'C': u'0',
    u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m1/s1',
    u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',
    u'b': u'0',
    u'i': u'0',
    u'm': u'1',
    u'p': u'0',
    u's': u'1',
    u't': u'1'}],
  u'src_compound_id': u'CHEMBL333304'}]

Get InChI from InChIKey

Obtain InChI for InChIKey

In [70]:
# Resolve an InChIKey to its standard InChI; the result is a list of dicts with
# a 'standardinchi' key.
ret = unichem.inchiFromKey('AAOVKJBEBIDNHE-UHFFFAOYSA-N')
In [71]:
ret
Out[71]:
[{u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3'}]