import io
import urllib.parse
import urllib.request
from typing import Literal
from urllib.error import HTTPError

import pandas as pd

from ..species._core import Species
# Closed set of identifier-type names, expressed as a typing.Literal alias.
# NOTE(review): presumably meant for the id_type_from / id_type_to arguments of
# Uniprot.query; the default id_type_to ("ENSEMBL_ID") is not listed - confirm.
_IDs = Literal["UNIPROT_ID", "PDB_ID", "CHEMBL_ID", "DRUGBANK_ID", "ENSEMBL_PRO_ID"]
class Uniprot:
    """Wrapper of the Uniprot REST APIs.

    See: https://www.uniprot.org/help/api_idmapping
    """

    # NOTE(review): this looks like the legacy "uploadlists" mapping endpoint;
    # confirm it is still served before relying on it.
    _URL = "https://www.uniprot.org/uploadlists/"

    def __init__(self) -> None:
        pass

    def query(
        self,
        prots,
        id_type_from: str = "UNIPROT_ID",
        id_type_to: str = "ENSEMBL_ID",
        columns=None,
        species: str = "human",
    ) -> pd.DataFrame:
        """Map protein identifiers from one ID type to another.

        Args:
            prots: Identifiers to map. Either an iterable of strings or a
                single string, which is treated as one identifier.
            id_type_from: ID type of ``prots``. ``"UNIPROT_ID"`` is sent to
                the service as ``"ACC"``.
            id_type_to: ID type to map to. ``"UNIPROT_ID"`` is sent to the
                service as ``"ACC"``.
            columns: Optional comma-separated string of extra columns to
                request. Omitted from the request when ``None``.
            species: Row label used to look up ``taxon_id`` in
                ``Species().df``.

        Returns:
            A DataFrame parsed from the tab-separated response, with columns
            renamed to the requested ``columns`` (if any) followed by
            ``id_type_from`` and ``id_type_to``. If ``prots`` is empty, an
            empty DataFrame with those columns is returned and no request is
            made.

        Raises:
            urllib.error.HTTPError: Propagated unchanged when the service
                answers with an HTTP error status.
        """
        # Column labels use the caller-facing ID names, so compute them before
        # UNIPROT_ID is replaced with ACC below.
        extra_columns = columns.split(",") if columns is not None else []
        colnames = extra_columns + [id_type_from, id_type_to]

        # A bare string would otherwise be joined character by character.
        prots = [prots] if isinstance(prots, str) else list(prots)
        if not prots:
            # Nothing to map: skip the species lookup and the network call.
            return pd.DataFrame(columns=colnames)

        # replace UNIPROT_ID with ACC.
        id_type_from = "ACC" if id_type_from == "UNIPROT_ID" else id_type_from
        id_type_to = "ACC" if id_type_to == "UNIPROT_ID" else id_type_to

        # taxon id of the requested species
        taxon_id = Species().df.loc[species].taxon_id

        # set up params
        params = {
            "from": id_type_from,
            "to": id_type_to,
            "format": "tab",
            "query": " ".join(prots),
            "taxon": taxon_id,
        }
        if columns is not None:
            # urlencode would turn None into the literal string "None".
            params["columns"] = columns

        # query uniprot; HTTPError is left to propagate with its original
        # status code and message.
        payload = urllib.parse.urlencode(params).encode("utf-8")
        req = urllib.request.Request(self._URL, payload)
        with urllib.request.urlopen(req) as f:
            response = f.read()

        # format results into a dataframe
        text = response.decode("utf-8")
        df = pd.read_csv(io.StringIO(text), sep="\t", header=0)
        df.columns = colnames
        return df