"""Uniprot REST API."""

import io
import urllib.parse
import urllib.request
from typing import Iterable, Literal, Optional, Union
from urllib.error import HTTPError

import pandas as pd

from ..species._core import Species

# Names of identifier types (cf. the `id_type_from` / `id_type_to` arguments
# of `Uniprot.query`). "ENSEMBL_ID" is listed because it is the default
# `id_type_to` there.
_IDs = Literal[
    "UNIPROT_ID",
    "PDB_ID",
    "CHEMBL_ID",
    "DRUGBANK_ID",
    "ENSEMBL_ID",
    "ENSEMBL_PRO_ID",
]


class Uniprot:
    """Wrapper of the Uniprot REST APIs.

    See: https://www.uniprot.org/help/api_idmapping
    """

    # NOTE(review): this is the legacy ID-mapping endpoint; UniProt appears to
    # have moved ID mapping to https://rest.uniprot.org/idmapping — confirm
    # that this URL still responds before relying on it.
    _URL = "https://www.uniprot.org/uploadlists/"

    def __init__(self) -> None:
        pass

    def query(
        self,
        prots: Union[str, Iterable[str]],
        id_type_from: str = "UNIPROT_ID",
        id_type_to: str = "ENSEMBL_ID",
        columns: Optional[str] = None,
        species: str = "human",
    ) -> pd.DataFrame:
        """Map protein identifiers from one identifier type to another.

        Args:
            prots: Identifiers to map. A single string is treated as one
                identifier; any other iterable is sent as a space-separated
                list.
            id_type_from: Identifier type of ``prots``. ``"UNIPROT_ID"`` is
                sent to the service as ``"ACC"``.
            id_type_to: Identifier type to map to. ``"UNIPROT_ID"`` is sent to
                the service as ``"ACC"``.
            columns: Optional comma-separated list of extra result columns.
                Omitted from the request when ``None``.
            species: Row label looked up in ``Species().df`` to obtain the
                ``taxon_id`` sent with the request.

        Returns:
            A DataFrame parsed from the tab-separated response, whose columns
            are renamed to the requested ``columns`` (if any) followed by
            ``id_type_from`` and ``id_type_to``. An empty DataFrame with those
            columns is returned, without any network access, when ``prots``
            is empty.

        Raises:
            urllib.error.HTTPError: Propagated unchanged when the service
                answers with an HTTP error status.
        """
        # A bare string is one identifier, not an iterable of characters.
        ids = [prots] if isinstance(prots, str) else list(prots)

        # Result columns are labelled with the caller-facing type names, so
        # they are captured before UNIPROT_ID is translated for the service.
        colnames = [id_type_from, id_type_to]
        if columns is not None:
            colnames = columns.split(",") + colnames

        # Nothing to map: skip the species lookup and the HTTP round trip.
        if not ids:
            return pd.DataFrame(columns=colnames)

        # replace UNIPROT_ID with ACC.
        api_from = "ACC" if id_type_from == "UNIPROT_ID" else id_type_from
        api_to = "ACC" if id_type_to == "UNIPROT_ID" else id_type_to

        # taxon id of the requested species
        taxon_id = Species().df.loc[species].taxon_id

        # set up params
        params = {
            "from": api_from,
            "to": api_to,
            "format": "tab",
            "query": " ".join(ids),
            "taxon": taxon_id,
        }
        # urlencode would serialise None as the literal string "None".
        if columns is not None:
            params["columns"] = columns

        # query uniprot; an HTTPError from urlopen propagates to the caller
        # as-is (re-raising the bare class would fail with a TypeError).
        payload = urllib.parse.urlencode(params).encode("utf-8")
        request = urllib.request.Request(self._URL, payload)
        with urllib.request.urlopen(request) as resp:
            body = resp.read().decode("utf-8")

        # format results into a dataframe
        df = pd.read_csv(io.StringIO(body), sep="\t", header=0)
        # NOTE(review): assumes the response has exactly len(colnames)
        # columns in this order — confirm against the service output.
        df.columns = colnames

        return df