{ "cells": [ { "cell_type": "markdown", "id": "e2c12381", "metadata": {}, "source": [ "# Identify contact residues on MHC Class I molecules" ] }, { "cell_type": "markdown", "id": "1729b13c", "metadata": {}, "source": [ "## Introduction\n", "\n", "In this notebook, we determine the IMGT positions of MHC molecules that make contact with the CDR loops of a bound TCR. These contacts are then plotted by the identity of the contacting TCR loop to create a fingerprint of the TCRs on MHC molecules. We also look at the contacts that TCRs make with the presented peptide." ] }, { "cell_type": "code", "execution_count": 1, "id": "e77572dc", "metadata": { "execution": { "iopub.execute_input": "2024-05-13T17:28:16.694911Z", "iopub.status.busy": "2024-05-13T17:28:16.694066Z", "iopub.status.idle": "2024-05-13T17:28:32.979355Z", "shell.execute_reply": "2024-05-13T17:28:32.978332Z" } }, "outputs": [], "source": [ "import os\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from python_pdb.parsers import parse_pdb_to_pandas\n", "\n", "from tcr_pmhc_interface_analysis.imgt_numbering import assign_cdr_number" ] }, { "cell_type": "code", "execution_count": 2, "id": "7861a522", "metadata": {}, "outputs": [], "source": [ "CUTOFF_DISTANCE = 5 # Å" ] }, { "cell_type": "markdown", "id": "15246f0a", "metadata": {}, "source": [ "## Loading TCR:pMHC-I Structures" ] }, { "cell_type": "code", "execution_count": 3, "id": "e558e68f", "metadata": { "execution": { "iopub.execute_input": "2024-05-13T17:28:32.985285Z", "iopub.status.busy": "2024-05-13T17:28:32.984693Z", "iopub.status.idle": "2024-05-13T17:28:32.989070Z", "shell.execute_reply": "2024-05-13T17:28:32.988312Z" } }, "outputs": [], "source": [ "STCRDAB_PATH = '../data/raw/stcrdab'" ] }, { "cell_type": "code", "execution_count": 4, "id": "cc7f89c7", "metadata": { "execution": { "iopub.execute_input": "2024-05-13T17:28:32.993200Z", "iopub.status.busy": 
"2024-05-13T17:28:32.992862Z", "iopub.status.idle": "2024-05-13T17:28:33.222616Z", "shell.execute_reply": "2024-05-13T17:28:33.221754Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | pdb | \n", "Bchain | \n", "Achain | \n", "antigen_chain | \n", "antigen_name | \n", "mhc_chain1 | \n", "mhc_chain2 | \n", "docking_angle | \n", "beta_subgroup | \n", "alpha_subgroup | \n", "... | \n", "alpha_organism | \n", "antigen_organism | \n", "mhc_chain1_organism | \n", "mhc_chain2_organism | \n", "authors | \n", "resolution | \n", "method | \n", "r_free | \n", "r_factor | \n", "engineered | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "8gom | \n", "E | \n", "D | \n", "C | \n", "spike protein s2 | \n", "A | \n", "B | \n", "39.649 | \n", "NaN | \n", "NaN | \n", "... | \n", "homo sapiens | \n", "severe acute respiratory syndrome coronavirus2 | \n", "homo sapiens | \n", "homo sapiens | \n", "Wu, D., Mariuzza, R.A. | \n", "2.783 | \n", "X-RAY DIFFRACTION | \n", "0.248 | \n", "0.195 | \n", "True | \n", "
1 | \n", "8gon | \n", "E | \n", "D | \n", "C | \n", "spike protein s2 | \n", "A | \n", "B | \n", "38.984 | \n", "NaN | \n", "NaN | \n", "... | \n", "homo sapiens | \n", "severe acute respiratory syndrome coronavirus2 | \n", "homo sapiens | \n", "homo sapiens | \n", "Wu, D., Mariuzza, R.A. | \n", "2.601 | \n", "X-RAY DIFFRACTION | \n", "0.253 | \n", "0.198 | \n", "True | \n", "
2 | \n", "7q99 | \n", "E | \n", "D | \n", "C | \n", "asn-leu-ser-ala-leu-gly-ile-phe-ser-thr | \n", "A | \n", "B | \n", "46.371 | \n", "TRBV30 | \n", "TRAV12 | \n", "... | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "Rizkallah, P.J., Sewell, A.K., Wall, A., Fulle... | \n", "2.550 | \n", "X-RAY DIFFRACTION | \n", "0.272 | \n", "0.218 | \n", "True | \n", "
3 | \n", "7q9a | \n", "E | \n", "D | \n", "C | \n", "leu-leu-leu-gly-ile-gly-ile-leu-val-leu | \n", "A | \n", "B | \n", "48.391 | \n", "TRBV30 | \n", "TRAV12 | \n", "... | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "Rizkallah, P.J., Sewell, A.K., Wall, A., Fulle... | \n", "2.100 | \n", "X-RAY DIFFRACTION | \n", "0.243 | \n", "0.205 | \n", "True | \n", "
4 | \n", "2ak4 | \n", "E | \n", "D | \n", "C | \n", "ebv peptide lpeplpqgqltay | \n", "A | \n", "B | \n", "71.108 | \n", "TRBV6 | \n", "TRAV19 | \n", "... | \n", "homo sapiens | \n", "NaN | \n", "homo sapiens | \n", "homo sapiens | \n", "Tynan, F.E., Burrows, S.R., Buckle, A.M., Clem... | \n", "2.500 | \n", "X-RAY DIFFRACTION | \n", "0.278 | \n", "0.246 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
297 | \n", "6q3s | \n", "E | \n", "D | \n", "C | \n", "ser-leu-leu-met-trp-ile-thr-gln-val | \n", "A | \n", "B | \n", "67.059 | \n", "TRBV6 | \n", "TRAV21 | \n", "... | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "Meijers, R., Anjanappa, R., Springer, S., Garc... | \n", "2.500 | \n", "X-RAY DIFFRACTION | \n", "0.273 | \n", "0.229 | \n", "True | \n", "
298 | \n", "5men | \n", "E | \n", "D | \n", "C | \n", "ile-leu-ala-lys-phe-leu-his-trp-leu | \n", "A | \n", "B | \n", "40.823 | \n", "TRBV6 | \n", "TRAV22 | \n", "... | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "Rizkallah, P.J., Lloyd, A., Crowther, M., Cole... | \n", "2.810 | \n", "X-RAY DIFFRACTION | \n", "0.272 | \n", "0.189 | \n", "True | \n", "
299 | \n", "1ao7 | \n", "E | \n", "D | \n", "C | \n", "tax peptide | \n", "A | \n", "B | \n", "34.827 | \n", "TRBV6 | \n", "TRAV12 | \n", "... | \n", "homo sapiens | \n", "human t-lymphotropic virus 1 | \n", "homo sapiens | \n", "homo sapiens | \n", "Garboczi, D.N., Ghosh, P., Utz, U., Fan, Q.R.,... | \n", "2.600 | \n", "X-RAY DIFFRACTION | \n", "0.320 | \n", "0.245 | \n", "True | \n", "
300 | \n", "4jff | \n", "E | \n", "D | \n", "C | \n", "melanoma motif | \n", "A | \n", "B | \n", "42.977 | \n", "TRBV30 | \n", "TRAV12 | \n", "... | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "homo sapiens | \n", "Rizkallah, P.J., Cole, D.K., Madura, F., Sewel... | \n", "2.430 | \n", "X-RAY DIFFRACTION | \n", "0.263 | \n", "0.210 | \n", "True | \n", "
301 | \n", "3dxa | \n", "O | \n", "N | \n", "M | \n", "ebv decapeptide epitope | \n", "K | \n", "L | \n", "58.225 | \n", "TRBV7 | \n", "TRAV26 | \n", "... | \n", "homo sapiens | \n", "NaN | \n", "homo sapiens | \n", "homo sapiens | \n", "Archbold, J.K., Macdonald, W.A., Gras, S., Ros... | \n", "3.500 | \n", "X-RAY DIFFRACTION | \n", "0.330 | \n", "0.286 | \n", "True | \n", "
302 rows × 24 columns
\n", "\n", " | record_type_tcr | \n", "atom_number_tcr | \n", "atom_name_tcr | \n", "alt_loc_tcr | \n", "residue_name_tcr | \n", "chain_id_tcr | \n", "residue_seq_id_tcr | \n", "residue_insert_code_tcr | \n", "pos_x_tcr | \n", "pos_y_tcr | \n", "... | \n", "pos_y_peptide | \n", "pos_z_peptide | \n", "occupancy_peptide | \n", "b_factor_peptide | \n", "element_peptide | \n", "charge_peptide | \n", "chain_type_peptide | \n", "cdr_peptide | \n", "peptide_length | \n", "peptide_position | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
69473 | \n", "ATOM | \n", "472 | \n", "C | \n", "None | \n", "ASN | \n", "E | \n", "57 | \n", "None | \n", "12.312 | \n", "-60.161 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
69474 | \n", "ATOM | \n", "472 | \n", "C | \n", "None | \n", "ASN | \n", "E | \n", "57 | \n", "None | \n", "12.312 | \n", "-60.161 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
70937 | \n", "ATOM | \n", "473 | \n", "O | \n", "None | \n", "ASN | \n", "E | \n", "57 | \n", "None | \n", "11.507 | \n", "-60.966 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
70938 | \n", "ATOM | \n", "473 | \n", "O | \n", "None | \n", "ASN | \n", "E | \n", "57 | \n", "None | \n", "11.507 | \n", "-60.966 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
70939 | \n", "ATOM | \n", "473 | \n", "O | \n", "None | \n", "ASN | \n", "E | \n", "57 | \n", "None | \n", "11.507 | \n", "-60.966 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
31515 | \n", "ATOM | \n", "10078 | \n", "CD2 | \n", "None | \n", "TYR | \n", "N | \n", "113 | \n", "None | \n", "-51.375 | \n", "74.966 | \n", "... | \n", "73.846 | \n", "-3.678 | \n", "1.0 | \n", "63.86 | \n", "O | \n", "None | \n", "antigen_chain | \n", "NaN | \n", "10.0 | \n", "4.0 | \n", "
31516 | \n", "ATOM | \n", "10078 | \n", "CD2 | \n", "None | \n", "TYR | \n", "N | \n", "113 | \n", "None | \n", "-51.375 | \n", "74.966 | \n", "... | \n", "75.095 | \n", "-6.148 | \n", "1.0 | \n", "63.91 | \n", "C | \n", "None | \n", "antigen_chain | \n", "NaN | \n", "10.0 | \n", "4.0 | \n", "
31517 | \n", "ATOM | \n", "10078 | \n", "CD2 | \n", "None | \n", "TYR | \n", "N | \n", "113 | \n", "None | \n", "-51.375 | \n", "74.966 | \n", "... | \n", "76.511 | \n", "-6.671 | \n", "1.0 | \n", "63.85 | \n", "C | \n", "None | \n", "antigen_chain | \n", "NaN | \n", "10.0 | \n", "4.0 | \n", "
31697 | \n", "ATOM | \n", "10080 | \n", "CE2 | \n", "None | \n", "TYR | \n", "N | \n", "113 | \n", "None | \n", "-52.313 | \n", "73.948 | \n", "... | \n", "73.846 | \n", "-3.678 | \n", "1.0 | \n", "63.86 | \n", "O | \n", "None | \n", "antigen_chain | \n", "NaN | \n", "10.0 | \n", "4.0 | \n", "
31698 | \n", "ATOM | \n", "10080 | \n", "CE2 | \n", "None | \n", "TYR | \n", "N | \n", "113 | \n", "None | \n", "-52.313 | \n", "73.948 | \n", "... | \n", "75.095 | \n", "-6.148 | \n", "1.0 | \n", "63.91 | \n", "C | \n", "None | \n", "antigen_chain | \n", "NaN | \n", "10.0 | \n", "4.0 | \n", "
156312 rows × 56 columns
\n", "