{ "cells": [ { "cell_type": "markdown", "id": "b15395b0", "metadata": {}, "source": [ "# pMHC movement based on peptide anchoring" ] }, { "cell_type": "markdown", "id": "f0d5a0e3", "metadata": {}, "source": [ "## Introduction\n", "\n", "In this notebook, *apo*-*holo* comparisons between different anchoring patterns of MHC alleles are plotted next to each other. The anchoring patterns were determined by finding the peptide motifs of each MHC allele from the [MHCMotifAtlas](http://mhcmotifatlas.org/home) and using these strong motifs as anchors for the peptide.\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "91fe0fa0", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T14:30:05.459410Z", "iopub.status.busy": "2024-05-09T14:30:05.459138Z", "iopub.status.idle": "2024-05-09T14:30:25.851697Z", "shell.execute_reply": "2024-05-09T14:30:25.850740Z" } }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from python_pdb.formats.residue import THREE_TO_ONE_CODE" ] }, { "cell_type": "code", "execution_count": null, "id": "719f5e03", "metadata": { "execution": { "iopub.execute_input": "2024-05-09T14:30:25.866602Z", "iopub.status.busy": "2024-05-09T14:30:25.865343Z", "iopub.status.idle": "2024-05-09T14:30:26.209921Z", "shell.execute_reply": "2024-05-09T14:30:26.209285Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | allele_slug | \n", "position | \n", "amino_acid | \n", "peptide_length | \n", "
---|---|---|---|---|
0 | \n", "hla_a_01_01 | \n", "2 | \n", "T | \n", "9 | \n", "
1 | \n", "hla_a_01_01 | \n", "3 | \n", "D | \n", "9 | \n", "
2 | \n", "hla_a_01_01 | \n", "9 | \n", "Y | \n", "9 | \n", "
3 | \n", "hla_a_02_01 | \n", "2 | \n", "L | \n", "9 | \n", "
4 | \n", "hla_a_02_01 | \n", "9 | \n", "L | \n", "9 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
319 | \n", "hla_g_01_03 | \n", "1 | \n", "K | \n", "9 | \n", "
320 | \n", "hla_g_01_04 | \n", "1 | \n", "R | \n", "9 | \n", "
321 | \n", "hla_g_01_04 | \n", "3 | \n", "P | \n", "9 | \n", "
322 | \n", "hla_g_01_04 | \n", "1 | \n", "K | \n", "9 | \n", "
323 | \n", "hla_g_01_04 | \n", "9 | \n", "L | \n", "9 | \n", "
324 rows × 4 columns
\n", "\n", " | complex_id | \n", "structure_x_name | \n", "structure_y_name | \n", "chain_type | \n", "residue_name | \n", "residue_seq_id | \n", "residue_insert_code | \n", "rmsd | \n", "ca_distance | \n", "chi_angle_change | \n", "com_distance | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
181 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "MET | \n", "1 | \n", "NaN | \n", "0.700531 | \n", "0.237162 | \n", "1.570636 | \n", "0.364628 | \n", "
182 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "VAL | \n", "2 | \n", "NaN | \n", "0.114569 | \n", "0.089359 | \n", "0.009407 | \n", "0.043732 | \n", "
183 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "TRP | \n", "3 | \n", "NaN | \n", "0.400840 | \n", "0.233532 | \n", "-0.019115 | \n", "0.363989 | \n", "
184 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "GLY | \n", "4 | \n", "NaN | \n", "0.495618 | \n", "0.262271 | \n", "NaN | \n", "0.448252 | \n", "
185 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "PRO | \n", "5 | \n", "NaN | \n", "0.734430 | \n", "0.486409 | \n", "-0.842595 | \n", "0.536198 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
209372 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "ARG | \n", "5 | \n", "NaN | \n", "1.064663 | \n", "0.157924 | \n", "0.364432 | \n", "0.705033 | \n", "
209373 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "THR | \n", "6 | \n", "NaN | \n", "0.421897 | \n", "0.345439 | \n", "0.097701 | \n", "0.344074 | \n", "
209374 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "PHE | \n", "7 | \n", "NaN | \n", "1.225982 | \n", "0.317819 | \n", "-0.347835 | \n", "0.883236 | \n", "
209375 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "LEU | \n", "8 | \n", "NaN | \n", "1.323615 | \n", "0.310356 | \n", "0.612672 | \n", "0.270049 | \n", "
209376 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "LEU | \n", "9 | \n", "NaN | \n", "1.599917 | \n", "0.192969 | \n", "0.885323 | \n", "0.175807 | \n", "
9857 rows × 11 columns
\n", "\n", " | file_name | \n", "pdb_id | \n", "structure_type | \n", "state | \n", "alpha_chain | \n", "beta_chain | \n", "antigen_chain | \n", "mhc_chain1 | \n", "mhc_chain2 | \n", "cdr_sequences_collated | \n", "peptide_sequence | \n", "mhc_slug | \n", "group_name | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1ao7_D-E-C-A-B_tcr_pmhc.pdb | \n", "1ao7 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "1ao7_D-E-C-A-B_tcr_pmhc | \n", "
1 | \n", "1b0g_C-A-B_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "1b0g_C-A-B_pmhc | \n", "
2 | \n", "1b0g_F-D-E_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "F | \n", "D | \n", "E | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "1b0g_F-D-E_pmhc | \n", "
3 | \n", "1bd2_D-E-C-A-B_tcr_pmhc.pdb | \n", "1bd2 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "1bd2_D-E-C-A-B_tcr_pmhc | \n", "
4 | \n", "1bii_P-A-B_pmhc.pdb | \n", "1bii | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "P | \n", "A | \n", "B | \n", "NaN | \n", "RGPGRAFVTI | \n", "h2_dd | \n", "1bii_P-A-B_pmhc | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
386 | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtd | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtd_C-A-B_pmhc | \n", "
387 | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "7rtr | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
388 | \n", "8gvb_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvb | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "8gvb_A-B-P-H-L_tcr_pmhc | \n", "
389 | \n", "8gvg_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvg | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RFPLTFGW | \n", "hla_a_24_02 | \n", "8gvg_A-B-P-H-L_tcr_pmhc | \n", "
390 | \n", "8gvi_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvi | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "8gvi_A-B-P-H-L_tcr_pmhc | \n", "
391 rows × 13 columns
\n", "\n", " | complex_id | \n", "structure_x_name | \n", "structure_y_name | \n", "chain_type | \n", "residue_name | \n", "residue_seq_id | \n", "residue_insert_code | \n", "rmsd | \n", "ca_distance | \n", "chi_angle_change | \n", "... | \n", "state | \n", "alpha_chain | \n", "beta_chain | \n", "antigen_chain | \n", "mhc_chain1 | \n", "mhc_chain2 | \n", "cdr_sequences_collated | \n", "peptide_sequence | \n", "mhc_slug | \n", "group_name | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "MET | \n", "1 | \n", "NaN | \n", "0.700531 | \n", "0.237162 | \n", "1.570636 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... | \n", "MVWGPDPLYV | \n", "hla_a_02_01 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "
1 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "VAL | \n", "2 | \n", "NaN | \n", "0.114569 | \n", "0.089359 | \n", "0.009407 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... | \n", "MVWGPDPLYV | \n", "hla_a_02_01 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "
2 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "TRP | \n", "3 | \n", "NaN | \n", "0.400840 | \n", "0.233532 | \n", "-0.019115 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... | \n", "MVWGPDPLYV | \n", "hla_a_02_01 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "
3 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "GLY | \n", "4 | \n", "NaN | \n", "0.495618 | \n", "0.262271 | \n", "NaN | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... | \n", "MVWGPDPLYV | \n", "hla_a_02_01 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "
4 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "5n1y_C-A-B_pmhc.pdb | \n", "antigen_chain | \n", "PRO | \n", "5 | \n", "NaN | \n", "0.734430 | \n", "0.486409 | \n", "-0.842595 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... | \n", "MVWGPDPLYV | \n", "hla_a_02_01 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9852 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "ARG | \n", "5 | \n", "NaN | \n", "1.064663 | \n", "0.157924 | \n", "0.364432 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
9853 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "THR | \n", "6 | \n", "NaN | \n", "0.421897 | \n", "0.345439 | \n", "0.097701 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
9854 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "PHE | \n", "7 | \n", "NaN | \n", "1.225982 | \n", "0.317819 | \n", "-0.347835 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
9855 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "LEU | \n", "8 | \n", "NaN | \n", "1.323615 | \n", "0.310356 | \n", "0.612672 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
9856 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "antigen_chain | \n", "LEU | \n", "9 | \n", "NaN | \n", "1.599917 | \n", "0.192969 | \n", "0.885323 | \n", "... | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "
9857 rows × 26 columns
\n", "\n", " | dominant_anchor | \n", "mhc_slug | \n", "
---|---|---|
0 | \n", "p2-p5-p9 | \n", "hla_b_08_01 | \n", "
1 | \n", "p2-p9 | \n", "hla_e_01_03 | \n", "