{ "cells": [ { "cell_type": "markdown", "id": "06a51779", "metadata": {}, "source": [ "# Centre of mass analysis" ] }, { "cell_type": "markdown", "id": "5b674b4d", "metadata": {}, "source": [ "## Introduction\n", "\n", "In this notebook we aimed to identify the centres of mass (COMs) of the TCR framework (Fw) regions and of the anchors of the CDR loops. We then investigated how much these regions move between *apo* and *holo* states. We also used these COMs to create an axis of the TCR and measure the angle between the two chains of the TCR, before determining how this angle changes between *apo* and *holo* states." ] }, { "cell_type": "code", "execution_count": 1, "id": "c9cd30b7", "metadata": {}, "outputs": [], "source": [ "import os\n", "from collections import defaultdict\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from python_pdb.parsers import parse_pdb_to_pandas\n", "\n", "from tcr_pmhc_interface_analysis.measurements import get_distance\n", "from tcr_pmhc_interface_analysis.processing import annotate_tcr_pmhc_df, find_anchors\n", "from tcr_pmhc_interface_analysis.utils import get_coords" ] }, { "cell_type": "code", "execution_count": 2, "id": "d76c1da6", "metadata": {}, "outputs": [], "source": [ "DATA_DIR = '../data/processed/apo-holo-tcr-pmhc-class-I'" ] }, { "cell_type": "code", "execution_count": 3, "id": "704e7645", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | file_name | \n", "pdb_id | \n", "structure_type | \n", "state | \n", "alpha_chain | \n", "beta_chain | \n", "antigen_chain | \n", "mhc_chain1 | \n", "mhc_chain2 | \n", "cdr_sequences_collated | \n", "peptide_sequence | \n", "mhc_slug | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1ao7_D-E-C-A-B_tcr_pmhc.pdb | \n", "1ao7 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "
1 | \n", "1b0g_C-A-B_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "
2 | \n", "1b0g_F-D-E_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "F | \n", "D | \n", "E | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "
3 | \n", "1bd2_D-E-C-A-B_tcr_pmhc.pdb | \n", "1bd2 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "
4 | \n", "1bii_P-A-B_pmhc.pdb | \n", "1bii | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "P | \n", "A | \n", "B | \n", "NaN | \n", "RGPGRAFVTI | \n", "h2_dd | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
386 | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtd | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "
387 | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "7rtr | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "
388 | \n", "8gvb_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvb | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "
389 | \n", "8gvg_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvg | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RFPLTFGW | \n", "hla_a_24_02 | \n", "
390 | \n", "8gvi_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvi | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "
391 rows × 12 columns
\n", "\n", " | complex_id | \n", "structure_x_path | \n", "structure_y_path | \n", "alpha_anchor_com_diff | \n", "beta_anchor_com_diff | \n", "alpha_fw_com_diff | \n", "beta_fw_com_diff | \n", "chain_angle_diff | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "3qdg_D-E-C-A-B_tcr_pmhc | \n", "3qdg_D-E-C-A-B_tcr_pmhc.pdb | \n", "3qeu_A-B_tcr.pdb | \n", "0.645120 | \n", "0.457327 | \n", "0.219777 | \n", "0.166987 | \n", "-0.033421 | \n", "
1 | \n", "3qdg_D-E-C-A-B_tcr_pmhc | \n", "3qdg_D-E-C-A-B_tcr_pmhc.pdb | \n", "3qeu_D-E_tcr.pdb | \n", "0.527090 | \n", "0.340932 | \n", "0.164026 | \n", "0.213055 | \n", "-0.040502 | \n", "
2 | \n", "3qdg_D-E-C-A-B_tcr_pmhc | \n", "3qeu_A-B_tcr.pdb | \n", "3qeu_D-E_tcr.pdb | \n", "0.682250 | \n", "0.284114 | \n", "0.334339 | \n", "0.064055 | \n", "-0.007081 | \n", "
3 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "3utp_D-E_tcr.pdb | \n", "3utp_K-L_tcr.pdb | \n", "0.304534 | \n", "0.602875 | \n", "0.290645 | \n", "0.338509 | \n", "0.041175 | \n", "
4 | \n", "5c0a_D-E-C-A-B_tcr_pmhc | \n", "3utp_D-E_tcr.pdb | \n", "5c0a_D-E-C-A-B_tcr_pmhc.pdb | \n", "0.292352 | \n", "0.514781 | \n", "0.334429 | \n", "0.270019 | \n", "0.032863 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
131 | \n", "5nme_D-E-C-A-B_tcr_pmhc | \n", "5nmd_C-D_tcr.pdb | \n", "5nme_D-E-C-A-B_tcr_pmhc.pdb | \n", "0.211336 | \n", "0.712433 | \n", "0.113214 | \n", "0.130459 | \n", "0.030224 | \n", "
132 | \n", "5hhm_D-E-C-A-B_tcr_pmhc | \n", "2vlm_D-E_tcr.pdb | \n", "5hhm_D-E-C-A-B_tcr_pmhc.pdb | \n", "0.886794 | \n", "0.410827 | \n", "0.467935 | \n", "0.381336 | \n", "-0.115053 | \n", "
133 | \n", "3kpr_D-E-C-A-B_tcr_pmhc | \n", "1kgc_D-E_tcr.pdb | \n", "3kpr_D-E-C-A-B_tcr_pmhc.pdb | \n", "0.919506 | \n", "0.270885 | \n", "0.383752 | \n", "0.151581 | \n", "-0.026228 | \n", "
134 | \n", "1oga_D-E-C-A-B_tcr_pmhc | \n", "1oga_D-E-C-A-B_tcr_pmhc.pdb | \n", "2vlm_D-E_tcr.pdb | \n", "0.944931 | \n", "0.542110 | \n", "0.291887 | \n", "0.336729 | \n", "0.107774 | \n", "
135 | \n", "7rtr_D-E-C-A-B_tcr_pmhc | \n", "7n1d_A-B_tcr.pdb | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "0.642809 | \n", "0.188609 | \n", "0.388833 | \n", "0.291080 | \n", "0.001583 | \n", "
136 rows × 8 columns
\n", "\n", " | alpha_anchor_com_diff | \n", "beta_anchor_com_diff | \n", "alpha_fw_com_diff | \n", "beta_fw_com_diff | \n", "chain_angle_diff | \n", "chain_angle_diff_deg | \n", "chain_angle_diff_deg_mag | \n", "
---|---|---|---|---|---|---|---|
cdr_sequences_collated | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGRGDQPQH | \n", "0.112478 | \n", "0.221407 | \n", "0.131706 | \n", "0.140118 | \n", "-0.005545 | \n", "-0.317697 | \n", "0.317697 | \n", "
DRGSQS-IYSNGD-ALTRGPGNQFY-SGHVS-FNYEAQ-ASSSPGGVSTEAF | \n", "1.606143 | \n", "0.370279 | \n", "1.293890 | \n", "0.279836 | \n", "-0.057826 | \n", "-3.313194 | \n", "3.313194 | \n", "
DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF | \n", "0.626083 | \n", "0.366441 | \n", "0.250095 | \n", "0.178326 | \n", "-0.001998 | \n", "-0.114483 | \n", "0.114483 | \n", "
DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "0.624668 | \n", "0.204869 | \n", "0.302308 | \n", "0.186851 | \n", "-0.011943 | \n", "-0.684280 | \n", "0.684280 | \n", "
DRGSQS-IYSNGD-AVRTNSGYALN-QGHDT-YYEEEE-ASSDTVSYEQY | \n", "0.306039 | \n", "0.615471 | \n", "0.133071 | \n", "0.205778 | \n", "0.024796 | \n", "1.420686 | \n", "1.420686 | \n", "
DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLAGGRPEQY | \n", "0.385751 | \n", "0.657747 | \n", "0.300314 | \n", "0.374858 | \n", "0.018630 | \n", "1.067402 | \n", "1.067402 | \n", "
DRGSQS-IYSNGD-GTYNQGGKLI-MNHEY-SMNVEV-ASSGASHEQY | \n", "1.099042 | \n", "0.294167 | \n", "0.235768 | \n", "0.174727 | \n", "0.103824 | \n", "5.948649 | \n", "5.948649 | \n", "
DSAIYN-IQSSQRE-AQLNQAGTALI-MNHEY-SVGAGI-ASSYGTGINYGYT | \n", "0.596772 | \n", "0.032190 | \n", "0.184380 | \n", "0.213406 | \n", "-0.037547 | \n", "-2.151258 | \n", "2.151258 | \n", "
DSAIYN-IQSSQRE-AVRMDSSYKLI-SEHNR-FQNEAQ-ASSSWDTGELF | \n", "0.264265 | \n", "0.248560 | \n", "0.103594 | \n", "0.098554 | \n", "-0.009340 | \n", "-0.535154 | \n", "0.535154 | \n", "
DSAIYN-IQSSQRE-AVRPLLDGTYIPT-MNHEY-SVGAGT-ASSYLGNTGELF | \n", "0.408007 | \n", "0.394163 | \n", "0.483043 | \n", "0.506659 | \n", "0.044708 | \n", "2.561604 | \n", "2.561604 | \n", "
DSAIYN-IQSSQRE-AVRPTSGGSYIPT-MNHEY-SVGAGI-ASSYVGNTGELF | \n", "0.524052 | \n", "0.405684 | \n", "0.393972 | \n", "0.306927 | \n", "-0.011481 | \n", "-0.657791 | \n", "0.657791 | \n", "
FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGMGGWQ | \n", "0.520622 | \n", "0.385860 | \n", "0.254620 | \n", "0.288849 | \n", "-0.009649 | \n", "-0.552852 | \n", "0.552852 | \n", "
NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQRQEGDTQY | \n", "0.570818 | \n", "0.282032 | \n", "0.423455 | \n", "0.414862 | \n", "0.023776 | \n", "1.362237 | \n", "1.362237 | \n", "
NSAFDY-ILSVSNK-AASASFGDNSKLI-MSHET-SYDVDS-ASSLGHTEVF | \n", "0.124907 | \n", "0.468265 | \n", "0.138290 | \n", "0.401072 | \n", "0.016898 | \n", "0.968205 | \n", "0.968205 | \n", "
NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEKLAKNIQY | \n", "0.269449 | \n", "0.373169 | \n", "0.259665 | \n", "0.247023 | \n", "0.027624 | \n", "1.582753 | \n", "1.582753 | \n", "
NSASQS-VYSSG-VVQPGGYQKVT-MNHNS-SASEGT-ASSEGLWQVGDEQY | \n", "0.194964 | \n", "0.279496 | \n", "0.157453 | \n", "0.355964 | \n", "0.016874 | \n", "0.966819 | \n", "0.966819 | \n", "
NSASQS-VYSSG-VVRAGKLI-MNHEY-SVGEGT-ASGQGNFDIQY | \n", "0.409381 | \n", "0.249897 | \n", "0.205705 | \n", "0.286431 | \n", "-0.055766 | \n", "-3.195168 | \n", "3.195168 | \n", "
SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRSSYEQY | \n", "1.003777 | \n", "0.509446 | \n", "0.331727 | \n", "0.334128 | \n", "0.058476 | \n", "3.350441 | \n", "3.350441 | \n", "
TISGNEY-GLKNN-IVWGGYQKVT-SEHNR-FQNEAQ-ASRYRDDSYNEQF | \n", "1.032154 | \n", "1.282186 | \n", "0.435426 | \n", "0.393399 | \n", "0.066787 | \n", "3.826598 | \n", "3.826598 | \n", "
TISGTDY-GLTSN-ILPLAGGTSYGKLT-SGHVS-FQNEAQ-ASSLGQAYEQY | \n", "0.713302 | \n", "0.195773 | \n", "0.230770 | \n", "0.110227 | \n", "-0.002818 | \n", "-0.161464 | \n", "0.161464 | \n", "
YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY | \n", "0.304694 | \n", "0.460448 | \n", "0.099579 | \n", "0.079669 | \n", "0.006357 | \n", "0.364241 | \n", "0.364241 | \n", "
YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGAGGADTQY | \n", "0.796027 | \n", "0.388025 | \n", "0.043012 | \n", "0.054271 | \n", "0.112168 | \n", "6.426748 | \n", "6.426748 | \n", "