{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Modalities of TCR interactions with pMHCs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Introduction\n", "\n", "In this notebook, we investigated whether TCRs can be both flexible and rigid depending on the antigen they are contacting.\n", "We also looked at the opposite phenomenon of whether peptides can be both flexible and rigid depending on the TCR that is contacting them." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
file_namepdb_idstructure_typestatealpha_chainbeta_chainantigen_chainmhc_chain1mhc_chain2cdr_sequences_collatedpeptide_sequencemhc_slug
id
1ao7_D-E-C-A-B_tcr_pmhc1ao7_D-E-C-A-B_tcr_pmhc.pdb1ao7tcr_pmhcholoDECABDRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA...LLFGYPVYVhla_a_02_01
1b0g_C-A-B_pmhc1b0g_C-A-B_pmhc.pdb1b0gpmhcapoNaNNaNCABNaNALWGFFPVLhla_a_02_01
1b0g_F-D-E_pmhc1b0g_F-D-E_pmhc.pdb1b0gpmhcapoNaNNaNFDENaNALWGFFPVLhla_a_02_01
1bd2_D-E-C-A-B_tcr_pmhc1bd2_D-E-C-A-B_tcr_pmhc.pdb1bd2tcr_pmhcholoDECABNSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG...LLFGYPVYVhla_a_02_01
1bii_P-A-B_pmhc1bii_P-A-B_pmhc.pdb1biipmhcapoNaNNaNPABNaNRGPGRAFVTIh2_dd
.......................................
7rtd_C-A-B_pmhc7rtd_C-A-B_pmhc.pdb7rtdpmhcapoNaNNaNCABNaNYLQPRTFLLhla_a_02_01
7rtr_D-E-C-A-B_tcr_pmhc7rtr_D-E-C-A-B_tcr_pmhc.pdb7rtrtcr_pmhcholoDECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01
8gvb_A-B-P-H-L_tcr_pmhc8gvb_A-B-P-H-L_tcr_pmhc.pdb8gvbtcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...RYPLTFGWhla_a_24_02
8gvg_A-B-P-H-L_tcr_pmhc8gvg_A-B-P-H-L_tcr_pmhc.pdb8gvgtcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...RFPLTFGWhla_a_24_02
8gvi_A-B-P-H-L_tcr_pmhc8gvi_A-B-P-H-L_tcr_pmhc.pdb8gvitcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL...RYPLTFGWhla_a_24_02
\n", "

391 rows × 12 columns

\n", "
" ], "text/plain": [ " file_name pdb_id structure_type \\\n", "id \n", "1ao7_D-E-C-A-B_tcr_pmhc 1ao7_D-E-C-A-B_tcr_pmhc.pdb 1ao7 tcr_pmhc \n", "1b0g_C-A-B_pmhc 1b0g_C-A-B_pmhc.pdb 1b0g pmhc \n", "1b0g_F-D-E_pmhc 1b0g_F-D-E_pmhc.pdb 1b0g pmhc \n", "1bd2_D-E-C-A-B_tcr_pmhc 1bd2_D-E-C-A-B_tcr_pmhc.pdb 1bd2 tcr_pmhc \n", "1bii_P-A-B_pmhc 1bii_P-A-B_pmhc.pdb 1bii pmhc \n", "... ... ... ... \n", "7rtd_C-A-B_pmhc 7rtd_C-A-B_pmhc.pdb 7rtd pmhc \n", "7rtr_D-E-C-A-B_tcr_pmhc 7rtr_D-E-C-A-B_tcr_pmhc.pdb 7rtr tcr_pmhc \n", "8gvb_A-B-P-H-L_tcr_pmhc 8gvb_A-B-P-H-L_tcr_pmhc.pdb 8gvb tcr_pmhc \n", "8gvg_A-B-P-H-L_tcr_pmhc 8gvg_A-B-P-H-L_tcr_pmhc.pdb 8gvg tcr_pmhc \n", "8gvi_A-B-P-H-L_tcr_pmhc 8gvi_A-B-P-H-L_tcr_pmhc.pdb 8gvi tcr_pmhc \n", "\n", " state alpha_chain beta_chain antigen_chain mhc_chain1 \\\n", "id \n", "1ao7_D-E-C-A-B_tcr_pmhc holo D E C A \n", "1b0g_C-A-B_pmhc apo NaN NaN C A \n", "1b0g_F-D-E_pmhc apo NaN NaN F D \n", "1bd2_D-E-C-A-B_tcr_pmhc holo D E C A \n", "1bii_P-A-B_pmhc apo NaN NaN P A \n", "... ... ... ... ... ... \n", "7rtd_C-A-B_pmhc apo NaN NaN C A \n", "7rtr_D-E-C-A-B_tcr_pmhc holo D E C A \n", "8gvb_A-B-P-H-L_tcr_pmhc holo A B P H \n", "8gvg_A-B-P-H-L_tcr_pmhc holo A B P H \n", "8gvi_A-B-P-H-L_tcr_pmhc holo A B P H \n", "\n", " mhc_chain2 \\\n", "id \n", "1ao7_D-E-C-A-B_tcr_pmhc B \n", "1b0g_C-A-B_pmhc B \n", "1b0g_F-D-E_pmhc E \n", "1bd2_D-E-C-A-B_tcr_pmhc B \n", "1bii_P-A-B_pmhc B \n", "... ... \n", "7rtd_C-A-B_pmhc B \n", "7rtr_D-E-C-A-B_tcr_pmhc B \n", "8gvb_A-B-P-H-L_tcr_pmhc L \n", "8gvg_A-B-P-H-L_tcr_pmhc L \n", "8gvi_A-B-P-H-L_tcr_pmhc L \n", "\n", " cdr_sequences_collated \\\n", "id \n", "1ao7_D-E-C-A-B_tcr_pmhc DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... \n", "1b0g_C-A-B_pmhc NaN \n", "1b0g_F-D-E_pmhc NaN \n", "1bd2_D-E-C-A-B_tcr_pmhc NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... \n", "1bii_P-A-B_pmhc NaN \n", "... ... \n", "7rtd_C-A-B_pmhc NaN \n", "7rtr_D-E-C-A-B_tcr_pmhc DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY \n", "8gvb_A-B-P-H-L_tcr_pmhc YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... \n", "8gvg_A-B-P-H-L_tcr_pmhc YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... \n", "8gvi_A-B-P-H-L_tcr_pmhc YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... \n", "\n", " peptide_sequence mhc_slug \n", "id \n", "1ao7_D-E-C-A-B_tcr_pmhc LLFGYPVYV hla_a_02_01 \n", "1b0g_C-A-B_pmhc ALWGFFPVL hla_a_02_01 \n", "1b0g_F-D-E_pmhc ALWGFFPVL hla_a_02_01 \n", "1bd2_D-E-C-A-B_tcr_pmhc LLFGYPVYV hla_a_02_01 \n", "1bii_P-A-B_pmhc RGPGRAFVTI h2_dd \n", "... ... ... \n", "7rtd_C-A-B_pmhc YLQPRTFLL hla_a_02_01 \n", "7rtr_D-E-C-A-B_tcr_pmhc YLQPRTFLL hla_a_02_01 \n", "8gvb_A-B-P-H-L_tcr_pmhc RYPLTFGW hla_a_24_02 \n", "8gvg_A-B-P-H-L_tcr_pmhc RFPLTFGW hla_a_24_02 \n", "8gvi_A-B-P-H-L_tcr_pmhc RYPLTFGW hla_a_24_02 \n", "\n", "[391 rows x 12 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_holo_summary = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I/apo_holo_summary.csv')\n", "apo_holo_summary['id'] = apo_holo_summary['file_name'].str.replace('.pdb', '', regex=False)\n", "apo_holo_summary = apo_holo_summary.set_index('id')\n", "\n", "apo_holo_summary" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def categorize_movement(rmsd: float) -> str:\n", " if rmsd < 0.5:\n", " return f'Little Movement (<0.5 Å)'\n", "\n", " if 0.5 <= rmsd < 1.0:\n", " return f'Some Movement (0.5 to 1.0 Å)'\n", "\n", " if 1.0 <= rmsd < 2.0:\n", " return 'Movement (1.0 to 2.0 Å)'\n", "\n", " if 2.0 <= rmsd < 4.0:\n", " return 'Large Movement (2.0 to 4.0 Å)'\n", "\n", " if 4.0 <= rmsd:\n", " return 'Significant Movement (>4.0 Å)'\n", "\n", "\n", "movement_order = pd.CategoricalDtype(categories=['Little Movement (<0.5 Å)',\n", " 'Some Movement (0.5 to 1.0 Å)',\n", " 'Movement (1.0 to 2.0 Å)',\n", " 'Large Movement (2.0 to 4.0 Å)',\n", " 'Significant Movement (>4.0 Å)'], ordered=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def classify_modality(rmsd: float) -> str | None:\n", " return 'rigid' if rmsd < 0.5 else 'flexible' if rmsd > 1.0 else None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TCR CDR Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "results_fw_align = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I-comparisons/rmsd_cdr_fw_align_results.csv')\n", "\n", "results_fw_align = results_fw_align.merge(\n", " apo_holo_summary[['file_name',\n", " 'pdb_id',\n", " 'structure_type',\n", " 'state',\n", " 'alpha_chain',\n", " 'beta_chain',\n", " 'antigen_chain',\n", " 'mhc_chain1',\n", " 'mhc_chain2']],\n", " how='left',\n", " left_on='structure_x_name',\n", " right_on='file_name',\n", ").merge(\n", " apo_holo_summary[['file_name',\n", " 'pdb_id',\n", " 'structure_type',\n", " 'state',\n", " 'alpha_chain',\n", " 'beta_chain',\n", " 'antigen_chain',\n", " 'mhc_chain1',\n", " 'mhc_chain2']],\n", " how='left',\n", " left_on='structure_y_name',\n", " right_on='file_name',\n", ").merge(\n", " apo_holo_summary[['cdr_sequences_collated', 'peptide_sequence', 'mhc_slug']],\n", " how='left',\n", " left_on='complex_id',\n", " right_index=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "results_fw_align['comparison'] = results_fw_align['state_x'] + '-' + results_fw_align['state_y']\n", "results_fw_align['comparison'] = results_fw_align['comparison'].map(\n", " lambda entry: 'apo-holo' if entry == 'holo-apo' else entry\n", ")\n", "\n", "results_fw_align = results_fw_align.query(\"comparison == 'apo-holo'\").reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "results_fw_align['structure_comparison'] = results_fw_align.apply(\n", " lambda row: '-'.join(sorted([row.structure_x_name, row.structure_y_name])),\n", " axis='columns',\n", ")\n", "results_fw_align = results_fw_align.drop_duplicates(['structure_comparison', 'chain_type', 'cdr']).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
complex_idstructure_x_namestructure_y_namechain_typecdrrmsdfile_name_xpdb_id_xstructure_type_xstate_x...alpha_chain_ybeta_chain_yantigen_chain_ymhc_chain1_ymhc_chain2_ycdr_sequences_collatedpeptide_sequencemhc_slugcomparisonstructure_comparison
03qdg_D-E-C-A-B_tcr_pmhc3qdg_D-E-C-A-B_tcr_pmhc.pdb3qeu_A-B_tcr.pdbalpha_chain11.9328063qdg_D-E-C-A-B_tcr_pmhc.pdb3qdgtcr_pmhcholo...ABNaNNaNNaNDRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFELAGIGILTVhla_a_02_01apo-holo3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
13qdg_D-E-C-A-B_tcr_pmhc3qdg_D-E-C-A-B_tcr_pmhc.pdb3qeu_A-B_tcr.pdbalpha_chain21.3085983qdg_D-E-C-A-B_tcr_pmhc.pdb3qdgtcr_pmhcholo...ABNaNNaNNaNDRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFELAGIGILTVhla_a_02_01apo-holo3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
23qdg_D-E-C-A-B_tcr_pmhc3qdg_D-E-C-A-B_tcr_pmhc.pdb3qeu_A-B_tcr.pdbalpha_chain31.2440623qdg_D-E-C-A-B_tcr_pmhc.pdb3qdgtcr_pmhcholo...ABNaNNaNNaNDRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFELAGIGILTVhla_a_02_01apo-holo3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
33qdg_D-E-C-A-B_tcr_pmhc3qdg_D-E-C-A-B_tcr_pmhc.pdb3qeu_A-B_tcr.pdbbeta_chain10.8090663qdg_D-E-C-A-B_tcr_pmhc.pdb3qdgtcr_pmhcholo...ABNaNNaNNaNDRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFELAGIGILTVhla_a_02_01apo-holo3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
43qdg_D-E-C-A-B_tcr_pmhc3qdg_D-E-C-A-B_tcr_pmhc.pdb3qeu_A-B_tcr.pdbbeta_chain20.6885973qdg_D-E-C-A-B_tcr_pmhc.pdb3qdgtcr_pmhcholo...ABNaNNaNNaNDRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFELAGIGILTVhla_a_02_01apo-holo3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
..................................................................
5697rtr_D-E-C-A-B_tcr_pmhc7n1d_A-B_tcr.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdbalpha_chain20.8101707n1d_A-B_tcr.pdb7n1dtcrapo...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
5707rtr_D-E-C-A-B_tcr_pmhc7n1d_A-B_tcr.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdbalpha_chain30.5632637n1d_A-B_tcr.pdb7n1dtcrapo...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
5717rtr_D-E-C-A-B_tcr_pmhc7n1d_A-B_tcr.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdbbeta_chain10.3991827n1d_A-B_tcr.pdb7n1dtcrapo...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
5727rtr_D-E-C-A-B_tcr_pmhc7n1d_A-B_tcr.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdbbeta_chain20.2844557n1d_A-B_tcr.pdb7n1dtcrapo...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
5737rtr_D-E-C-A-B_tcr_pmhc7n1d_A-B_tcr.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdbbeta_chain30.4324537n1d_A-B_tcr.pdb7n1dtcrapo...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
\n", "

574 rows × 29 columns

\n", "
" ], "text/plain": [ " complex_id structure_x_name \\\n", "0 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb \n", "1 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb \n", "2 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb \n", "3 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb \n", "4 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb \n", ".. ... ... \n", "569 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb \n", "570 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb \n", "571 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb \n", "572 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb \n", "573 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb \n", "\n", " structure_y_name chain_type cdr rmsd \\\n", "0 3qeu_A-B_tcr.pdb alpha_chain 1 1.932806 \n", "1 3qeu_A-B_tcr.pdb alpha_chain 2 1.308598 \n", "2 3qeu_A-B_tcr.pdb alpha_chain 3 1.244062 \n", "3 3qeu_A-B_tcr.pdb beta_chain 1 0.809066 \n", "4 3qeu_A-B_tcr.pdb beta_chain 2 0.688597 \n", ".. ... ... ... ... \n", "569 7rtr_D-E-C-A-B_tcr_pmhc.pdb alpha_chain 2 0.810170 \n", "570 7rtr_D-E-C-A-B_tcr_pmhc.pdb alpha_chain 3 0.563263 \n", "571 7rtr_D-E-C-A-B_tcr_pmhc.pdb beta_chain 1 0.399182 \n", "572 7rtr_D-E-C-A-B_tcr_pmhc.pdb beta_chain 2 0.284455 \n", "573 7rtr_D-E-C-A-B_tcr_pmhc.pdb beta_chain 3 0.432453 \n", "\n", " file_name_x pdb_id_x structure_type_x state_x ... \\\n", "0 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qdg tcr_pmhc holo ... \n", "1 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qdg tcr_pmhc holo ... \n", "2 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qdg tcr_pmhc holo ... \n", "3 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qdg tcr_pmhc holo ... \n", "4 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qdg tcr_pmhc holo ... \n", ".. ... ... ... ... ... \n", "569 7n1d_A-B_tcr.pdb 7n1d tcr apo ... \n", "570 7n1d_A-B_tcr.pdb 7n1d tcr apo ... \n", "571 7n1d_A-B_tcr.pdb 7n1d tcr apo ... \n", "572 7n1d_A-B_tcr.pdb 7n1d tcr apo ... \n", "573 7n1d_A-B_tcr.pdb 7n1d tcr apo ... \n", "\n", " alpha_chain_y beta_chain_y antigen_chain_y mhc_chain1_y mhc_chain2_y \\\n", "0 A B NaN NaN NaN \n", "1 A B NaN NaN NaN \n", "2 A B NaN NaN NaN \n", "3 A B NaN NaN NaN \n", "4 A B NaN NaN NaN \n", ".. ... ... ... ... ... \n", "569 D E C A B \n", "570 D E C A B \n", "571 D E C A B \n", "572 D E C A B \n", "573 D E C A B \n", "\n", " cdr_sequences_collated peptide_sequence \\\n", "0 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF ELAGIGILTV \n", "1 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF ELAGIGILTV \n", "2 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF ELAGIGILTV \n", "3 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF ELAGIGILTV \n", "4 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF ELAGIGILTV \n", ".. ... ... \n", "569 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "570 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "571 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "572 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "573 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "\n", " mhc_slug comparison structure_comparison \n", "0 hla_a_02_01 apo-holo 3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb \n", "1 hla_a_02_01 apo-holo 3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb \n", "2 hla_a_02_01 apo-holo 3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb \n", "3 hla_a_02_01 apo-holo 3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb \n", "4 hla_a_02_01 apo-holo 3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb \n", ".. ... ... ... \n", "569 hla_a_02_01 apo-holo 7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "570 hla_a_02_01 apo-holo 7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "571 hla_a_02_01 apo-holo 7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "572 hla_a_02_01 apo-holo 7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "573 hla_a_02_01 apo-holo 7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "\n", "[574 rows x 29 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results_fw_align" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Analysis" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def average_and_classify_cdr_movement(group):\n", " group_mean = group.groupby(['chain_type',\n", " 'cdr',\n", " 'peptide_sequence',\n", " 'mhc_slug']).agg({'rmsd': 'mean'})\n", " group_mean['movement'] = group_mean['rmsd'].map(categorize_movement).astype(movement_order)\n", " group_mean['classification'] = group_mean['rmsd'].map(classify_modality)\n", "\n", " return group_mean\n", "\n", "cdr_results_agg = (results_fw_align.groupby('cdr_sequences_collated')\n", " .apply(average_and_classify_cdr_movement)\n", " .reset_index())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "cdr_results_agg_multi_pmhc = (cdr_results_agg.groupby(['cdr_sequences_collated', 'chain_type', 'cdr'])\n", " .filter(lambda group: len(group) > 1))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "cdr_results_agg_multi_pmhc['num_modalities'] = (\n", " cdr_results_agg_multi_pmhc.groupby(['cdr_sequences_collated', 'chain_type', 'cdr'])['classification']\n", " .transform(lambda movement: movement.nunique())\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdr_sequences_collatedchain_typecdrpeptide_sequencemhc_slugrmsdmovementclassificationnum_modalities
11DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFalpha_chain1AAGIGILTVhla_a_02_012.149292Large Movement (2.0 to 4.0 Å)flexible1
12DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFalpha_chain1ELAGIGILTVhla_a_02_012.188011Large Movement (2.0 to 4.0 Å)flexible1
13DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFalpha_chain1MMWDRGLGMMhla_a_02_011.983439Movement (1.0 to 2.0 Å)flexible1
14DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFalpha_chain1SMLGIGIVPVhla_a_02_012.269887Large Movement (2.0 to 4.0 Å)flexible1
15DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAFalpha_chain2AAGIGILTVhla_a_02_011.095455Movement (1.0 to 2.0 Å)flexible1
..............................
287YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLYbeta_chain1SIYRYYGLh2_kb0.579553Some Movement (0.5 to 1.0 Å)None0
288YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLYbeta_chain2EQYKFYSVh2_kb0.850569Some Movement (0.5 to 1.0 Å)None0
289YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLYbeta_chain2SIYRYYGLh2_kb0.938313Some Movement (0.5 to 1.0 Å)None0
290YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLYbeta_chain3EQYKFYSVh2_kb1.084084Movement (1.0 to 2.0 Å)flexible1
291YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLYbeta_chain3SIYRYYGLh2_kb0.925296Some Movement (0.5 to 1.0 Å)None1
\n", "

226 rows × 9 columns

\n", "
" ], "text/plain": [ " cdr_sequences_collated chain_type cdr \\\n", "11 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF alpha_chain 1 \n", "12 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF alpha_chain 1 \n", "13 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF alpha_chain 1 \n", "14 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF alpha_chain 1 \n", "15 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF alpha_chain 2 \n", ".. ... ... ... \n", "287 YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY beta_chain 1 \n", "288 YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY beta_chain 2 \n", "289 YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY beta_chain 2 \n", "290 YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY beta_chain 3 \n", "291 YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY beta_chain 3 \n", "\n", " peptide_sequence mhc_slug rmsd movement \\\n", "11 AAGIGILTV hla_a_02_01 2.149292 Large Movement (2.0 to 4.0 Å) \n", "12 ELAGIGILTV hla_a_02_01 2.188011 Large Movement (2.0 to 4.0 Å) \n", "13 MMWDRGLGMM hla_a_02_01 1.983439 Movement (1.0 to 2.0 Å) \n", "14 SMLGIGIVPV hla_a_02_01 2.269887 Large Movement (2.0 to 4.0 Å) \n", "15 AAGIGILTV hla_a_02_01 1.095455 Movement (1.0 to 2.0 Å) \n", ".. ... ... ... ... \n", "287 SIYRYYGL h2_kb 0.579553 Some Movement (0.5 to 1.0 Å) \n", "288 EQYKFYSV h2_kb 0.850569 Some Movement (0.5 to 1.0 Å) \n", "289 SIYRYYGL h2_kb 0.938313 Some Movement (0.5 to 1.0 Å) \n", "290 EQYKFYSV h2_kb 1.084084 Movement (1.0 to 2.0 Å) \n", "291 SIYRYYGL h2_kb 0.925296 Some Movement (0.5 to 1.0 Å) \n", "\n", " classification num_modalities \n", "11 flexible 1 \n", "12 flexible 1 \n", "13 flexible 1 \n", "14 flexible 1 \n", "15 flexible 1 \n", ".. ... ... \n", "287 None 0 \n", "288 None 0 \n", "289 None 0 \n", "290 flexible 1 \n", "291 None 1 \n", "\n", "[226 rows x 9 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdr_results_agg_multi_pmhc" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 226.000000\n", "mean 0.814159\n", "std 0.389842\n", "min 0.000000\n", "25% 1.000000\n", "50% 1.000000\n", "75% 1.000000\n", "max 1.000000\n", "Name: num_modalities, dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdr_results_agg_multi_pmhc['num_modalities'].describe()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdr_sequences_collatedchain_typecdrpeptide_sequencemhc_slugrmsdmovementclassificationnum_modalities
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [cdr_sequences_collated, chain_type, cdr, peptide_sequence, mhc_slug, rmsd, movement, classification, num_modalities]\n", "Index: []" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdr_results_agg_multi_pmhc.query('num_modalities > 1')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "No TCRs had both <0.5 Å RMSD movement binding to one antigen and >1.0 Å RMSD binding to a different antigen." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Peptide Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "pmhc_results = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I-comparisons/pmhc_tcr_contact_apo_holo.csv')\n", "\n", "peptide_results = pmhc_results.query(\"chain_type == 'antigen_chain'\").reset_index(drop=True)\n", "peptide_results = peptide_results.drop(columns=['chain_type', 'tcr_contact'])\n", "\n", "peptide_results = peptide_results.merge(\n", " apo_holo_summary[['file_name',\n", " 'pdb_id',\n", " 'structure_type',\n", " 'state',\n", " 'alpha_chain',\n", " 'beta_chain',\n", " 'antigen_chain',\n", " 'mhc_chain1',\n", " 'mhc_chain2']],\n", " how='left',\n", " left_on='structure_x_name',\n", " right_on='file_name',\n", ").merge(\n", " apo_holo_summary[['file_name',\n", " 'pdb_id',\n", " 'structure_type',\n", " 'state',\n", " 'alpha_chain',\n", " 'beta_chain',\n", " 'antigen_chain',\n", " 'mhc_chain1',\n", " 'mhc_chain2']],\n", " how='left',\n", " left_on='structure_y_name',\n", " right_on='file_name',\n", ").merge(\n", " apo_holo_summary[['cdr_sequences_collated', 'peptide_sequence', 'mhc_slug']],\n", " how='left',\n", " left_on='complex_id',\n", " right_index=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "peptide_results['comparison'] = peptide_results['state_x'] + '-' + peptide_results['state_y']\n", "peptide_results['comparison'] = peptide_results['comparison'].map(\n", " lambda entry: 'apo-holo' if entry == 'holo-apo' else entry\n", ")\n", "\n", "peptide_results = peptide_results.query(\"comparison == 'apo-holo'\").reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "peptide_results['structure_comparison'] = peptide_results.apply(\n", " lambda row: '-'.join(sorted([row.structure_x_name, row.structure_y_name])),\n", " axis='columns',\n", ")\n", "peptide_results = peptide_results.drop_duplicates(['structure_comparison']).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
complex_idstructure_x_namestructure_y_namermsdfile_name_xpdb_id_xstructure_type_xstate_xalpha_chain_xbeta_chain_x...alpha_chain_ybeta_chain_yantigen_chain_ymhc_chain1_ymhc_chain2_ycdr_sequences_collatedpeptide_sequencemhc_slugcomparisonstructure_comparison
05c0a_D-E-C-A-B_tcr_pmhc5c0a_D-E-C-A-B_tcr_pmhc.pdb5n1y_C-A-B_pmhc.pdb0.4488585c0a_D-E-C-A-B_tcr_pmhc.pdb5c0atcr_pmhcholoDE...NaNNaNCABNSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK...MVWGPDPLYVhla_a_02_01apo-holo5c0a_D-E-C-A-B_tcr_pmhc.pdb-5n1y_C-A-B_pmhc.pdb
15wlg_D-E-C-A-B_tcr_pmhc5wlg_D-E-C-A-B_tcr_pmhc.pdb5wli_C-A-B_pmhc.pdb0.4981485wlg_D-E-C-A-B_tcr_pmhc.pdb5wlgtcr_pmhcholoDE...NaNNaNCABTYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLYSQLLNAKYLh2_dbapo-holo5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_C-A-B_pmhc.pdb
25wlg_D-E-C-A-B_tcr_pmhc5wlg_D-E-C-A-B_tcr_pmhc.pdb5wli_F-D-E_pmhc.pdb0.5195075wlg_D-E-C-A-B_tcr_pmhc.pdb5wlgtcr_pmhcholoDE...NaNNaNFDETYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLYSQLLNAKYLh2_dbapo-holo5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_F-D-E_pmhc.pdb
35wlg_D-E-C-A-B_tcr_pmhc5wlg_D-E-C-A-B_tcr_pmhc.pdb5wli_I-G-H_pmhc.pdb0.4590255wlg_D-E-C-A-B_tcr_pmhc.pdb5wlgtcr_pmhcholoDE...NaNNaNIGHTYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLYSQLLNAKYLh2_dbapo-holo5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_I-G-H_pmhc.pdb
45wlg_D-E-C-A-B_tcr_pmhc5wlg_D-E-C-A-B_tcr_pmhc.pdb5wli_L-J-K_pmhc.pdb0.4947055wlg_D-E-C-A-B_tcr_pmhc.pdb5wlgtcr_pmhcholoDE...NaNNaNLJKTYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLYSQLLNAKYLh2_dbapo-holo5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_L-J-K_pmhc.pdb
..................................................................
3727rtr_D-E-C-A-B_tcr_pmhc7n6d_G-E-F_pmhc.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdb0.4688697n6d_G-E-F_pmhc.pdb7n6dpmhcapoNaNNaN...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n6d_G-E-F_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
3737rtr_D-E-C-A-B_tcr_pmhc7n6d_K-I-J_pmhc.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdb0.4110507n6d_K-I-J_pmhc.pdb7n6dpmhcapoNaNNaN...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n6d_K-I-J_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
3747rtr_D-E-C-A-B_tcr_pmhc7n6d_O-M-N_pmhc.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdb0.4489057n6d_O-M-N_pmhc.pdb7n6dpmhcapoNaNNaN...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7n6d_O-M-N_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
3757rtr_D-E-C-A-B_tcr_pmhc7p3d_C-A-B_pmhc.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdb0.7652957p3d_C-A-B_pmhc.pdb7p3dpmhcapoNaNNaN...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7p3d_C-A-B_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
3767rtr_D-E-C-A-B_tcr_pmhc7rtd_C-A-B_pmhc.pdb7rtr_D-E-C-A-B_tcr_pmhc.pdb0.4550787rtd_C-A-B_pmhc.pdb7rtdpmhcapoNaNNaN...DECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01apo-holo7rtd_C-A-B_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
\n", "

377 rows × 27 columns

\n", "
" ], "text/plain": [ " complex_id structure_x_name \\\n", "0 5c0a_D-E-C-A-B_tcr_pmhc 5c0a_D-E-C-A-B_tcr_pmhc.pdb \n", "1 5wlg_D-E-C-A-B_tcr_pmhc 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "2 5wlg_D-E-C-A-B_tcr_pmhc 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "3 5wlg_D-E-C-A-B_tcr_pmhc 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "4 5wlg_D-E-C-A-B_tcr_pmhc 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", ".. ... ... \n", "372 7rtr_D-E-C-A-B_tcr_pmhc 7n6d_G-E-F_pmhc.pdb \n", "373 7rtr_D-E-C-A-B_tcr_pmhc 7n6d_K-I-J_pmhc.pdb \n", "374 7rtr_D-E-C-A-B_tcr_pmhc 7n6d_O-M-N_pmhc.pdb \n", "375 7rtr_D-E-C-A-B_tcr_pmhc 7p3d_C-A-B_pmhc.pdb \n", "376 7rtr_D-E-C-A-B_tcr_pmhc 7rtd_C-A-B_pmhc.pdb \n", "\n", " structure_y_name rmsd file_name_x \\\n", "0 5n1y_C-A-B_pmhc.pdb 0.448858 5c0a_D-E-C-A-B_tcr_pmhc.pdb \n", "1 5wli_C-A-B_pmhc.pdb 0.498148 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "2 5wli_F-D-E_pmhc.pdb 0.519507 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "3 5wli_I-G-H_pmhc.pdb 0.459025 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", "4 5wli_L-J-K_pmhc.pdb 0.494705 5wlg_D-E-C-A-B_tcr_pmhc.pdb \n", ".. ... ... ... \n", "372 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.468869 7n6d_G-E-F_pmhc.pdb \n", "373 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.411050 7n6d_K-I-J_pmhc.pdb \n", "374 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.448905 7n6d_O-M-N_pmhc.pdb \n", "375 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.765295 7p3d_C-A-B_pmhc.pdb \n", "376 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.455078 7rtd_C-A-B_pmhc.pdb \n", "\n", " pdb_id_x structure_type_x state_x alpha_chain_x beta_chain_x ... \\\n", "0 5c0a tcr_pmhc holo D E ... \n", "1 5wlg tcr_pmhc holo D E ... \n", "2 5wlg tcr_pmhc holo D E ... \n", "3 5wlg tcr_pmhc holo D E ... \n", "4 5wlg tcr_pmhc holo D E ... \n", ".. ... ... ... ... ... ... \n", "372 7n6d pmhc apo NaN NaN ... \n", "373 7n6d pmhc apo NaN NaN ... \n", "374 7n6d pmhc apo NaN NaN ... \n", "375 7p3d pmhc apo NaN NaN ... \n", "376 7rtd pmhc apo NaN NaN ... \n", "\n", " alpha_chain_y beta_chain_y antigen_chain_y mhc_chain1_y mhc_chain2_y \\\n", "0 NaN NaN C A B \n", "1 NaN NaN C A B \n", "2 NaN NaN F D E \n", "3 NaN NaN I G H \n", "4 NaN NaN L J K \n", ".. ... ... ... ... ... \n", "372 D E C A B \n", "373 D E C A B \n", "374 D E C A B \n", "375 D E C A B \n", "376 D E C A B \n", "\n", " cdr_sequences_collated peptide_sequence \\\n", "0 NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK... MVWGPDPLYV \n", "1 TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY SQLLNAKYL \n", "2 TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY SQLLNAKYL \n", "3 TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY SQLLNAKYL \n", "4 TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY SQLLNAKYL \n", ".. ... ... \n", "372 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "373 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "374 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "375 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "376 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "\n", " mhc_slug comparison structure_comparison \n", "0 hla_a_02_01 apo-holo 5c0a_D-E-C-A-B_tcr_pmhc.pdb-5n1y_C-A-B_pmhc.pdb \n", "1 h2_db apo-holo 5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_C-A-B_pmhc.pdb \n", "2 h2_db apo-holo 5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_F-D-E_pmhc.pdb \n", "3 h2_db apo-holo 5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_I-G-H_pmhc.pdb \n", "4 h2_db apo-holo 5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_L-J-K_pmhc.pdb \n", ".. ... ... ... \n", "372 hla_a_02_01 apo-holo 7n6d_G-E-F_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "373 hla_a_02_01 apo-holo 7n6d_K-I-J_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "374 hla_a_02_01 apo-holo 7n6d_O-M-N_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "375 hla_a_02_01 apo-holo 7p3d_C-A-B_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "376 hla_a_02_01 apo-holo 7rtd_C-A-B_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb \n", "\n", "[377 rows x 27 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "peptide_results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Analysis" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "def average_and_classify_peptide_movement(group):\n", " group_mean = group.groupby('cdr_sequences_collated').agg({'rmsd': 'mean'})\n", " group_mean['movement'] = group_mean['rmsd'].map(categorize_movement).astype(movement_order)\n", " group_mean['classification'] = group_mean['rmsd'].map(classify_modality)\n", "\n", " return group_mean\n", "\n", "peptide_results_agg = (peptide_results.groupby(['peptide_sequence', 'mhc_slug'])\n", " .apply(average_and_classify_peptide_movement)\n", " .reset_index())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "peptide_results_agg_multi_tcr = (peptide_results_agg.groupby(['mhc_slug', 'peptide_sequence'])\n", " .filter(lambda group: len(group) > 1))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "peptide_results_agg_multi_tcr['num_modalities'] = (\n", " peptide_results_agg_multi_tcr.groupby(['mhc_slug', 'peptide_sequence'])['classification']\n", " .transform(lambda movement: movement.nunique())\n", ")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
peptide_sequencemhc_slugcdr_sequences_collatedrmsdmovementclassificationnum_modalities
0AAGIGILTVhla_a_02_01DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF1.157017Movement (1.0 to 2.0 Å)flexible1
1AAGIGILTVhla_a_02_01DRGSQS-IYSNGD-AVNVAGKST-GTSNPN-SVGIG-AWSETGLGT...1.134248Movement (1.0 to 2.0 Å)flexible1
2AAGIGILTVhla_a_02_01FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...0.986466Some Movement (0.5 to 1.0 Å)None1
3AAGIGILTVhla_a_02_01SIFNT-LYKAGEL-AGGTGNQFY-ENHRY-SYGVKD-AISEVGVGQPQH1.323428Movement (1.0 to 2.0 Å)flexible1
4AAGIGILTVhla_a_02_01YRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSWSFGTEAF0.526953Some Movement (0.5 to 1.0 Å)None1
8APRGPHGGAASGLhla_b_07_02NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQR...4.232887Significant Movement (>4.0 Å)flexible1
9APRGPHGGAASGLhla_b_07_02NIATNDY-GYKTK-LVVDQKLV-SGDLS-YYNGEE-ASSGGHTGSNEQF2.814299Large Movement (2.0 to 4.0 Å)flexible1
11ASNENMETMh2_dbDSTFNY-IRSVSDK-AASEGSGSWQLI-MNHDT-YYDKIL-ASSAG...0.396101Little Movement (<0.5 Å)rigid1
12ASNENMETMh2_dbDSTFNY-IRSVSDK-AASETSGSWQLI-MNHDT-YYDKIL-ASSRD...0.409245Little Movement (<0.5 Å)rigid1
13ASNENMETMh2_dbTTMRS-LASGT-AAVTGNTGKLI-MNHDT-YYDKIL-ASSRGTIHS...0.558530Some Movement (0.5 to 1.0 Å)None1
14EAAGIGILTVhla_a_02_01DRGSQS-IYSNGD-AVNVAGKST-GTSNPN-SVGIG-AWSETGLGT...3.621428Large Movement (2.0 to 4.0 Å)flexible1
15EAAGIGILTVhla_a_02_01FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...3.573235Large Movement (2.0 to 4.0 Å)flexible1
22FLRGRAYGLhla_b_08_01NSASQS-VYSSG-VVRAGKLI-MNHEY-SVGEGT-ASGQGNFDIQY0.617307Some Movement (0.5 to 1.0 Å)None0
23FLRGRAYGLhla_b_08_01TISGTDY-GLTSN-ILPLAGGTSYGKLT-SGHVS-FQNEAQ-ASSL...0.717857Some Movement (0.5 to 1.0 Å)None0
24FLRGRAYGLhla_b_08_01TSDPSYG-QGSYDQGN-AMREDTGNQFY-SGHAT-FQNNGV-ASSF...0.596415Some Movement (0.5 to 1.0 Å)None0
27GILGFVFTLhla_a_02_01SSNFYA-MTLNGDE-AFDTNAGKST-LNHDA-SQIVND-ASSIFGQ...0.446045Little Movement (<0.5 Å)rigid1
28GILGFVFTLhla_a_02_01SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRASYEQY0.581065Some Movement (0.5 to 1.0 Å)None1
29GILGFVFTLhla_a_02_01SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRSSYEQY0.465703Little Movement (<0.5 Å)rigid1
30GILGFVFTLhla_a_02_01SVFSS-VVTGGEV-AGAIGPSNTGKLI-LNHDA-SQIVND-ASSIR...0.344910Little Movement (<0.5 Å)rigid1
31GILGFVFTLhla_a_02_01TSESDYY-QEAYKQQN-AWGVNAGGTSYGKLT-LNHDA-SQIVND-...0.564845Some Movement (0.5 to 1.0 Å)None1
37HMTEVVRHChla_a_02_01NSAFQY-TYSSGN-AMSGLKEDSSYKLI-MNHEY-SMNVEV-ASSI...0.694367Some Movement (0.5 to 1.0 Å)None0
38HMTEVVRHChla_a_02_01NSASQS-VYSSG-VVQPGGYQKVT-MNHNS-SASEGT-ASSEGLWQ...0.785581Some Movement (0.5 to 1.0 Å)None0
39HMTEVVRHChla_a_02_01NYSPAY-IRENEKE-ALDIYPHDMR-SGHAT-FQNNGV-ASSLDPG...0.864221Some Movement (0.5 to 1.0 Å)None0
40HMTEVVRHChla_a_02_01TSENNYY-QEAYK-AFMGYSGAGSYQLT-ENHRY-SYGVKD-AISE...0.809779Some Movement (0.5 to 1.0 Å)None0
45IPLTEEAELhla_b_35_01TRDTTYY-RNSFDEQN-ALSHNSGGSNYKLT-SGHNT-YYREEE-A...1.013667Movement (1.0 to 2.0 Å)flexible1
46IPLTEEAELhla_b_35_01TSWWSYY-QGSDEQN-ALGEGGAQKLV-MNHNS-SASEGT-ASRTR...1.104902Movement (1.0 to 2.0 Å)flexible1
58LLFGYPVYVhla_a_02_01DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA...1.071247Movement (1.0 to 2.0 Å)flexible1
59LLFGYPVYVhla_a_02_01DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLM...1.084802Movement (1.0 to 2.0 Å)flexible1
60LLFGYPVYVhla_a_02_01IRSSTS-IYSNGD-AVTTDRSGKLQ-MNHEY-SVGVGI-ASRPGAA...0.659812Some Movement (0.5 to 1.0 Å)None1
61LLFGYPVYVhla_a_02_01NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG...1.105653Movement (1.0 to 2.0 Å)flexible1
63LPEPLPQGQLTAYhla_b_35_08TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...0.467122Little Movement (<0.5 Å)rigid1
64LPEPLPQGQLTAYhla_b_35_08TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...0.450190Little Movement (<0.5 Å)rigid1
65LPEPLPQGQLTAYhla_b_35_08TTSDR-LLSNGAV-AVGGGSNYQLI-SGHDT-YYEEEE-ASSRTGS...0.781614Some Movement (0.5 to 1.0 Å)None1
69NLVPMVATVhla_a_02_01SSNFYA-MTLNGDE-ARNTGNQFY-MNHEY-SVGAGI-ASSPVTGG...0.569056Some Movement (0.5 to 1.0 Å)None0
70NLVPMVATVhla_a_02_01TISGTDY-GLTSN-ILDNNNDMR-SGHVS-FNYEAQ-ASSLAPGTT...0.717804Some Movement (0.5 to 1.0 Å)None0
74RFPLTFGWCFhla_a_24_02DRGSQS-IYSNGD-GTYNQGGKLI-MNHEY-SMNVEV-ASSGASHEQY1.302746Movement (1.0 to 2.0 Å)flexible1
75RFPLTFGWCFhla_a_24_02YGATPY-YFSGDTLV-AVGAPSGAGSYQLT-MGHRA-YSYEKL-AS...0.830768Some Movement (0.5 to 1.0 Å)None1
85RYPLTFGWhla_a_24_02YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...0.869939Some Movement (0.5 to 1.0 Å)None0
86RYPLTFGWhla_a_24_02YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL...0.708066Some Movement (0.5 to 1.0 Å)None0
87RYPLTFGWCFhla_a_24_02DSAIYN-IQSSQRE-AVRMDSSYKLI-SEHNR-FQNEAQ-ASSSWD...1.522271Movement (1.0 to 2.0 Å)flexible1
88RYPLTFGWCFhla_a_24_02TSWWSYY-QGSDEQN-ALGELARSGGYQKVT-GTSNPN-SVGIG-A...1.112057Movement (1.0 to 2.0 Å)flexible1
91SLLMWITQChla_a_02_01DSAIYN-IPFWQRE-AVRPTSGGSYIPT-MNHEY-SVSVGM-ASSY...0.972491Some Movement (0.5 to 1.0 Å)None0
92SLLMWITQChla_a_02_01DSAIYN-IQSSQRE-AVRPLLDGTYIPT-MNHEY-SVGAGT-ASSY...0.875077Some Movement (0.5 to 1.0 Å)None0
93SLLMWITQChla_a_02_01DSAIYN-IQSSQRE-AVRPTSGGSYIPT-MNHEY-SVGAGI-ASSY...0.880234Some Movement (0.5 to 1.0 Å)None0
94SLLMWITQChla_a_02_01DSAIYN-ITPWQRE-AVRPLLDGTYIPT-MNHEY-SVAIQT-ASSY...0.826961Some Movement (0.5 to 1.0 Å)None0
95SLLMWITQChla_a_02_01DSAIYN-ITPWQRE-AVRPLLDGTYIPT-MNHEY-SVSVGM-ASSY...0.839051Some Movement (0.5 to 1.0 Å)None0
100SSLCNFRAYVh2_dbTISGNEY-GLQQN-ILSGGCNYKLT-MSHET-SYDVDS-ASSFGREQY0.344040Little Movement (<0.5 Å)rigid1
101SSLCNFRAYVh2_dbTISGNEY-GLQQN-ILSGGSNYKLT-MSHET-SYDVDS-ASSFGREQY0.356701Little Movement (<0.5 Å)rigid1
106VVVGADGVGKhla_a_11_01TRDTAYY-QPWWGEQN-AMSVPSGDGSYQFT-MNHEY-SVGEGT-A...1.256482Movement (1.0 to 2.0 Å)flexible1
107VVVGADGVGKhla_a_11_01TRDTTYY-RNSFDEQN-ALSGPSGAGSYQLT-MNHEY-SVGEGT-A...1.259841Movement (1.0 to 2.0 Å)flexible1
111YLQPRTFLLhla_a_02_01DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY0.512357Some Movement (0.5 to 1.0 Å)None0
112YLQPRTFLLhla_a_02_01NSASQS-VYSSG-VVNINTDKLI-SEHNR-FQNEAQ-ASSSANSGELF0.627788Some Movement (0.5 to 1.0 Å)None0
113YLQPRTFLLhla_a_02_01NSASQS-VYSSG-VVNRNNDMR-LNHDA-SQIVND-AGQVTNTGELF0.685808Some Movement (0.5 to 1.0 Å)None0
\n", "
" ], "text/plain": [ " peptide_sequence mhc_slug \\\n", "0 AAGIGILTV hla_a_02_01 \n", "1 AAGIGILTV hla_a_02_01 \n", "2 AAGIGILTV hla_a_02_01 \n", "3 AAGIGILTV hla_a_02_01 \n", "4 AAGIGILTV hla_a_02_01 \n", "8 APRGPHGGAASGL hla_b_07_02 \n", "9 APRGPHGGAASGL hla_b_07_02 \n", "11 ASNENMETM h2_db \n", "12 ASNENMETM h2_db \n", "13 ASNENMETM h2_db \n", "14 EAAGIGILTV hla_a_02_01 \n", "15 EAAGIGILTV hla_a_02_01 \n", "22 FLRGRAYGL hla_b_08_01 \n", "23 FLRGRAYGL hla_b_08_01 \n", "24 FLRGRAYGL hla_b_08_01 \n", "27 GILGFVFTL hla_a_02_01 \n", "28 GILGFVFTL hla_a_02_01 \n", "29 GILGFVFTL hla_a_02_01 \n", "30 GILGFVFTL hla_a_02_01 \n", "31 GILGFVFTL hla_a_02_01 \n", "37 HMTEVVRHC hla_a_02_01 \n", "38 HMTEVVRHC hla_a_02_01 \n", "39 HMTEVVRHC hla_a_02_01 \n", "40 HMTEVVRHC hla_a_02_01 \n", "45 IPLTEEAEL hla_b_35_01 \n", "46 IPLTEEAEL hla_b_35_01 \n", "58 LLFGYPVYV hla_a_02_01 \n", "59 LLFGYPVYV hla_a_02_01 \n", "60 LLFGYPVYV hla_a_02_01 \n", "61 LLFGYPVYV hla_a_02_01 \n", "63 LPEPLPQGQLTAY hla_b_35_08 \n", "64 LPEPLPQGQLTAY hla_b_35_08 \n", "65 LPEPLPQGQLTAY hla_b_35_08 \n", "69 NLVPMVATV hla_a_02_01 \n", "70 NLVPMVATV hla_a_02_01 \n", "74 RFPLTFGWCF hla_a_24_02 \n", "75 RFPLTFGWCF hla_a_24_02 \n", "85 RYPLTFGW hla_a_24_02 \n", "86 RYPLTFGW hla_a_24_02 \n", "87 RYPLTFGWCF hla_a_24_02 \n", "88 RYPLTFGWCF hla_a_24_02 \n", "91 SLLMWITQC hla_a_02_01 \n", "92 SLLMWITQC hla_a_02_01 \n", "93 SLLMWITQC hla_a_02_01 \n", "94 SLLMWITQC hla_a_02_01 \n", "95 SLLMWITQC hla_a_02_01 \n", "100 SSLCNFRAYV h2_db \n", "101 SSLCNFRAYV h2_db \n", "106 VVVGADGVGK hla_a_11_01 \n", "107 VVVGADGVGK hla_a_11_01 \n", "111 YLQPRTFLL hla_a_02_01 \n", "112 YLQPRTFLL hla_a_02_01 \n", "113 YLQPRTFLL hla_a_02_01 \n", "\n", " cdr_sequences_collated rmsd \\\n", "0 DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF 1.157017 \n", "1 DRGSQS-IYSNGD-AVNVAGKST-GTSNPN-SVGIG-AWSETGLGT... 1.134248 \n", "2 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... 0.986466 \n", "3 SIFNT-LYKAGEL-AGGTGNQFY-ENHRY-SYGVKD-AISEVGVGQPQH 1.323428 \n", "4 YRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSWSFGTEAF 0.526953 \n", "8 NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQR... 4.232887 \n", "9 NIATNDY-GYKTK-LVVDQKLV-SGDLS-YYNGEE-ASSGGHTGSNEQF 2.814299 \n", "11 DSTFNY-IRSVSDK-AASEGSGSWQLI-MNHDT-YYDKIL-ASSAG... 0.396101 \n", "12 DSTFNY-IRSVSDK-AASETSGSWQLI-MNHDT-YYDKIL-ASSRD... 0.409245 \n", "13 TTMRS-LASGT-AAVTGNTGKLI-MNHDT-YYDKIL-ASSRGTIHS... 0.558530 \n", "14 DRGSQS-IYSNGD-AVNVAGKST-GTSNPN-SVGIG-AWSETGLGT... 3.621428 \n", "15 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... 3.573235 \n", "22 NSASQS-VYSSG-VVRAGKLI-MNHEY-SVGEGT-ASGQGNFDIQY 0.617307 \n", "23 TISGTDY-GLTSN-ILPLAGGTSYGKLT-SGHVS-FQNEAQ-ASSL... 0.717857 \n", "24 TSDPSYG-QGSYDQGN-AMREDTGNQFY-SGHAT-FQNNGV-ASSF... 0.596415 \n", "27 SSNFYA-MTLNGDE-AFDTNAGKST-LNHDA-SQIVND-ASSIFGQ... 0.446045 \n", "28 SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRASYEQY 0.581065 \n", "29 SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRSSYEQY 0.465703 \n", "30 SVFSS-VVTGGEV-AGAIGPSNTGKLI-LNHDA-SQIVND-ASSIR... 0.344910 \n", "31 TSESDYY-QEAYKQQN-AWGVNAGGTSYGKLT-LNHDA-SQIVND-... 0.564845 \n", "37 NSAFQY-TYSSGN-AMSGLKEDSSYKLI-MNHEY-SMNVEV-ASSI... 0.694367 \n", "38 NSASQS-VYSSG-VVQPGGYQKVT-MNHNS-SASEGT-ASSEGLWQ... 0.785581 \n", "39 NYSPAY-IRENEKE-ALDIYPHDMR-SGHAT-FQNNGV-ASSLDPG... 0.864221 \n", "40 TSENNYY-QEAYK-AFMGYSGAGSYQLT-ENHRY-SYGVKD-AISE... 0.809779 \n", "45 TRDTTYY-RNSFDEQN-ALSHNSGGSNYKLT-SGHNT-YYREEE-A... 1.013667 \n", "46 TSWWSYY-QGSDEQN-ALGEGGAQKLV-MNHNS-SASEGT-ASRTR... 1.104902 \n", "58 DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... 1.071247 \n", "59 DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLM... 1.084802 \n", "60 IRSSTS-IYSNGD-AVTTDRSGKLQ-MNHEY-SVGVGI-ASRPGAA... 0.659812 \n", "61 NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... 1.105653 \n", "63 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... 0.467122 \n", "64 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... 0.450190 \n", "65 TTSDR-LLSNGAV-AVGGGSNYQLI-SGHDT-YYEEEE-ASSRTGS... 0.781614 \n", "69 SSNFYA-MTLNGDE-ARNTGNQFY-MNHEY-SVGAGI-ASSPVTGG... 0.569056 \n", "70 TISGTDY-GLTSN-ILDNNNDMR-SGHVS-FNYEAQ-ASSLAPGTT... 0.717804 \n", "74 DRGSQS-IYSNGD-GTYNQGGKLI-MNHEY-SMNVEV-ASSGASHEQY 1.302746 \n", "75 YGATPY-YFSGDTLV-AVGAPSGAGSYQLT-MGHRA-YSYEKL-AS... 0.830768 \n", "85 YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... 0.869939 \n", "86 YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... 0.708066 \n", "87 DSAIYN-IQSSQRE-AVRMDSSYKLI-SEHNR-FQNEAQ-ASSSWD... 1.522271 \n", "88 TSWWSYY-QGSDEQN-ALGELARSGGYQKVT-GTSNPN-SVGIG-A... 1.112057 \n", "91 DSAIYN-IPFWQRE-AVRPTSGGSYIPT-MNHEY-SVSVGM-ASSY... 0.972491 \n", "92 DSAIYN-IQSSQRE-AVRPLLDGTYIPT-MNHEY-SVGAGT-ASSY... 0.875077 \n", "93 DSAIYN-IQSSQRE-AVRPTSGGSYIPT-MNHEY-SVGAGI-ASSY... 0.880234 \n", "94 DSAIYN-ITPWQRE-AVRPLLDGTYIPT-MNHEY-SVAIQT-ASSY... 0.826961 \n", "95 DSAIYN-ITPWQRE-AVRPLLDGTYIPT-MNHEY-SVSVGM-ASSY... 0.839051 \n", "100 TISGNEY-GLQQN-ILSGGCNYKLT-MSHET-SYDVDS-ASSFGREQY 0.344040 \n", "101 TISGNEY-GLQQN-ILSGGSNYKLT-MSHET-SYDVDS-ASSFGREQY 0.356701 \n", "106 TRDTAYY-QPWWGEQN-AMSVPSGDGSYQFT-MNHEY-SVGEGT-A... 1.256482 \n", "107 TRDTTYY-RNSFDEQN-ALSGPSGAGSYQLT-MNHEY-SVGEGT-A... 1.259841 \n", "111 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY 0.512357 \n", "112 NSASQS-VYSSG-VVNINTDKLI-SEHNR-FQNEAQ-ASSSANSGELF 0.627788 \n", "113 NSASQS-VYSSG-VVNRNNDMR-LNHDA-SQIVND-AGQVTNTGELF 0.685808 \n", "\n", " movement classification num_modalities \n", "0 Movement (1.0 to 2.0 Å) flexible 1 \n", "1 Movement (1.0 to 2.0 Å) flexible 1 \n", "2 Some Movement (0.5 to 1.0 Å) None 1 \n", "3 Movement (1.0 to 2.0 Å) flexible 1 \n", "4 Some Movement (0.5 to 1.0 Å) None 1 \n", "8 Significant Movement (>4.0 Å) flexible 1 \n", "9 Large Movement (2.0 to 4.0 Å) flexible 1 \n", "11 Little Movement (<0.5 Å) rigid 1 \n", "12 Little Movement (<0.5 Å) rigid 1 \n", "13 Some Movement (0.5 to 1.0 Å) None 1 \n", "14 Large Movement (2.0 to 4.0 Å) flexible 1 \n", "15 Large Movement (2.0 to 4.0 Å) flexible 1 \n", "22 Some Movement (0.5 to 1.0 Å) None 0 \n", "23 Some Movement (0.5 to 1.0 Å) None 0 \n", "24 Some Movement (0.5 to 1.0 Å) None 0 \n", "27 Little Movement (<0.5 Å) rigid 1 \n", "28 Some Movement (0.5 to 1.0 Å) None 1 \n", "29 Little Movement (<0.5 Å) rigid 1 \n", "30 Little Movement (<0.5 Å) rigid 1 \n", "31 Some Movement (0.5 to 1.0 Å) None 1 \n", "37 Some Movement (0.5 to 1.0 Å) None 0 \n", "38 Some Movement (0.5 to 1.0 Å) None 0 \n", "39 Some Movement (0.5 to 1.0 Å) None 0 \n", "40 Some Movement (0.5 to 1.0 Å) None 0 \n", "45 Movement (1.0 to 2.0 Å) flexible 1 \n", "46 Movement (1.0 to 2.0 Å) flexible 1 \n", "58 Movement (1.0 to 2.0 Å) flexible 1 \n", "59 Movement (1.0 to 2.0 Å) flexible 1 \n", "60 Some Movement (0.5 to 1.0 Å) None 1 \n", "61 Movement (1.0 to 2.0 Å) flexible 1 \n", "63 Little Movement (<0.5 Å) rigid 1 \n", "64 Little Movement (<0.5 Å) rigid 1 \n", "65 Some Movement (0.5 to 1.0 Å) None 1 \n", "69 Some Movement (0.5 to 1.0 Å) None 0 \n", "70 Some Movement (0.5 to 1.0 Å) None 0 \n", "74 Movement (1.0 to 2.0 Å) flexible 1 \n", "75 Some Movement (0.5 to 1.0 Å) None 1 \n", "85 Some Movement (0.5 to 1.0 Å) None 0 \n", "86 Some Movement (0.5 to 1.0 Å) None 0 \n", "87 Movement (1.0 to 2.0 Å) flexible 1 \n", "88 Movement (1.0 to 2.0 Å) flexible 1 \n", "91 Some Movement (0.5 to 1.0 Å) None 0 \n", "92 Some Movement (0.5 to 1.0 Å) None 0 \n", "93 Some Movement (0.5 to 1.0 Å) None 0 \n", "94 Some Movement (0.5 to 1.0 Å) None 0 \n", "95 Some Movement (0.5 to 1.0 Å) None 0 \n", "100 Little Movement (<0.5 Å) rigid 1 \n", "101 Little Movement (<0.5 Å) rigid 1 \n", "106 Movement (1.0 to 2.0 Å) flexible 1 \n", "107 Movement (1.0 to 2.0 Å) flexible 1 \n", "111 Some Movement (0.5 to 1.0 Å) None 0 \n", "112 Some Movement (0.5 to 1.0 Å) None 0 \n", "113 Some Movement (0.5 to 1.0 Å) None 0 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "peptide_results_agg_multi_tcr" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 53.000000\n", "mean 0.641509\n", "std 0.484146\n", "min 0.000000\n", "25% 0.000000\n", "50% 1.000000\n", "75% 1.000000\n", "max 1.000000\n", "Name: num_modalities, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "peptide_results_agg_multi_tcr['num_modalities'].describe()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
peptide_sequencemhc_slugcdr_sequences_collatedrmsdmovementclassificationnum_modalities
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [peptide_sequence, mhc_slug, cdr_sequences_collated, rmsd, movement, classification, num_modalities]\n", "Index: []" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "peptide_results_agg_multi_tcr.query('num_modalities > 1')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Similarly, no peptides have both large (>1.0 Å RMSD) conformational changes and small (<0.5 Å RMSD) conformational changes for different TCRs." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conclusion\n", "\n", "Neither the TCR CDR loops, nor the peptides show different movement modalities (rigid vs flexible) when contacted by different entities.\n", "These results indicate that the flexiblity of TCRs between *apo* and *holo* conformations is dependent on the composition of the entitity itself and not the entitity it is contacting." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }