{ "cells": [ { "cell_type": "markdown", "id": "20382f65", "metadata": {}, "source": [ "# Comparing *apo* and *holo* CDR loop clustering" ] }, { "cell_type": "markdown", "id": "6b6b6403", "metadata": {}, "source": [ "## Introduction\n", "\n", "In this notebook, we set out to assess whether the CDR loops of TCRs stay in there canonical forms between *apo* and *holo* states or if they disrupt them." ] }, { "cell_type": "code", "execution_count": 1, "id": "a155933e", "metadata": {}, "outputs": [], "source": [ "import itertools\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 2, "id": "23992f3b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameclusterchain_typecdrsequencecluster_type
07zt2_DE12alpha_chain1TSGFNGpseudo
17zt3_DE12alpha_chain1TSGFNGpseudo
27zt4_DE12alpha_chain1TSGFNGpseudo
37zt5_DE12alpha_chain1TSGFNGpseudo
47zt7_DE12alpha_chain1TSGFNGpseudo
.....................
48076miv_CD22beta_chain3ASGDEGYTQYcanonical
48083rtq_CD22beta_chain3ASGDEGYTQYcanonical
48093dxa_NOnoisebeta_chain3ASRYRDDSYNEQFNaN
48101d9k_ABnoisebeta_chain3ASGGQGRAEQFNaN
48114gg6_GHnoisebeta_chain3ASSVAVSAGTYEQYNaN
\n", "

4812 rows × 6 columns

\n", "
" ], "text/plain": [ " name cluster chain_type cdr sequence cluster_type\n", "0 7zt2_DE 12 alpha_chain 1 TSGFNG pseudo\n", "1 7zt3_DE 12 alpha_chain 1 TSGFNG pseudo\n", "2 7zt4_DE 12 alpha_chain 1 TSGFNG pseudo\n", "3 7zt5_DE 12 alpha_chain 1 TSGFNG pseudo\n", "4 7zt7_DE 12 alpha_chain 1 TSGFNG pseudo\n", "... ... ... ... ... ... ...\n", "4807 6miv_CD 22 beta_chain 3 ASGDEGYTQY canonical\n", "4808 3rtq_CD 22 beta_chain 3 ASGDEGYTQY canonical\n", "4809 3dxa_NO noise beta_chain 3 ASRYRDDSYNEQF NaN\n", "4810 1d9k_AB noise beta_chain 3 ASGGQGRAEQF NaN\n", "4811 4gg6_GH noise beta_chain 3 ASSVAVSAGTYEQY NaN\n", "\n", "[4812 rows x 6 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('../data/processed/stcrdab_clusters.csv')\n", "df" ] }, { "cell_type": "markdown", "id": "b2243a5a", "metadata": {}, "source": [ "## Compare *apo* and *holo* forms of loops" ] }, { "cell_type": "code", "execution_count": 3, "id": "05a288aa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
file_namepdb_idstructure_typestatealpha_chainbeta_chainantigen_chainmhc_chain1mhc_chain2cdr_sequences_collatedpeptide_sequencemhc_slug
01ao7_D-E-C-A-B_tcr_pmhc.pdb1ao7tcr_pmhcholoDECABDRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA...LLFGYPVYVhla_a_02_01
11b0g_C-A-B_pmhc.pdb1b0gpmhcapoNaNNaNCABNaNALWGFFPVLhla_a_02_01
21b0g_F-D-E_pmhc.pdb1b0gpmhcapoNaNNaNFDENaNALWGFFPVLhla_a_02_01
31bd2_D-E-C-A-B_tcr_pmhc.pdb1bd2tcr_pmhcholoDECABNSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG...LLFGYPVYVhla_a_02_01
41bii_P-A-B_pmhc.pdb1biipmhcapoNaNNaNPABNaNRGPGRAFVTIh2_dd
.......................................
3867rtd_C-A-B_pmhc.pdb7rtdpmhcapoNaNNaNCABNaNYLQPRTFLLhla_a_02_01
3877rtr_D-E-C-A-B_tcr_pmhc.pdb7rtrtcr_pmhcholoDECABDRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQYYLQPRTFLLhla_a_02_01
3888gvb_A-B-P-H-L_tcr_pmhc.pdb8gvbtcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...RYPLTFGWhla_a_24_02
3898gvg_A-B-P-H-L_tcr_pmhc.pdb8gvgtcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...RFPLTFGWhla_a_24_02
3908gvi_A-B-P-H-L_tcr_pmhc.pdb8gvitcr_pmhcholoABPHLYGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL...RYPLTFGWhla_a_24_02
\n", "

391 rows × 12 columns

\n", "
" ], "text/plain": [ " file_name pdb_id structure_type state alpha_chain \\\n", "0 1ao7_D-E-C-A-B_tcr_pmhc.pdb 1ao7 tcr_pmhc holo D \n", "1 1b0g_C-A-B_pmhc.pdb 1b0g pmhc apo NaN \n", "2 1b0g_F-D-E_pmhc.pdb 1b0g pmhc apo NaN \n", "3 1bd2_D-E-C-A-B_tcr_pmhc.pdb 1bd2 tcr_pmhc holo D \n", "4 1bii_P-A-B_pmhc.pdb 1bii pmhc apo NaN \n", ".. ... ... ... ... ... \n", "386 7rtd_C-A-B_pmhc.pdb 7rtd pmhc apo NaN \n", "387 7rtr_D-E-C-A-B_tcr_pmhc.pdb 7rtr tcr_pmhc holo D \n", "388 8gvb_A-B-P-H-L_tcr_pmhc.pdb 8gvb tcr_pmhc holo A \n", "389 8gvg_A-B-P-H-L_tcr_pmhc.pdb 8gvg tcr_pmhc holo A \n", "390 8gvi_A-B-P-H-L_tcr_pmhc.pdb 8gvi tcr_pmhc holo A \n", "\n", " beta_chain antigen_chain mhc_chain1 mhc_chain2 \\\n", "0 E C A B \n", "1 NaN C A B \n", "2 NaN F D E \n", "3 E C A B \n", "4 NaN P A B \n", ".. ... ... ... ... \n", "386 NaN C A B \n", "387 E C A B \n", "388 B P H L \n", "389 B P H L \n", "390 B P H L \n", "\n", " cdr_sequences_collated peptide_sequence \\\n", "0 DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... LLFGYPVYV \n", "1 NaN ALWGFFPVL \n", "2 NaN ALWGFFPVL \n", "3 NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... LLFGYPVYV \n", "4 NaN RGPGRAFVTI \n", ".. ... ... \n", "386 NaN YLQPRTFLL \n", "387 DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL \n", "388 YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RYPLTFGW \n", "389 YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RFPLTFGW \n", "390 YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... RYPLTFGW \n", "\n", " mhc_slug \n", "0 hla_a_02_01 \n", "1 hla_a_02_01 \n", "2 hla_a_02_01 \n", "3 hla_a_02_01 \n", "4 h2_dd \n", ".. ... 
\n", "386 hla_a_02_01 \n", "387 hla_a_02_01 \n", "388 hla_a_24_02 \n", "389 hla_a_24_02 \n", "390 hla_a_24_02 \n", "\n", "[391 rows x 12 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_holo_summary_df = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I/apo_holo_summary.csv')\n", "apo_holo_summary_df" ] }, { "cell_type": "code", "execution_count": 4, "id": "73bc8019", "metadata": {}, "outputs": [], "source": [ "df[['pdb_id', 'chains']] = df['name'].str.split('_').apply(pd.Series)\n", "df[['alpha_chain', 'beta_chain']] = df['chains'].apply(list).apply(pd.Series)" ] }, { "cell_type": "code", "execution_count": 5, "id": "da2a0946", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameclusterchain_typecdrsequencecluster_typepdb_idchainsalpha_chainbeta_chainfile_namestructure_typestateantigen_chainmhc_chain1mhc_chain2cdr_sequences_collatedpeptide_sequencemhc_slug
02ak4_DE4alpha_chain1TRDTTYYcanonical2ak4DEDE2ak4_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABTRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...LPEPLPQGQLTAYhla_b_35_08
12ak4_DE6beta_chain1MNHNScanonical2ak4DEDE2ak4_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABTRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...LPEPLPQGQLTAYhla_b_35_08
22ak4_DE6alpha_chain2RNSFDEQNpseudo2ak4DEDE2ak4_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABTRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...LPEPLPQGQLTAYhla_b_35_08
32ak4_DEnoisebeta_chain2SASEGTNaN2ak4DEDE2ak4_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABTRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...LPEPLPQGQLTAYhla_b_35_08
42ak4_DE12alpha_chain3ALSGFYNTDKLIpseudo2ak4DEDE2ak4_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABTRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP...LPEPLPQGQLTAYhla_b_35_08
............................................................
11054jff_DE0beta_chain1GTSNPNpseudo4jffDEDE4jff_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABFLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...ELAGIGILTVhla_a_02_01
11064jff_DE17alpha_chain2TYREGDcanonical4jffDEDE4jff_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABFLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...ELAGIGILTVhla_a_02_01
11074jff_DEnoisebeta_chain2WGPFGNaN4jffDEDE4jff_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABFLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...ELAGIGILTVhla_a_02_01
11084jff_DE26alpha_chain3AVNDGGRLTcanonical4jffDEDE4jff_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABFLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...ELAGIGILTVhla_a_02_01
11094jff_DEnoisebeta_chain3AWSETGLGMGGWQNaN4jffDEDE4jff_D-E-C-A-B_tcr_pmhc.pdbtcr_pmhcholoCABFLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...ELAGIGILTVhla_a_02_01
\n", "

1110 rows × 19 columns

\n", "
" ], "text/plain": [ " name cluster chain_type cdr sequence cluster_type pdb_id \\\n", "0 2ak4_DE 4 alpha_chain 1 TRDTTYY canonical 2ak4 \n", "1 2ak4_DE 6 beta_chain 1 MNHNS canonical 2ak4 \n", "2 2ak4_DE 6 alpha_chain 2 RNSFDEQN pseudo 2ak4 \n", "3 2ak4_DE noise beta_chain 2 SASEGT NaN 2ak4 \n", "4 2ak4_DE 12 alpha_chain 3 ALSGFYNTDKLI pseudo 2ak4 \n", "... ... ... ... ... ... ... ... \n", "1105 4jff_DE 0 beta_chain 1 GTSNPN pseudo 4jff \n", "1106 4jff_DE 17 alpha_chain 2 TYREGD canonical 4jff \n", "1107 4jff_DE noise beta_chain 2 WGPFG NaN 4jff \n", "1108 4jff_DE 26 alpha_chain 3 AVNDGGRLT canonical 4jff \n", "1109 4jff_DE noise beta_chain 3 AWSETGLGMGGWQ NaN 4jff \n", "\n", " chains alpha_chain beta_chain file_name \\\n", "0 DE D E 2ak4_D-E-C-A-B_tcr_pmhc.pdb \n", "1 DE D E 2ak4_D-E-C-A-B_tcr_pmhc.pdb \n", "2 DE D E 2ak4_D-E-C-A-B_tcr_pmhc.pdb \n", "3 DE D E 2ak4_D-E-C-A-B_tcr_pmhc.pdb \n", "4 DE D E 2ak4_D-E-C-A-B_tcr_pmhc.pdb \n", "... ... ... ... ... \n", "1105 DE D E 4jff_D-E-C-A-B_tcr_pmhc.pdb \n", "1106 DE D E 4jff_D-E-C-A-B_tcr_pmhc.pdb \n", "1107 DE D E 4jff_D-E-C-A-B_tcr_pmhc.pdb \n", "1108 DE D E 4jff_D-E-C-A-B_tcr_pmhc.pdb \n", "1109 DE D E 4jff_D-E-C-A-B_tcr_pmhc.pdb \n", "\n", " structure_type state antigen_chain mhc_chain1 mhc_chain2 \\\n", "0 tcr_pmhc holo C A B \n", "1 tcr_pmhc holo C A B \n", "2 tcr_pmhc holo C A B \n", "3 tcr_pmhc holo C A B \n", "4 tcr_pmhc holo C A B \n", "... ... ... ... ... ... \n", "1105 tcr_pmhc holo C A B \n", "1106 tcr_pmhc holo C A B \n", "1107 tcr_pmhc holo C A B \n", "1108 tcr_pmhc holo C A B \n", "1109 tcr_pmhc holo C A B \n", "\n", " cdr_sequences_collated peptide_sequence \\\n", "0 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... LPEPLPQGQLTAY \n", "1 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... LPEPLPQGQLTAY \n", "2 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... LPEPLPQGQLTAY \n", "3 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... LPEPLPQGQLTAY \n", "4 TRDTTYY-RNSFDEQN-ALSGFYNTDKLI-MNHNS-SASEGT-ASP... 
LPEPLPQGQLTAY \n", "... ... ... \n", "1105 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... ELAGIGILTV \n", "1106 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... ELAGIGILTV \n", "1107 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... ELAGIGILTV \n", "1108 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... ELAGIGILTV \n", "1109 FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM... ELAGIGILTV \n", "\n", " mhc_slug \n", "0 hla_b_35_08 \n", "1 hla_b_35_08 \n", "2 hla_b_35_08 \n", "3 hla_b_35_08 \n", "4 hla_b_35_08 \n", "... ... \n", "1105 hla_a_02_01 \n", "1106 hla_a_02_01 \n", "1107 hla_a_02_01 \n", "1108 hla_a_02_01 \n", "1109 hla_a_02_01 \n", "\n", "[1110 rows x 19 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Inner join: keep only the loops whose structure (PDB ID plus TCR chain IDs) is listed\n",
 "# in the apo/holo summary, and attach the summary columns to every loop row.\n",
 "apo_holo_clusters = pd.merge(\n",
 "    df,\n",
 "    apo_holo_summary_df,\n",
 "    on=['pdb_id', 'alpha_chain', 'beta_chain'],\n",
 "    how='inner',\n",
 ")\n",
 "apo_holo_clusters"
] }, { "cell_type": "code", "execution_count": 6, "id": "4c42f850", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdr_sequences_collatedchain_typecdrapo_clustersholo_clusters
0ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1[13][noise]
1ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain2[noise][noise]
2ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain3[noise][noise]
3ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...beta_chain1[4][4]
4ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...beta_chain2[2][2]
..................
511YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...alpha_chain2[noise][noise]
512YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...alpha_chain3[noise][2]
513YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain1[4][4]
514YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain2[2][2]
515YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3[noise][noise]
\n", "

516 rows × 5 columns

\n", "
" ], "text/plain": [ " cdr_sequences_collated chain_type cdr \\\n", "0 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "1 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 2 \n", "2 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 3 \n", "3 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... beta_chain 1 \n", "4 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... beta_chain 2 \n", ".. ... ... ... \n", "511 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... alpha_chain 2 \n", "512 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... alpha_chain 3 \n", "513 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 1 \n", "514 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 2 \n", "515 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "\n", " apo_clusters holo_clusters \n", "0 [13] [noise] \n", "1 [noise] [noise] \n", "2 [noise] [noise] \n", "3 [4] [4] \n", "4 [2] [2] \n", ".. ... ... \n", "511 [noise] [noise] \n", "512 [noise] [2] \n", "513 [4] [4] \n", "514 [2] [2] \n", "515 [noise] [noise] \n", "\n", "[516 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def collate_clusters(group):\n", " apo_clusters = sorted(group.query(\"state == 'apo'\")['cluster'].tolist())\n", " holo_clusters = sorted(group.query(\"state == 'holo'\")['cluster'].tolist())\n", "\n", " return pd.Series({'apo_clusters': apo_clusters, 'holo_clusters': holo_clusters})\n", "\n", "apo_holo_clusters_agg = apo_holo_clusters.groupby(['cdr_sequences_collated',\n", " 'chain_type',\n", " 'cdr']).apply(collate_clusters).reset_index()\n", "apo_holo_clusters_agg" ] }, { "cell_type": "code", "execution_count": 7, "id": "e283ecaa", "metadata": {}, "outputs": [], "source": [ "cluster_types = (df[['chain_type', 'cdr', 'cluster', 'cluster_type']].drop_duplicates()\n", " .set_index(['chain_type', 'cdr', 'cluster'])\n", " ['cluster_type'])" ] }, { "cell_type": "code", 
"execution_count": 8, "id": "567c6954", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdr_sequences_collatedchain_typecdrtypescount
0ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster Same0.0
1ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster Shift0.0
2ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster to Noise1.0
3ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical to Pseudo0.0
4ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Noise0.0
..................
5671YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Noise to Pseudo Cluster0.0
5672YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster Same0.0
5673YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster Shift0.0
5674YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster to Noise0.0
5675YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo to Canonical0.0
\n", "

5676 rows × 5 columns

\n", "
" ], "text/plain": [ " cdr_sequences_collated chain_type cdr \\\n", "0 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "1 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "2 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "3 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "4 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "... ... ... ... \n", "5671 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5672 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5673 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5674 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5675 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "\n", " types count \n", "0 Canonical Cluster Same 0.0 \n", "1 Canonical Cluster Shift 0.0 \n", "2 Canonical Cluster to Noise 1.0 \n", "3 Canonical to Pseudo 0.0 \n", "4 Noise 0.0 \n", "... ... ... 
\n", "5671 Noise to Pseudo Cluster 0.0 \n", "5672 Pseudo Cluster Same 0.0 \n", "5673 Pseudo Cluster Shift 0.0 \n", "5674 Pseudo Cluster to Noise 0.0 \n", "5675 Pseudo to Canonical 0.0 \n", "\n", "[5676 rows x 5 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def classify_movement(chain_type: str,\n", " cdr: str,\n", " apo_clusters: list[str],\n", " holo_clusters: list[str],\n", " cluster_types: pd.Series) -> pd.Series:\n", " types = {\n", " 'Noise': 0,\n", " 'Noise to Canonical Cluster': 0,\n", " 'Noise to Pseudo Cluster': 0,\n", " 'Canonical Cluster to Noise': 0,\n", " 'Pseudo Cluster to Noise': 0,\n", " 'Canonical Cluster Shift': 0,\n", " 'Pseudo Cluster Shift': 0,\n", " 'Canonical to Pseudo': 0,\n", " 'Pseudo to Canonical': 0,\n", " 'Canonical Cluster Same': 0,\n", " 'Pseudo Cluster Same': 0,\n", " }\n", "\n", " for apo_clust, holo_clust in itertools.product(apo_clusters, holo_clusters):\n", " if apo_clust == 'noise' and holo_clust == 'noise':\n", " types['Noise'] += 1\n", "\n", " elif apo_clust == 'noise' and holo_clust != 'noise':\n", " if cluster_types.loc[chain_type, cdr, holo_clust] == 'canonical':\n", " types['Noise to Canonical Cluster'] += 1\n", "\n", " else:\n", " types['Noise to Pseudo Cluster'] += 1\n", "\n", " elif apo_clust != 'noise' and holo_clust == 'noise':\n", " if cluster_types.loc[chain_type, cdr, apo_clust] == 'canonical':\n", " types['Canonical Cluster to Noise'] += 1\n", "\n", " else:\n", " types['Pseudo Cluster to Noise'] += 1\n", "\n", " elif apo_clust != holo_clust:\n", " if (cluster_types.loc[chain_type, cdr, apo_clust]\n", " == cluster_types.loc[chain_type, cdr, holo_clust]\n", " == 'canonical'):\n", " types['Canonical Cluster Shift'] += 1\n", "\n", " elif (cluster_types.loc[chain_type, cdr, apo_clust] == 'canonical'\n", " and cluster_types.loc[chain_type, cdr, holo_clust] == 'pseudo'):\n", " types['Canonical to Pseudo'] += 1\n", "\n", " else:\n", " types['Pseudo to Canonical'] += 
1\n", "\n", " elif apo_clust == holo_clust:\n", " if cluster_types.loc[chain_type, cdr, apo_clust] == 'canonical':\n", " types['Canonical Cluster Same'] += 1\n", "\n", " else:\n", " types['Pseudo Cluster Same'] += 1\n", "\n", " return pd.Series(types)\n", "\n", "\n", "apo_holo_clusters_agg = apo_holo_clusters_agg.join(apo_holo_clusters_agg.apply(\n", " lambda row: classify_movement(row.chain_type, row.cdr, row.apo_clusters, row.holo_clusters, cluster_types), axis=1\n", "))\n", "\n", "# Wide to long\n", "apo_holo_clusters_agg = apo_holo_clusters_agg.melt(id_vars=['cdr_sequences_collated', 'chain_type', 'cdr'],\n", " value_vars=['Noise',\n", " 'Noise to Canonical Cluster',\n", " 'Noise to Pseudo Cluster',\n", " 'Canonical Cluster to Noise',\n", " 'Pseudo Cluster to Noise',\n", " 'Canonical Cluster Shift',\n", " 'Pseudo Cluster Shift',\n", " 'Canonical to Pseudo',\n", " 'Pseudo to Canonical',\n", " 'Canonical Cluster Same',\n", " 'Pseudo Cluster Same'],\n", " var_name='types', value_name='count')\n", "\n", "# Normalize for differeing group sizes\n", "apo_holo_clusters_agg = apo_holo_clusters_agg.groupby(\n", " ['cdr_sequences_collated', 'chain_type', 'cdr', 'types'],\n", ")['count'].median().reset_index()\n", "apo_holo_clusters_agg" ] }, { "cell_type": "code", "execution_count": 9, "id": "59eb90ca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdr_sequences_collatedchain_typecdrtypescount
0ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster Same0.0
1ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster Shift0.0
2ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical Cluster to Noise1.0
3ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Canonical to Pseudo0.0
4ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR...alpha_chain1Noise0.0
..................
5671YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Noise to Pseudo Cluster0.0
5672YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster Same0.0
5673YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster Shift0.0
5674YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo Cluster to Noise0.0
5675YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA...beta_chain3Pseudo to Canonical0.0
\n", "

5676 rows × 5 columns

\n", "
" ], "text/plain": [ " cdr_sequences_collated chain_type cdr \\\n", "0 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "1 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "2 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "3 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "4 ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGR... alpha_chain 1 \n", "... ... ... ... \n", "5671 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5672 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5673 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5674 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "5675 YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGA... beta_chain 3 \n", "\n", " types count \n", "0 Canonical Cluster Same 0.0 \n", "1 Canonical Cluster Shift 0.0 \n", "2 Canonical Cluster to Noise 1.0 \n", "3 Canonical to Pseudo 0.0 \n", "4 Noise 0.0 \n", "... ... ... \n", "5671 Noise to Pseudo Cluster 0.0 \n", "5672 Pseudo Cluster Same 0.0 \n", "5673 Pseudo Cluster Shift 0.0 \n", "5674 Pseudo Cluster to Noise 0.0 \n", "5675 Pseudo to Canonical 0.0 \n", "\n", "[5676 rows x 5 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_holo_clusters_agg" ] }, { "cell_type": "code", "execution_count": 10, "id": "3d449881", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "g = sns.catplot(apo_holo_clusters_agg.sort_values(['chain_type', 'cdr', 'types']),\n", " row='chain_type', col='cdr',\n", " x='types', y='count',\n", " kind='bar')\n", "\n", "for ax in g.axes.flat:\n", " labels = ax.get_xticklabels()\n", " plt.setp(labels, rotation=90)" ] }, { "cell_type": "code", "execution_count": 11, "id": "1fc7dbc6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "chain_type cdr types \n", "alpha_chain 1 Canonical Cluster Same 66\n", " Canonical Cluster to Noise 1\n", " Noise 3\n", " Noise to Canonical Cluster 5\n", " Noise to Pseudo Cluster 4\n", " Pseudo Cluster Same 9\n", " Pseudo to Canonical 8\n", " 2 Canonical Cluster Same 54\n", " Canonical Cluster to Noise 5\n", " Noise 8\n", " Noise to Canonical Cluster 5\n", " Pseudo Cluster Same 14\n", " Pseudo Cluster to Noise 10\n", " 3 Canonical Cluster Same 16\n", " Noise 14\n", " Noise to Canonical Cluster 10\n", " Noise to Pseudo Cluster 30\n", " Pseudo Cluster Same 26\n", "beta_chain 1 Canonical Cluster Same 78\n", " Canonical Cluster Shift 11\n", " Noise to Canonical Cluster 2\n", " Pseudo Cluster Same 5\n", " 2 Canonical Cluster Same 38\n", " Canonical Cluster Shift 2\n", " Canonical Cluster to Noise 8\n", " Noise 37\n", " Noise to Canonical Cluster 2\n", " Noise to Pseudo Cluster 4\n", " Pseudo Cluster to Noise 5\n", " 3 Canonical Cluster Same 4\n", " Noise 33\n", " Noise to Canonical Cluster 9\n", " Noise to Pseudo Cluster 26\n", " Pseudo Cluster Same 24\n", "Name: count, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cluster_shift_counts = apo_holo_clusters_agg.groupby(['chain_type', 'cdr', 'types'])['count'].sum().astype(int)\n", "cluster_shift_counts[cluster_shift_counts > 0]" ] }, { "cell_type": "code", "execution_count": null, "id": "b454cb2d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Movement TypeCDR1\\textalpha{}CDR1\\textbeta{}CDR2\\textalpha{}CDR2\\textbeta{}CDR3\\textalpha{}CDR3\\textbeta{}
0Canonical Cluster Same66785438164
1Canonical Cluster Shift-11-2--
2Canonical Cluster to Noise1-58--
3Canonical to Pseudo------
4Noise3-8371433
5Noise to Canonical Cluster5252109
6Noise to Pseudo Cluster4--43026
7Pseudo Cluster Same9514-2624
8Pseudo Cluster Shift------
9Pseudo Cluster to Noise--105--
10Pseudo to Canonical8-----
\n", "
" ], "text/plain": [ " Movement Type CDR1\\textalpha{} CDR1\\textbeta{} \\\n", "0 Canonical Cluster Same 66 78 \n", "1 Canonical Cluster Shift - 11 \n", "2 Canonical Cluster to Noise 1 - \n", "3 Canonical to Pseudo - - \n", "4 Noise 3 - \n", "5 Noise to Canonical Cluster 5 2 \n", "6 Noise to Pseudo Cluster 4 - \n", "7 Pseudo Cluster Same 9 5 \n", "8 Pseudo Cluster Shift - - \n", "9 Pseudo Cluster to Noise - - \n", "10 Pseudo to Canonical 8 - \n", "\n", " CDR2\\textalpha{} CDR2\\textbeta{} CDR3\\textalpha{} CDR3\\textbeta{} \n", "0 54 38 16 4 \n", "1 - 2 - - \n", "2 5 8 - - \n", "3 - - - - \n", "4 8 37 14 33 \n", "5 5 2 10 9 \n", "6 - 4 30 26 \n", "7 14 - 26 24 \n", "8 - - - - \n", "9 10 5 - - \n", "10 - - - - " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "export_cluster_shift_counts = cluster_shift_counts.to_frame().reset_index()\n", "\n", "export_cluster_shift_counts = export_cluster_shift_counts.rename({'types': 'Movement Type'}, axis='columns')\n", "\n", "export_cluster_shift_counts['CDR Type'] = export_cluster_shift_counts.apply(\n", " lambda row: f\"CDR{row.cdr}\\\\text{row.chain_type.split('_')[0]}{{}}\",\n", " axis='columns',\n", ")\n", "\n", "export_cluster_shift_counts = export_cluster_shift_counts.pivot(index='Movement Type',\n", " columns='CDR Type',\n", " values='count')\n", "export_cluster_shift_counts = export_cluster_shift_counts.sort_values('Movement Type')\n", "\n", "export_cluster_shift_counts = export_cluster_shift_counts.replace(0, '-')\n", "export_cluster_shift_counts = export_cluster_shift_counts.sort_index()\n", "export_cluster_shift_counts.columns.name = None\n", "export_cluster_shift_counts = export_cluster_shift_counts.reset_index()\n", "\n", "export_cluster_shift_counts" ] }, { "cell_type": "markdown", "id": "cc673e49", "metadata": {}, "source": [ "## Conclusion\n", "\n", "The analysis here shows that for the most part, the CDR-1s and CDR-2s stay in their canonical forms between *apo* and 
*holo* states, whereas the CDR-3 loops have a variety of modes.\n", "As expected, it is more difficult to cluster the CDR-3 loops as they are more structurally diverse." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 5 }