Centre of mass analysis

Introduction

In this notebook we aimed to identify the centre-of-mass (COM) of the TCR framework (Fw) regions and the anchors of the CDR loops. We then investigated the amount of movement these regions undergo between apo and holo states. We also used these COMs to create an axis of the TCR and measure the angle between both chains of the TCR before determining the angle changes between apo and holo states.

[1]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd
import seaborn as sns
from python_pdb.parsers import parse_pdb_to_pandas

from tcr_pmhc_interface_analysis.measurements import get_distance
from tcr_pmhc_interface_analysis.processing import annotate_tcr_pmhc_df, find_anchors
from tcr_pmhc_interface_analysis.utils import get_coords
[2]:
DATA_DIR = '../data/processed/apo-holo-tcr-pmhc-class-I'
[3]:
apo_holo_summary_df = pd.read_csv(os.path.join(DATA_DIR, 'apo_holo_summary.csv'))
apo_holo_summary_df
[3]:
file_name pdb_id structure_type state alpha_chain beta_chain antigen_chain mhc_chain1 mhc_chain2 cdr_sequences_collated peptide_sequence mhc_slug
0 1ao7_D-E-C-A-B_tcr_pmhc.pdb 1ao7 tcr_pmhc holo D E C A B DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... LLFGYPVYV hla_a_02_01
1 1bd2_D-E-C-A-B_tcr_pmhc.pdb 1bd2 tcr_pmhc holo D E C A B NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... LLFGYPVYV hla_a_02_01
2 1bii_A-B-P_pmhc.pdb 1bii pmhc apo NaN NaN P A B NaN RGPGRAFVTI h2_dd
3 1ddh_A-B-P_pmhc.pdb 1ddh pmhc apo NaN NaN P A B NaN RGPGRAFVTI h2_dd
4 1duz_A-B-C_pmhc.pdb 1duz pmhc apo NaN NaN C A B NaN LLFGYPVYV hla_a_02_01
... ... ... ... ... ... ... ... ... ... ... ... ...
353 8gon_D-E-C-A-B_tcr_pmhc.pdb 8gon tcr_pmhc holo D E C A B TSESDYY-QEAYKQQN-ASSGNTPLV-SGHNS-FNNNVP-ASTWGR... NaN NaN
354 8gop_A-B_tcr.pdb 8gop tcr apo A B NaN NaN NaN TSESDYY-QEAYKQQN-ASSGNTPLV-SGHNS-FNNNVP-ASTWGR... NaN NaN
355 8gvb_A-B-P-H-L_tcr_pmhc.pdb 8gvb tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RYPLTFGW hla_a_24_02
356 8gvg_A-B-P-H-L_tcr_pmhc.pdb 8gvg tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RFPLTFGW hla_a_24_02
357 8gvi_A-B-P-H-L_tcr_pmhc.pdb 8gvi tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... RYPLTFGW hla_a_24_02

358 rows × 12 columns

[4]:
complexes = [complex_id for complex_id in os.listdir(DATA_DIR)
                 if os.path.isdir(os.path.join(DATA_DIR, complex_id))]

Computing Measurements

[5]:
def calculate_angle(vec1, vec2):
    '''Caculate the angle between two vectors.'''
    return np.arccos(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
[6]:
complex_ids = []
structure_x_paths = []
structure_y_paths = []

alpha_anchor_com_diffs = []
beta_anchor_com_diffs = []

alpha_fw_com_diffs = []
beta_fw_com_diffs = []

chain_angle_diffs = []

for complex_id in complexes:
    complex_path = os.path.join(DATA_DIR, complex_id)
    complex_pdb_files = [file_ for file_ in os.listdir(complex_path) if file_.endswith('.pdb')]
    complex_summary = apo_holo_summary_df[apo_holo_summary_df['file_name'].isin(complex_pdb_files)]

    comparison_structures = complex_summary.query("structure_type == 'tcr' or state == 'holo'")
    comparisons = pd.merge(comparison_structures, comparison_structures, how='cross')
    comparisons['comparison'] = comparisons.apply(lambda row: '-'.join(sorted([row.file_name_x, row.file_name_y])),
                                                  axis='columns')
    comparisons = comparisons.drop_duplicates('comparison')
    comparisons = comparisons.drop('comparison', axis='columns')
    comparisons = comparisons.query('file_name_x != file_name_y')

    for _, comparison in comparisons.iterrows():
        tcrs = []
        for suffix in '_x', '_y':
            with open(os.path.join(complex_path, comparison['file_name' + suffix]), 'r') as fh:
                structure_df = parse_pdb_to_pandas(fh.read())

            chains = comparison.filter(like='chain').filter(regex=f'{suffix}$').replace({np.nan: None}).tolist()
            structure_df = annotate_tcr_pmhc_df(structure_df, *chains)

            structure_df['backbone'] = structure_df['atom_name'].map(
                lambda atom_name: (atom_name == 'N' or atom_name == 'CA' or atom_name == 'C' or atom_name == 'O')
            )
            tcr_df = structure_df.query("chain_type in ['alpha_chain', 'beta_chain']").copy().reset_index()

            tcrs.append(tcr_df)

        tcr_x, tcr_y = tcrs

         # Compute C.O.Ms and coordinate vectors
        calculations = defaultdict(dict)

        for label, tcr_df in (('x', tcr_x), ('y', tcr_y)):
            for chain_type in ('alpha_chain', 'beta_chain'):
                anchors_ca = pd.DataFrame()

                for cdr in (1, 2, 3):
                    cdr_df = tcr_df.query('chain_type == @chain_type and cdr == @cdr')

                    for cdr_anchor in find_anchors(cdr_df, tcr_df):
                        anchors_ca = pd.concat([anchors_ca, cdr_anchor.query("atom_name == 'CA'")])

                fw_ca_df = tcr_df.query("cdr.isnull() and residue_seq_id <= 128 and chain_type == @chain_type and atom_name == 'CA'")

                anchors_com = np.average(get_coords(anchors_ca), axis=0)
                fw_com = np.average(get_coords(fw_ca_df), axis=0)

                tcr_chain_direction_vec = fw_com - anchors_com

                calculations[label][chain_type] = {}

                calculations[label][chain_type]['anchors_com'] = anchors_com
                calculations[label][chain_type]['fw_com'] = fw_com
                calculations[label][chain_type]['direction_vector'] = tcr_chain_direction_vec

        # Calculate differences
        chain_anchor_com = []
        chain_fw_com = []
        for chain_type in ('alpha_chain', 'beta_chain'):
            chain_anchor_com.append(get_distance(calculations['x'][chain_type]['anchors_com'],
                                                 calculations['y'][chain_type]['anchors_com']))

            chain_fw_com.append(get_distance(calculations['x'][chain_type]['fw_com'],
                                             calculations['y'][chain_type]['fw_com']))


        angle_x = calculate_angle(calculations['x']['alpha_chain']['direction_vector'],
                                    calculations['x']['beta_chain']['direction_vector'])

        angle_y = calculate_angle(calculations['y']['alpha_chain']['direction_vector'],
                                     calculations['y']['beta_chain']['direction_vector'])

        angle_diff = angle_y - angle_x

        # Collect Data
        complex_ids.append(complex_id)
        structure_x_paths.append(comparison['file_name_x'])
        structure_y_paths.append(comparison['file_name_y'])

        alpha_anchor_com_diffs.append(chain_anchor_com[0])
        beta_anchor_com_diffs.append(chain_anchor_com[1])

        alpha_fw_com_diffs.append(chain_fw_com[0])
        beta_fw_com_diffs.append(chain_fw_com[1])

        chain_angle_diffs.append(angle_diff)

results = pd.DataFrame({
    'complex_id': complex_ids,
    'structure_x_path': structure_x_paths,
    'structure_y_path': structure_y_paths,
    'alpha_anchor_com_diff': alpha_anchor_com_diffs,
    'beta_anchor_com_diff': beta_anchor_com_diffs,
    'alpha_fw_com_diff': alpha_fw_com_diffs,
    'beta_fw_com_diff': beta_fw_com_diffs,
    'chain_angle_diff': chain_angle_diffs,
})
results
[6]:
complex_id structure_x_path structure_y_path alpha_anchor_com_diff beta_anchor_com_diff alpha_fw_com_diff beta_fw_com_diff chain_angle_diff
0 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qeu_A-B_tcr.pdb 0.645120 0.457327 0.219777 0.166987 -0.033421
1 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qeu_D-E_tcr.pdb 0.527090 0.340932 0.164026 0.213055 -0.040502
2 3qdg_D-E-C-A-B_tcr_pmhc 3qeu_A-B_tcr.pdb 3qeu_D-E_tcr.pdb 0.682250 0.284114 0.334339 0.064055 -0.007081
3 5c0a_D-E-C-A-B_tcr_pmhc 3utp_D-E_tcr.pdb 3utp_K-L_tcr.pdb 0.304534 0.602875 0.290645 0.338509 0.041175
4 5c0a_D-E-C-A-B_tcr_pmhc 3utp_D-E_tcr.pdb 5c0a_D-E-C-A-B_tcr_pmhc.pdb 0.292352 0.514781 0.334429 0.270019 0.032863
... ... ... ... ... ... ... ... ...
260 5nmg_I-J-H-F-G_tcr_pmhc 5nmd_A-B_tcr.pdb 5nmd_C-D_tcr.pdb 0.234261 0.191756 0.035814 0.154754 -0.011535
261 5nmg_I-J-H-F-G_tcr_pmhc 5nmd_A-B_tcr.pdb 5nmg_I-J-H-F-G_tcr_pmhc.pdb 0.600266 0.408148 0.183775 0.178921 0.028342
262 5nmg_I-J-H-F-G_tcr_pmhc 5nmd_C-D_tcr.pdb 5nmg_I-J-H-F-G_tcr_pmhc.pdb 0.367783 0.301645 0.148664 0.271494 0.039878
263 5yxu_A-B-I-C-D_tcr_pmhc 5yxu_A-B-I-C-D_tcr_pmhc.pdb 5yxu_F-G_tcr.pdb 0.283526 0.094541 0.313619 0.237034 0.006849
264 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.642809 0.188609 0.388833 0.291080 0.001583

265 rows × 8 columns

[7]:
apo_holo_summary_df['ids'] = apo_holo_summary_df['file_name'].str.replace('.pdb', '')
/var/scratch/bmcmaste/1760251/ipykernel_865377/2116153506.py:1: FutureWarning: The default value of regex will change from True to False in a future version.
  apo_holo_summary_df['ids'] = apo_holo_summary_df['file_name'].str.replace('.pdb', '')
[8]:
results = results.merge(apo_holo_summary_df[['ids', 'cdr_sequences_collated', 'mhc_slug', 'peptide_sequence']],
                        how='left',
                        left_on='complex_id', right_on='ids')
[9]:
results = results.merge(
    apo_holo_summary_df[['file_name', 'structure_type', 'state']],
    how='left',
    left_on='structure_x_path', right_on='file_name',
).merge(
    apo_holo_summary_df[['file_name', 'structure_type', 'state']],
    how='left',
    left_on='structure_y_path', right_on='file_name',
)
[10]:
results['composite_name'] = results.apply(lambda row: '-'.join(sorted([row.structure_x_path, row.structure_y_path])), axis='columns')
results = results.drop_duplicates('composite_name')
[11]:
apo_holo_results = results.query('state_x != state_y')
[12]:
apo_holo_results_norm = apo_holo_results.groupby('cdr_sequences_collated').agg({
    'alpha_anchor_com_diff': 'mean',
    'beta_anchor_com_diff': 'mean',
    'alpha_fw_com_diff': 'mean',
    'beta_fw_com_diff': 'mean',
    'chain_angle_diff': 'mean',
})
[13]:
apo_holo_results_norm['chain_angle_diff_deg'] = apo_holo_results_norm['chain_angle_diff'].apply(np.degrees)
apo_holo_results_norm['chain_angle_diff_deg_mag'] = apo_holo_results_norm['chain_angle_diff_deg'].apply(np.abs)
[14]:
apo_holo_results_norm
[14]:
alpha_anchor_com_diff beta_anchor_com_diff alpha_fw_com_diff beta_fw_com_diff chain_angle_diff chain_angle_diff_deg chain_angle_diff_deg_mag
cdr_sequences_collated
ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGRGDQPQH 0.340963 0.160583 0.335554 0.092950 -0.014279 -0.818098 0.818098
DRGSQS-IYSNGD-ALTRGPGNQFY-SGHVS-FNYEAQ-ASSSPGGVSTEAF 1.614983 0.363393 1.286903 0.279959 -0.062121 -3.559295 3.559295
DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF 0.626083 0.366441 0.250095 0.178326 -0.001998 -0.114483 0.114483
DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY 0.624668 0.204869 0.302308 0.186851 -0.011943 -0.684280 0.684280
DRGSQS-IYSNGD-AVRTNSGYALN-QGHDT-YYEEEE-ASSDTVSYEQY 0.362689 0.550055 0.145354 0.181286 0.027609 1.581901 1.581901
DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLAGGRPEQY 0.385751 0.657747 0.300314 0.374858 0.018630 1.067402 1.067402
DRGSQS-IYSNGD-GTYNQGGKLI-MNHEY-SMNVEV-ASSGASHEQY 1.017497 0.357841 0.276897 0.223025 0.091942 5.267913 5.267913
DSAIYN-IQSSQRE-AQLNQAGTALI-MNHEY-SVGAGI-ASSYGTGINYGYT 0.596772 0.032190 0.184380 0.213406 -0.037547 -2.151258 2.151258
DSAIYN-IQSSQRE-AVRMDSSYKLI-SEHNR-FQNEAQ-ASSSWDTGELF 0.264265 0.248560 0.103594 0.098554 -0.009340 -0.535154 0.535154
DSAIYN-IQSSQRE-AVRPLLDGTYIPT-MNHEY-SVGAGT-ASSYLGNTGELF 0.408007 0.394163 0.483043 0.506659 0.044708 2.561604 2.561604
DSAIYN-IQSSQRE-AVRPTSGGSYIPT-MNHEY-SVGAGI-ASSYVGNTGELF 0.510472 0.378292 0.388653 0.280078 -0.013727 -0.786515 0.786515
FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGMGGWQ 0.520622 0.385860 0.254620 0.288849 -0.009649 -0.552852 0.552852
NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQRQEGDTQY 0.570818 0.282032 0.423455 0.414862 0.023776 1.362237 1.362237
NSAFDY-ILSVSNK-AASASFGDNSKLI-MSHET-SYDVDS-ASSLGHTEVF 0.124907 0.468265 0.138290 0.401072 0.016898 0.968205 0.968205
NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEKLAKNIQY 0.296695 0.381612 0.262137 0.276894 0.025618 1.467794 1.467794
NSASQS-VYSSG-VVQPGGYQKVT-MNHNS-SASEGT-ASSEGLWQVGDEQY 0.194964 0.279496 0.157453 0.355964 0.016874 0.966819 0.966819
NSASQS-VYSSG-VVRAGKLI-MNHEY-SVGEGT-ASGQGNFDIQY 0.392269 0.274606 0.158128 0.199036 -0.023181 -1.328174 1.328174
SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRSSYEQY 0.958644 0.472360 0.360223 0.345909 0.028590 1.638094 1.638094
TISGNEY-GLKNN-IVWGGYQKVT-SEHNR-FQNEAQ-ASRYRDDSYNEQF 1.076693 1.254781 0.466742 0.428322 0.074629 4.275936 4.275936
TISGTDY-GLTSN-ILPLAGGTSYGKLT-SGHVS-FQNEAQ-ASSLGQAYEQY 0.742800 0.218874 0.212584 0.086862 0.005153 0.295263 0.295263
TSENDYI-QEAYKQQN-AYGEDDKII-MGHDK-SYGVNS-ASRRGSAELY 0.283526 0.094541 0.313619 0.237034 0.006849 0.392417 0.392417
TSESDYY-QEAYKQQN-ASSGNTPLV-SGHNS-FNNNVP-ASTWGRASTDTQY 1.346103 0.398149 0.348850 0.216115 -0.103135 -5.909185 5.909185
YSATPY-YYSGDPVV-AVSAKGTGSKLS-NSHNY-SYGAGN-ASSDAPGQLY 0.540875 0.426192 0.526714 0.731495 -0.043797 -2.509360 2.509360
YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY 0.364028 0.458383 0.133653 0.122776 -0.009807 -0.561905 0.561905
YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGAGGADTQY 0.796027 0.388025 0.043012 0.054271 0.112168 6.426748 6.426748

Visualizing Results

Changes in Framework (Fw) region and Loop Anchor Centre of Masses (COM)

[15]:
coms = apo_holo_results_norm.melt(value_vars=['alpha_anchor_com_diff', 'beta_anchor_com_diff', 'alpha_fw_com_diff', 'beta_fw_com_diff'],
                                  var_name='location',
                                  value_name='com_diff')

coms['chain_type'] = coms['location'].map(lambda location: location.split('_')[0])
coms['location'] = coms['location'].map(lambda location: location.split('_')[1])
[16]:
sns.boxplot(coms, y='com_diff', x='location', hue='chain_type')
[16]:
<AxesSubplot: xlabel='location', ylabel='com_diff'>
../_images/source_Centre_of_mass_analysis_20_1.png

There is slight movement between the COMs but not very much.

Changes in angle between \(\alpha\)- and \(\beta\)- chain

[17]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff')
apo_holo_results_norm['chain_angle_diff'].describe()
[17]:
count    25.000000
mean      0.006117
std       0.045933
min      -0.103135
25%      -0.013727
50%       0.005153
75%       0.025618
max       0.112168
Name: chain_angle_diff, dtype: float64
../_images/source_Centre_of_mass_analysis_23_1.png
[18]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff_deg')
apo_holo_results_norm['chain_angle_diff_deg'].describe()
[18]:
count    25.000000
mean      0.350471
std       2.631778
min      -5.909185
25%      -0.786515
50%       0.295263
75%       1.467794
max       6.426748
Name: chain_angle_diff_deg, dtype: float64
../_images/source_Centre_of_mass_analysis_24_1.png
[19]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff_deg_mag')
apo_holo_results_norm['chain_angle_diff_deg_mag'].describe()
[19]:
count    25.000000
mean      1.911316
std       1.802460
min       0.114483
25%       0.684280
50%       1.328174
75%       2.509360
max       6.426748
Name: chain_angle_diff_deg_mag, dtype: float64
../_images/source_Centre_of_mass_analysis_25_1.png

From these plots, it does not seem that their is a lot of angular change between domains.

Conclusion

Based on the results of this analysis, the TCR Fw region remains fairly static between apo and holo.