Centre of mass analysis

Introduction

In this notebook we aimed to identify the centre-of-mass (COM) of the TCR framework (Fw) regions and the anchors of the CDR loops. We then investigated the amount of movement these regions undergo between apo and holo states. We also used these COMs to create an axis of the TCR and measure the angle between both chains of the TCR before determining the angle changes between apo and holo states.

[1]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd
import seaborn as sns
from python_pdb.parsers import parse_pdb_to_pandas

from tcr_pmhc_interface_analysis.measurements import get_distance
from tcr_pmhc_interface_analysis.processing import annotate_tcr_pmhc_df, find_anchors
from tcr_pmhc_interface_analysis.utils import get_coords
[2]:
DATA_DIR = '../data/processed/apo-holo-tcr-pmhc-class-I'
[3]:
apo_holo_summary_df = pd.read_csv(os.path.join(DATA_DIR, 'apo_holo_summary.csv'))
apo_holo_summary_df
[3]:
file_name pdb_id structure_type state alpha_chain beta_chain antigen_chain mhc_chain1 mhc_chain2 cdr_sequences_collated peptide_sequence mhc_slug
0 1ao7_D-E-C-A-B_tcr_pmhc.pdb 1ao7 tcr_pmhc holo D E C A B DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... LLFGYPVYV hla_a_02_01
1 1b0g_C-A-B_pmhc.pdb 1b0g pmhc apo NaN NaN C A B NaN ALWGFFPVL hla_a_02_01
2 1b0g_F-D-E_pmhc.pdb 1b0g pmhc apo NaN NaN F D E NaN ALWGFFPVL hla_a_02_01
3 1bd2_D-E-C-A-B_tcr_pmhc.pdb 1bd2 tcr_pmhc holo D E C A B NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... LLFGYPVYV hla_a_02_01
4 1bii_P-A-B_pmhc.pdb 1bii pmhc apo NaN NaN P A B NaN RGPGRAFVTI h2_dd
... ... ... ... ... ... ... ... ... ... ... ... ...
386 7rtd_C-A-B_pmhc.pdb 7rtd pmhc apo NaN NaN C A B NaN YLQPRTFLL hla_a_02_01
387 7rtr_D-E-C-A-B_tcr_pmhc.pdb 7rtr tcr_pmhc holo D E C A B DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY YLQPRTFLL hla_a_02_01
388 8gvb_A-B-P-H-L_tcr_pmhc.pdb 8gvb tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RYPLTFGW hla_a_24_02
389 8gvg_A-B-P-H-L_tcr_pmhc.pdb 8gvg tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... RFPLTFGW hla_a_24_02
390 8gvi_A-B-P-H-L_tcr_pmhc.pdb 8gvi tcr_pmhc holo A B P H L YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... RYPLTFGW hla_a_24_02

391 rows × 12 columns

[4]:
complexes = [complex_id for complex_id in os.listdir(DATA_DIR)
                 if os.path.isdir(os.path.join(DATA_DIR, complex_id))]

Computing Measurements

[5]:
def calculate_angle(vec1, vec2):
    '''Caculate the angle between two vectors.'''
    return np.arccos(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
[6]:
complex_ids = []
structure_x_paths = []
structure_y_paths = []

alpha_anchor_com_diffs = []
beta_anchor_com_diffs = []

alpha_fw_com_diffs = []
beta_fw_com_diffs = []

chain_angle_diffs = []

for complex_id in complexes:
    complex_path = os.path.join(DATA_DIR, complex_id)
    complex_pdb_files = [file_ for file_ in os.listdir(complex_path) if file_.endswith('.pdb')]
    complex_summary = apo_holo_summary_df[apo_holo_summary_df['file_name'].isin(complex_pdb_files)]

    comparison_structures = complex_summary.query("structure_type == 'tcr' or state == 'holo'")
    comparisons = pd.merge(comparison_structures, comparison_structures, how='cross')
    comparisons['comparison'] = comparisons.apply(lambda row: '-'.join(sorted([row.file_name_x, row.file_name_y])),
                                                  axis='columns')
    comparisons = comparisons.drop_duplicates('comparison')
    comparisons = comparisons.drop('comparison', axis='columns')
    comparisons = comparisons.query('file_name_x != file_name_y')

    for _, comparison in comparisons.iterrows():
        tcrs = []
        for suffix in '_x', '_y':
            with open(os.path.join(complex_path, comparison['file_name' + suffix]), 'r') as fh:
                structure_df = parse_pdb_to_pandas(fh.read())

            chains = comparison.filter(like='chain').filter(regex=f'{suffix}$').replace({np.nan: None}).tolist()
            structure_df = annotate_tcr_pmhc_df(structure_df, *chains)

            structure_df['backbone'] = structure_df['atom_name'].map(
                lambda atom_name: (atom_name == 'N' or atom_name == 'CA' or atom_name == 'C' or atom_name == 'O')
            )
            tcr_df = structure_df.query("chain_type in ['alpha_chain', 'beta_chain']").copy().reset_index()

            tcrs.append(tcr_df)

        tcr_x, tcr_y = tcrs

         # Compute C.O.Ms and coordinate vectors
        calculations = defaultdict(dict)

        for label, tcr_df in (('x', tcr_x), ('y', tcr_y)):
            for chain_type in ('alpha_chain', 'beta_chain'):
                anchors_ca = pd.DataFrame()

                for cdr in (1, 2, 3):
                    cdr_df = tcr_df.query('chain_type == @chain_type and cdr == @cdr')

                    for cdr_anchor in find_anchors(cdr_df, tcr_df):
                        anchors_ca = pd.concat([anchors_ca, cdr_anchor.query("atom_name == 'CA'")])

                fw_ca_df = tcr_df.query("cdr.isnull() and residue_seq_id <= 128 and chain_type == @chain_type and atom_name == 'CA'")

                anchors_com = np.average(get_coords(anchors_ca), axis=0)
                fw_com = np.average(get_coords(fw_ca_df), axis=0)

                tcr_chain_direction_vec = fw_com - anchors_com

                calculations[label][chain_type] = {}

                calculations[label][chain_type]['anchors_com'] = anchors_com
                calculations[label][chain_type]['fw_com'] = fw_com
                calculations[label][chain_type]['direction_vector'] = tcr_chain_direction_vec

        # Calculate differences
        chain_anchor_com = []
        chain_fw_com = []
        for chain_type in ('alpha_chain', 'beta_chain'):
            chain_anchor_com.append(get_distance(calculations['x'][chain_type]['anchors_com'],
                                                 calculations['y'][chain_type]['anchors_com']))

            chain_fw_com.append(get_distance(calculations['x'][chain_type]['fw_com'],
                                             calculations['y'][chain_type]['fw_com']))


        angle_x = calculate_angle(calculations['x']['alpha_chain']['direction_vector'],
                                    calculations['x']['beta_chain']['direction_vector'])

        angle_y = calculate_angle(calculations['y']['alpha_chain']['direction_vector'],
                                     calculations['y']['beta_chain']['direction_vector'])

        angle_diff = angle_y - angle_x

        # Collect Data
        complex_ids.append(complex_id)
        structure_x_paths.append(comparison['file_name_x'])
        structure_y_paths.append(comparison['file_name_y'])

        alpha_anchor_com_diffs.append(chain_anchor_com[0])
        beta_anchor_com_diffs.append(chain_anchor_com[1])

        alpha_fw_com_diffs.append(chain_fw_com[0])
        beta_fw_com_diffs.append(chain_fw_com[1])

        chain_angle_diffs.append(angle_diff)

results = pd.DataFrame({
    'complex_id': complex_ids,
    'structure_x_path': structure_x_paths,
    'structure_y_path': structure_y_paths,
    'alpha_anchor_com_diff': alpha_anchor_com_diffs,
    'beta_anchor_com_diff': beta_anchor_com_diffs,
    'alpha_fw_com_diff': alpha_fw_com_diffs,
    'beta_fw_com_diff': beta_fw_com_diffs,
    'chain_angle_diff': chain_angle_diffs,
})
results
[6]:
complex_id structure_x_path structure_y_path alpha_anchor_com_diff beta_anchor_com_diff alpha_fw_com_diff beta_fw_com_diff chain_angle_diff
0 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qeu_A-B_tcr.pdb 0.645120 0.457327 0.219777 0.166987 -0.033421
1 3qdg_D-E-C-A-B_tcr_pmhc 3qdg_D-E-C-A-B_tcr_pmhc.pdb 3qeu_D-E_tcr.pdb 0.527090 0.340932 0.164026 0.213055 -0.040502
2 3qdg_D-E-C-A-B_tcr_pmhc 3qeu_A-B_tcr.pdb 3qeu_D-E_tcr.pdb 0.682250 0.284114 0.334339 0.064055 -0.007081
3 5c0a_D-E-C-A-B_tcr_pmhc 3utp_D-E_tcr.pdb 3utp_K-L_tcr.pdb 0.304534 0.602875 0.290645 0.338509 0.041175
4 5c0a_D-E-C-A-B_tcr_pmhc 3utp_D-E_tcr.pdb 5c0a_D-E-C-A-B_tcr_pmhc.pdb 0.292352 0.514781 0.334429 0.270019 0.032863
... ... ... ... ... ... ... ... ...
131 5nme_D-E-C-A-B_tcr_pmhc 5nmd_C-D_tcr.pdb 5nme_D-E-C-A-B_tcr_pmhc.pdb 0.211336 0.712433 0.113214 0.130459 0.030224
132 5hhm_D-E-C-A-B_tcr_pmhc 2vlm_D-E_tcr.pdb 5hhm_D-E-C-A-B_tcr_pmhc.pdb 0.886794 0.410827 0.467935 0.381336 -0.115053
133 3kpr_D-E-C-A-B_tcr_pmhc 1kgc_D-E_tcr.pdb 3kpr_D-E-C-A-B_tcr_pmhc.pdb 0.919506 0.270885 0.383752 0.151581 -0.026228
134 1oga_D-E-C-A-B_tcr_pmhc 1oga_D-E-C-A-B_tcr_pmhc.pdb 2vlm_D-E_tcr.pdb 0.944931 0.542110 0.291887 0.336729 0.107774
135 7rtr_D-E-C-A-B_tcr_pmhc 7n1d_A-B_tcr.pdb 7rtr_D-E-C-A-B_tcr_pmhc.pdb 0.642809 0.188609 0.388833 0.291080 0.001583

136 rows × 8 columns

[7]:
apo_holo_summary_df['ids'] = apo_holo_summary_df['file_name'].str.replace('.pdb', '')
/var/scratch/bmcmaste/2178229/ipykernel_2172418/2116153506.py:1: FutureWarning: The default value of regex will change from True to False in a future version.
  apo_holo_summary_df['ids'] = apo_holo_summary_df['file_name'].str.replace('.pdb', '')
[8]:
results = results.merge(apo_holo_summary_df[['ids', 'cdr_sequences_collated', 'mhc_slug', 'peptide_sequence']],
                        how='left',
                        left_on='complex_id', right_on='ids')
[9]:
results = results.merge(
    apo_holo_summary_df[['file_name', 'structure_type', 'state']],
    how='left',
    left_on='structure_x_path', right_on='file_name',
).merge(
    apo_holo_summary_df[['file_name', 'structure_type', 'state']],
    how='left',
    left_on='structure_y_path', right_on='file_name',
)
[10]:
results['composite_name'] = results.apply(lambda row: '-'.join(sorted([row.structure_x_path, row.structure_y_path])), axis='columns')
results = results.drop_duplicates('composite_name')
[11]:
apo_holo_results = results.query('state_x != state_y')
[12]:
apo_holo_results_norm = apo_holo_results.groupby('cdr_sequences_collated').agg({
    'alpha_anchor_com_diff': 'mean',
    'beta_anchor_com_diff': 'mean',
    'alpha_fw_com_diff': 'mean',
    'beta_fw_com_diff': 'mean',
    'chain_angle_diff': 'mean',
})
[13]:
apo_holo_results_norm['chain_angle_diff_deg'] = apo_holo_results_norm['chain_angle_diff'].apply(np.degrees)
apo_holo_results_norm['chain_angle_diff_deg_mag'] = apo_holo_results_norm['chain_angle_diff_deg'].apply(np.abs)
[14]:
apo_holo_results_norm
[14]:
alpha_anchor_com_diff beta_anchor_com_diff alpha_fw_com_diff beta_fw_com_diff chain_angle_diff chain_angle_diff_deg chain_angle_diff_deg_mag
cdr_sequences_collated
ATGYPS-ATKADDK-ALSDPVNDMR-SGHAT-FQNNGV-ASSLRGRGDQPQH 0.112478 0.221407 0.131706 0.140118 -0.005545 -0.317697 0.317697
DRGSQS-IYSNGD-ALTRGPGNQFY-SGHVS-FNYEAQ-ASSSPGGVSTEAF 1.606143 0.370279 1.293890 0.279836 -0.057826 -3.313194 3.313194
DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF 0.626083 0.366441 0.250095 0.178326 -0.001998 -0.114483 0.114483
DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY 0.624668 0.204869 0.302308 0.186851 -0.011943 -0.684280 0.684280
DRGSQS-IYSNGD-AVRTNSGYALN-QGHDT-YYEEEE-ASSDTVSYEQY 0.306039 0.615471 0.133071 0.205778 0.024796 1.420686 1.420686
DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLAGGRPEQY 0.385751 0.657747 0.300314 0.374858 0.018630 1.067402 1.067402
DRGSQS-IYSNGD-GTYNQGGKLI-MNHEY-SMNVEV-ASSGASHEQY 1.099042 0.294167 0.235768 0.174727 0.103824 5.948649 5.948649
DSAIYN-IQSSQRE-AQLNQAGTALI-MNHEY-SVGAGI-ASSYGTGINYGYT 0.596772 0.032190 0.184380 0.213406 -0.037547 -2.151258 2.151258
DSAIYN-IQSSQRE-AVRMDSSYKLI-SEHNR-FQNEAQ-ASSSWDTGELF 0.264265 0.248560 0.103594 0.098554 -0.009340 -0.535154 0.535154
DSAIYN-IQSSQRE-AVRPLLDGTYIPT-MNHEY-SVGAGT-ASSYLGNTGELF 0.408007 0.394163 0.483043 0.506659 0.044708 2.561604 2.561604
DSAIYN-IQSSQRE-AVRPTSGGSYIPT-MNHEY-SVGAGI-ASSYVGNTGELF 0.524052 0.405684 0.393972 0.306927 -0.011481 -0.657791 0.657791
FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGMGGWQ 0.520622 0.385860 0.254620 0.288849 -0.009649 -0.552852 0.552852
NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQRQEGDTQY 0.570818 0.282032 0.423455 0.414862 0.023776 1.362237 1.362237
NSAFDY-ILSVSNK-AASASFGDNSKLI-MSHET-SYDVDS-ASSLGHTEVF 0.124907 0.468265 0.138290 0.401072 0.016898 0.968205 0.968205
NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEKLAKNIQY 0.269449 0.373169 0.259665 0.247023 0.027624 1.582753 1.582753
NSASQS-VYSSG-VVQPGGYQKVT-MNHNS-SASEGT-ASSEGLWQVGDEQY 0.194964 0.279496 0.157453 0.355964 0.016874 0.966819 0.966819
NSASQS-VYSSG-VVRAGKLI-MNHEY-SVGEGT-ASGQGNFDIQY 0.409381 0.249897 0.205705 0.286431 -0.055766 -3.195168 3.195168
SVFSS-VVTGGEV-AGAGSQGNLI-LNHDA-SQIVND-ASSSRSSYEQY 1.003777 0.509446 0.331727 0.334128 0.058476 3.350441 3.350441
TISGNEY-GLKNN-IVWGGYQKVT-SEHNR-FQNEAQ-ASRYRDDSYNEQF 1.032154 1.282186 0.435426 0.393399 0.066787 3.826598 3.826598
TISGTDY-GLTSN-ILPLAGGTSYGKLT-SGHVS-FQNEAQ-ASSLGQAYEQY 0.713302 0.195773 0.230770 0.110227 -0.002818 -0.161464 0.161464
YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY 0.304694 0.460448 0.099579 0.079669 0.006357 0.364241 0.364241
YSGSPE-HISR-ALSGFNNAGNMLT-SGHAT-FQNNGV-ASSLGGAGGADTQY 0.796027 0.388025 0.043012 0.054271 0.112168 6.426748 6.426748

Visualizing Results

Changes in Framework (Fw) region and Loop Anchor Centre of Masses (COM)

[15]:
coms = apo_holo_results_norm.melt(value_vars=['alpha_anchor_com_diff', 'beta_anchor_com_diff', 'alpha_fw_com_diff', 'beta_fw_com_diff'],
                                  var_name='location',
                                  value_name='com_diff')

coms['chain_type'] = coms['location'].map(lambda location: location.split('_')[0])
coms['location'] = coms['location'].map(lambda location: location.split('_')[1])
[16]:
sns.boxplot(coms, y='com_diff', x='location', hue='chain_type')
[16]:
<AxesSubplot: xlabel='location', ylabel='com_diff'>
../_images/source_Centre_of_mass_analysis_20_1.png

There is slight movement between the COMs but not very much.

Changes in angle between \(\alpha\)- and \(\beta\)- chain

[17]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff')
apo_holo_results_norm['chain_angle_diff'].describe()
[17]:
count    22.000000
mean      0.014409
std       0.043607
min      -0.057826
25%      -0.009572
50%       0.011616
75%       0.026917
max       0.112168
Name: chain_angle_diff, dtype: float64
../_images/source_Centre_of_mass_analysis_23_1.png
[18]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff_deg')
apo_holo_results_norm['chain_angle_diff_deg'].describe()
[18]:
count    22.000000
mean      0.825593
std       2.498524
min      -3.313194
25%      -0.548427
50%       0.665530
75%       1.542237
max       6.426748
Name: chain_angle_diff_deg, dtype: float64
../_images/source_Centre_of_mass_analysis_24_1.png
[19]:
sns.boxplot(apo_holo_results_norm, y='chain_angle_diff_deg_mag')
apo_holo_results_norm['chain_angle_diff_deg_mag'].describe()
[19]:
count    22.000000
mean      1.887715
std       1.795418
min       0.114483
25%       0.579086
50%       1.214819
75%       3.036777
max       6.426748
Name: chain_angle_diff_deg_mag, dtype: float64
../_images/source_Centre_of_mass_analysis_25_1.png

From these plots, it does not seem that their is a lot of angular change between domains.

Conclusion

Based on the results of this analysis, the TCR Fw region remains fairly static between apo and holo.