def rmsd(x: np.ndarray) -> float:
    '''Compute the root-mean-square deviation (RMSD) of the values in x from their mean.

    The result is sqrt(sum((x - mean(x)) ** 2) / n), where n is the total
    number of elements in x. This is the same quantity as the population
    standard deviation (no Bessel correction).

    Parameters
    ----------
    x : np.ndarray
        Values to summarise. The mean and the sum of squared deviations are
        taken over every element, whatever the number of dimensions.

    Returns
    -------
    float
        The root-mean-square deviation of the elements of x about their mean.
    '''
    x_bar = np.mean(x)
    # Count every element rather than len(x): len() only measures the first
    # axis, which would under-count n for inputs with more than one dimension
    # while np.mean / np.sum above and below reduce over all of them.
    n = np.size(x)
    return np.sqrt(np.sum((x - x_bar) ** 2) / n)
\n", " | cdr1_aa_alpha | \n", "cdr2_aa_alpha | \n", "cdr3_aa_alpha | \n", "cdr1_aa_beta | \n", "cdr2_aa_beta | \n", "cdr3_aa_beta | \n", "v_call_alpha | \n", "v_call_beta | \n", "j_call_alpha | \n", "j_call_beta | \n", "Species | \n", "sample_num | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "TISGNEY | \n", "GLKNN | \n", "IVRVVWGGGADGLT | \n", "SGHDN | \n", "FVKESK | \n", "ASSLLGVSTDTQY | \n", "TRAV26-1*01 | \n", "TRBV14*01 | \n", "TRAJ45*01 | \n", "TRBJ2-3*01 | \n", "human | \n", "1 | \n", "
1 | \n", "NSMFDY | \n", "ISSIKDK | \n", "AASAQGGSYIPT | \n", "LGHDT | \n", "YNNKEL | \n", "ASSRRPTDTQY | \n", "TRAV29/DV5*04 | \n", "TRBV3-1*01 | \n", "TRAJ6*01 | \n", "TRBJ2-3*01 | \n", "human | \n", "1 | \n", "
2 | \n", "SSNFYA | \n", "MTLNGDE | \n", "ALGRNSGNTPLV | \n", "SGHAT | \n", "FQNNGV | \n", "ASNLAGAYEQY | \n", "TRAV24*01 | \n", "TRBV11-2*01 | \n", "TRAJ29*01 | \n", "TRBJ2-7*01 | \n", "human | \n", "1 | \n", "
3 | \n", "SSVPPY | \n", "YTTGATLV | \n", "AVSEPGSQGNLI | \n", "DFQATT | \n", "SNEGSKA | \n", "SALGQPLGETQY | \n", "TRAV8-4*01 | \n", "TRBV20-1*02 | \n", "TRAJ42*01 | \n", "TRBJ2-5*01 | \n", "human | \n", "1 | \n", "
4 | \n", "TSGFNG | \n", "NVLDGL | \n", "AVRDLRGSQGNLI | \n", "MGHRA | \n", "YSYEKL | \n", "ASSQAPQGADTQY | \n", "TRAV1-2*01 | \n", "TRBV4-1*01 | \n", "TRAJ42*01 | \n", "TRBJ2-3*01 | \n", "human | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9995 | \n", "SSVSVY | \n", "YLSGSTLV | \n", "AVSVRGSQGNLI | \n", "MNHNY | \n", "SVGAGI | \n", "ASSYGNREGYT | \n", "TRAV8-6*01 | \n", "TRBV6-6*01 | \n", "TRAJ42*01 | \n", "TRBJ1-2*01 | \n", "human | \n", "10 | \n", "
9996 | \n", "NSAFQY | \n", "TYSSGN | \n", "AMRRNTNAGKST | \n", "DFQATT | \n", "SNEGSKA | \n", "SAPTDPAGTEAF | \n", "TRAV12-3*01 | \n", "TRBV20-1*01 | \n", "TRAJ27*01 | \n", "TRBJ1-1*01 | \n", "human | \n", "10 | \n", "
9997 | \n", "SVFSS | \n", "VVTGGEV | \n", "AAIIQGAQKLV | \n", "MDHEN | \n", "SYDVKM | \n", "ASSYQYYEQY | \n", "TRAV27*01 | \n", "TRBV28*01 | \n", "TRAJ54*01 | \n", "TRBJ2-7*01 | \n", "human | \n", "10 | \n", "
9998 | \n", "DSAIYN | \n", "IQSSQRE | \n", "APYSGGGADGLT | \n", "SEHNR | \n", "FQNEAQ | \n", "ASSSGTGGVEHGYT | \n", "TRAV21*02 | \n", "TRBV7-9*01 | \n", "TRAJ45*01 | \n", "TRBJ1-2*01 | \n", "human | \n", "10 | \n", "
9999 | \n", "NSAFQY | \n", "TYSSGN | \n", "AMTGAGSYQLT | \n", "LGHNA | \n", "YSLEER | \n", "ASSQELLGAELHYGYT | \n", "TRAV12-3*01 | \n", "TRBV4-3*01 | \n", "TRAJ28*01 | \n", "TRBJ1-2*01 | \n", "human | \n", "10 | \n", "
10000 rows × 12 columns
\n", "\n", " | v_call_alpha | \n", "v_call_beta | \n", "j_call_alpha | \n", "j_call_beta | \n", "sample_num | \n", "alpha_subgroup | \n", "beta_subgroup | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "TRAV26-1*01 | \n", "TRBV14*01 | \n", "TRAJ45*01 | \n", "TRBJ2-3*01 | \n", "1 | \n", "TRAV26 | \n", "TRBV14 | \n", "
1 | \n", "TRAV29/DV5*04 | \n", "TRBV3-1*01 | \n", "TRAJ6*01 | \n", "TRBJ2-3*01 | \n", "1 | \n", "TRAV29 | \n", "TRBV3 | \n", "
2 | \n", "TRAV24*01 | \n", "TRBV11-2*01 | \n", "TRAJ29*01 | \n", "TRBJ2-7*01 | \n", "1 | \n", "TRAV24 | \n", "TRBV11 | \n", "
3 | \n", "TRAV8-4*01 | \n", "TRBV20-1*02 | \n", "TRAJ42*01 | \n", "TRBJ2-5*01 | \n", "1 | \n", "TRAV8 | \n", "TRBV20 | \n", "
4 | \n", "TRAV1-2*01 | \n", "TRBV4-1*01 | \n", "TRAJ42*01 | \n", "TRBJ2-3*01 | \n", "1 | \n", "TRAV1 | \n", "TRBV4 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9995 | \n", "TRAV8-6*01 | \n", "TRBV6-6*01 | \n", "TRAJ42*01 | \n", "TRBJ1-2*01 | \n", "10 | \n", "TRAV8 | \n", "TRBV6 | \n", "
9996 | \n", "TRAV12-3*01 | \n", "TRBV20-1*01 | \n", "TRAJ27*01 | \n", "TRBJ1-1*01 | \n", "10 | \n", "TRAV12 | \n", "TRBV20 | \n", "
9997 | \n", "TRAV27*01 | \n", "TRBV28*01 | \n", "TRAJ54*01 | \n", "TRBJ2-7*01 | \n", "10 | \n", "TRAV27 | \n", "TRBV28 | \n", "
9998 | \n", "TRAV21*02 | \n", "TRBV7-9*01 | \n", "TRAJ45*01 | \n", "TRBJ1-2*01 | \n", "10 | \n", "TRAV21 | \n", "TRBV7 | \n", "
9999 | \n", "TRAV12-3*01 | \n", "TRBV4-3*01 | \n", "TRAJ28*01 | \n", "TRBJ1-2*01 | \n", "10 | \n", "TRAV12 | \n", "TRBV4 | \n", "
10000 rows × 7 columns
\n", "\n", " | sample_num | \n", "cdr_type | \n", "sequence | \n", "cdr | \n", "chain_type | \n", "length | \n", "
---|---|---|---|---|---|---|
0 | \n", "1 | \n", "cdr1_aa_alpha | \n", "TISGNEY | \n", "1 | \n", "alpha | \n", "7 | \n", "
1 | \n", "1 | \n", "cdr1_aa_alpha | \n", "NSMFDY | \n", "1 | \n", "alpha | \n", "6 | \n", "
2 | \n", "1 | \n", "cdr1_aa_alpha | \n", "SSNFYA | \n", "1 | \n", "alpha | \n", "6 | \n", "
3 | \n", "1 | \n", "cdr1_aa_alpha | \n", "SSVPPY | \n", "1 | \n", "alpha | \n", "6 | \n", "
4 | \n", "1 | \n", "cdr1_aa_alpha | \n", "TSGFNG | \n", "1 | \n", "alpha | \n", "6 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
59995 | \n", "10 | \n", "cdr3_aa_beta | \n", "ASSYGNREGYT | \n", "3 | \n", "beta | \n", "11 | \n", "
59996 | \n", "10 | \n", "cdr3_aa_beta | \n", "SAPTDPAGTEAF | \n", "3 | \n", "beta | \n", "12 | \n", "
59997 | \n", "10 | \n", "cdr3_aa_beta | \n", "ASSYQYYEQY | \n", "3 | \n", "beta | \n", "10 | \n", "
59998 | \n", "10 | \n", "cdr3_aa_beta | \n", "ASSSGTGGVEHGYT | \n", "3 | \n", "beta | \n", "14 | \n", "
59999 | \n", "10 | \n", "cdr3_aa_beta | \n", "ASSQELLGAELHYGYT | \n", "3 | \n", "beta | \n", "16 | \n", "
60000 rows × 6 columns
\n", "\n", " | pdb | \n", "Bchain | \n", "Achain | \n", "Dchain | \n", "Gchain | \n", "TCRtype | \n", "model | \n", "antigen_chain | \n", "antigen_type | \n", "antigen_name | \n", "... | \n", "authors | \n", "resolution | \n", "method | \n", "r_free | \n", "r_factor | \n", "affinity | \n", "affinity_method | \n", "affinity_temperature | \n", "affinity_pmid | \n", "engineered | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7zt2 | \n", "E | \n", "D | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | A | \n", "protein | Hapten | \n", "major histocompatibility complex class i-relat... | \n", "... | \n", "Karuppiah, V., Srikannathasan, V., Robinson, R.A. | \n", "2.4 | \n", "X-RAY DIFFRACTION | \n", "0.276 | \n", "0.215 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
1 | \n", "7zt3 | \n", "E | \n", "D | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | \n", "protein | \n", "major histocompatibility complex class i-relat... | \n", "... | \n", "Karuppiah, V., Srikannathasan, V., Robinson, R.A. | \n", "2.4 | \n", "X-RAY DIFFRACTION | \n", "0.236 | \n", "0.191 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
2 | \n", "7zt4 | \n", "E | \n", "D | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | \n", "protein | \n", "major histocompatibility complex class i-relat... | \n", "... | \n", "Karuppiah, V., Robinson, R.A. | \n", "2.02 | \n", "X-RAY DIFFRACTION | \n", "0.268 | \n", "0.234 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
3 | \n", "7zt5 | \n", "E | \n", "D | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | \n", "protein | \n", "major histocompatibility complex class i-relat... | \n", "... | \n", "Karuppiah, V., Robinson, R.A. | \n", "2.09 | \n", "X-RAY DIFFRACTION | \n", "0.266 | \n", "0.225 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
4 | \n", "7zt7 | \n", "E | \n", "D | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | \n", "protein | \n", "major histocompatibility complex class i-relat... | \n", "... | \n", "Karuppiah, V., Robinson, R.A. | \n", "1.84 | \n", "X-RAY DIFFRACTION | \n", "0.255 | \n", "0.207 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
931 | \n", "3rtq | \n", "D | \n", "C | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "A | \n", "Hapten | \n", "N-[(2S,3S,4R)-3,4-DIHYDROXY-1-{[(1S,2S,3R,4R,5... | \n", "... | \n", "Yu, E.D., Zajonc, D.M. | \n", "2.8 | \n", "X-RAY DIFFRACTION | \n", "0.268 | \n", "0.227 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
932 | \n", "3dxa | \n", "O | \n", "N | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "M | \n", "peptide | \n", "ebv decapeptide epitope | \n", "... | \n", "Archbold, J.K., Macdonald, W.A., Gras, S., Ros... | \n", "3.5 | \n", "X-RAY DIFFRACTION | \n", "0.330 | \n", "0.286 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
933 | \n", "1d9k | \n", "B | \n", "A | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "P | \n", "peptide | \n", "conalbumin peptide | \n", "... | \n", "Reinherz, E.L., Tan, K., Tang, L., Kern, P., L... | \n", "3.2 | \n", "X-RAY DIFFRACTION | \n", "0.293 | \n", "0.247 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
934 | \n", "4gg6 | \n", "H | \n", "G | \n", "NaN | \n", "NaN | \n", "abTCR | \n", "0 | \n", "J | \n", "peptide | \n", "peptide from alpha/beta-gliadin mm1 | \n", "... | \n", "Broughton, S.E., Theodossis, A., Petersen, J.,... | \n", "3.2 | \n", "X-RAY DIFFRACTION | \n", "0.285 | \n", "0.246 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
935 | \n", "2apf | \n", "A | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "Cho, S., Swaminathan, C.P., Yang, J., Kerzic, ... | \n", "1.8 | \n", "X-RAY DIFFRACTION | \n", "0.263 | \n", "0.197 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "
936 rows × 39 columns
\n", "\n", " | file_name | \n", "pdb_id | \n", "structure_type | \n", "state | \n", "alpha_chain | \n", "beta_chain | \n", "antigen_chain | \n", "mhc_chain1 | \n", "mhc_chain2 | \n", "cdr_sequences_collated | \n", "peptide_sequence | \n", "mhc_slug | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1ao7_D-E-C-A-B_tcr_pmhc.pdb | \n", "1ao7 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "
1 | \n", "1b0g_C-A-B_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "
2 | \n", "1b0g_F-D-E_pmhc.pdb | \n", "1b0g | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "F | \n", "D | \n", "E | \n", "NaN | \n", "ALWGFFPVL | \n", "hla_a_02_01 | \n", "
3 | \n", "1bd2_D-E-C-A-B_tcr_pmhc.pdb | \n", "1bd2 | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG... | \n", "LLFGYPVYV | \n", "hla_a_02_01 | \n", "
4 | \n", "1bii_P-A-B_pmhc.pdb | \n", "1bii | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "P | \n", "A | \n", "B | \n", "NaN | \n", "RGPGRAFVTI | \n", "h2_dd | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
386 | \n", "7rtd_C-A-B_pmhc.pdb | \n", "7rtd | \n", "pmhc | \n", "apo | \n", "NaN | \n", "NaN | \n", "C | \n", "A | \n", "B | \n", "NaN | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "
387 | \n", "7rtr_D-E-C-A-B_tcr_pmhc.pdb | \n", "7rtr | \n", "tcr_pmhc | \n", "holo | \n", "D | \n", "E | \n", "C | \n", "A | \n", "B | \n", "DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY | \n", "YLQPRTFLL | \n", "hla_a_02_01 | \n", "
388 | \n", "8gvb_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvb | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "
389 | \n", "8gvg_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvg | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD... | \n", "RFPLTFGW | \n", "hla_a_24_02 | \n", "
390 | \n", "8gvi_A-B-P-H-L_tcr_pmhc.pdb | \n", "8gvi | \n", "tcr_pmhc | \n", "holo | \n", "A | \n", "B | \n", "P | \n", "H | \n", "L | \n", "YGATPY-YFSGDTLV-AVVFTGGGNKLT-SEHNR-FQNEAQ-ASSL... | \n", "RYPLTFGW | \n", "hla_a_24_02 | \n", "
391 rows × 12 columns
\n", "\n", " | cdr_type | \n", "sequence | \n", "cdr | \n", "chain_type | \n", "length | \n", "
---|---|---|---|---|---|
0 | \n", "cdr1_alpha | \n", "DRGSQS | \n", "1 | \n", "alpha | \n", "6 | \n", "
1 | \n", "cdr1_alpha | \n", "NSMFDY | \n", "1 | \n", "alpha | \n", "6 | \n", "
2 | \n", "cdr1_alpha | \n", "TQDSSYF | \n", "1 | \n", "alpha | \n", "7 | \n", "
3 | \n", "cdr1_alpha | \n", "YSATPY | \n", "1 | \n", "alpha | \n", "6 | \n", "
4 | \n", "cdr1_alpha | \n", "TISGTDY | \n", "1 | \n", "alpha | \n", "7 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
511 | \n", "cdr3_beta | \n", "ASSSANSGELF | \n", "3 | \n", "beta | \n", "11 | \n", "
512 | \n", "cdr3_beta | \n", "ASSYGTGINYGYT | \n", "3 | \n", "beta | \n", "13 | \n", "
513 | \n", "cdr3_beta | \n", "ASSLDPGDTGELF | \n", "3 | \n", "beta | \n", "13 | \n", "
514 | \n", "cdr3_beta | \n", "ASSDRDRVPETQY | \n", "3 | \n", "beta | \n", "13 | \n", "
515 | \n", "cdr3_beta | \n", "ASSLRDRVPETQY | \n", "3 | \n", "beta | \n", "13 | \n", "
516 rows × 5 columns
\n", "\n", " | sample_num | \n", "cdr_type | \n", "sequence | \n", "cdr | \n", "chain_type | \n", "length | \n", "source | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "1.0 | \n", "cdr1_aa_alpha | \n", "TISGNEY | \n", "1 | \n", "alpha | \n", "7 | \n", "OTS | \n", "
1 | \n", "1.0 | \n", "cdr1_aa_alpha | \n", "NSMFDY | \n", "1 | \n", "alpha | \n", "6 | \n", "OTS | \n", "
2 | \n", "1.0 | \n", "cdr1_aa_alpha | \n", "SSNFYA | \n", "1 | \n", "alpha | \n", "6 | \n", "OTS | \n", "
3 | \n", "1.0 | \n", "cdr1_aa_alpha | \n", "SSVPPY | \n", "1 | \n", "alpha | \n", "6 | \n", "OTS | \n", "
4 | \n", "1.0 | \n", "cdr1_aa_alpha | \n", "TSGFNG | \n", "1 | \n", "alpha | \n", "6 | \n", "OTS | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
511 | \n", "NaN | \n", "cdr3_beta | \n", "ASSSANSGELF | \n", "3 | \n", "beta | \n", "11 | \n", "STCRDab | \n", "
512 | \n", "NaN | \n", "cdr3_beta | \n", "ASSYGTGINYGYT | \n", "3 | \n", "beta | \n", "13 | \n", "STCRDab | \n", "
513 | \n", "NaN | \n", "cdr3_beta | \n", "ASSLDPGDTGELF | \n", "3 | \n", "beta | \n", "13 | \n", "STCRDab | \n", "
514 | \n", "NaN | \n", "cdr3_beta | \n", "ASSDRDRVPETQY | \n", "3 | \n", "beta | \n", "13 | \n", "STCRDab | \n", "
515 | \n", "NaN | \n", "cdr3_beta | \n", "ASSLRDRVPETQY | \n", "3 | \n", "beta | \n", "13 | \n", "STCRDab | \n", "
60516 rows × 7 columns
\n", "\n", " | \n", " | \n", " | mode | \n", "mean | \n", "std | \n", "
---|---|---|---|---|---|
chain_type | \n", "cdr | \n", "source | \n", "\n", " | \n", " | \n", " |
alpha | \n", "1 | \n", "OTS | \n", "6 | \n", "6.003800 | \n", "0.570279 | \n", "
STCRDab | \n", "6 | \n", "6.127907 | \n", "0.609658 | \n", "||
2 | \n", "OTS | \n", "7 | \n", "6.805900 | \n", "1.031664 | \n", "|
STCRDab | \n", "7 | \n", "6.546512 | \n", "1.001845 | \n", "||
3 | \n", "OTS | \n", "12 | \n", "11.730200 | \n", "1.692954 | \n", "|
STCRDab | \n", "11 | \n", "11.255814 | \n", "1.688620 | \n", "||
beta | \n", "1 | \n", "OTS | \n", "5 | \n", "5.116500 | \n", "0.320840 | \n", "
STCRDab | \n", "5 | \n", "5.069767 | \n", "0.256249 | \n", "||
2 | \n", "OTS | \n", "6 | \n", "6.092400 | \n", "0.363147 | \n", "|
STCRDab | \n", "6 | \n", "5.976744 | \n", "0.215666 | \n", "||
3 | \n", "OTS | \n", "13 | \n", "12.568400 | \n", "1.753918 | \n", "|
STCRDab | \n", "12 | \n", "12.046512 | \n", "1.578584 | \n", "
\n", " | sample_num | \n", "source | \n", "subgroup | \n", "gene | \n", "subgroup_num | \n", "
---|---|---|---|---|---|
0 | \n", "1.0 | \n", "OTS | \n", "alpha_subgroup | \n", "TRAV26 | \n", "26 | \n", "
1 | \n", "1.0 | \n", "OTS | \n", "alpha_subgroup | \n", "TRAV29 | \n", "29 | \n", "
2 | \n", "1.0 | \n", "OTS | \n", "alpha_subgroup | \n", "TRAV24 | \n", "24 | \n", "
3 | \n", "1.0 | \n", "OTS | \n", "alpha_subgroup | \n", "TRAV8 | \n", "8 | \n", "
4 | \n", "1.0 | \n", "OTS | \n", "alpha_subgroup | \n", "TRAV1 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
20163 | \n", "NaN | \n", "STCRDab | \n", "beta_subgroup | \n", "TRBV7 | \n", "7 | \n", "
20164 | \n", "NaN | \n", "STCRDab | \n", "beta_subgroup | \n", "TRBV6 | \n", "6 | \n", "
20165 | \n", "NaN | \n", "STCRDab | \n", "beta_subgroup | \n", "TRBV11 | \n", "11 | \n", "
20166 | \n", "NaN | \n", "STCRDab | \n", "beta_subgroup | \n", "TRBV7 | \n", "7 | \n", "
20167 | \n", "NaN | \n", "STCRDab | \n", "beta_subgroup | \n", "TRBV7 | \n", "7 | \n", "
20168 rows × 5 columns
\n", "\n", " | sample_num | \n", "chain_type | \n", "cdr | \n", "residue | \n", "proportion | \n", "
---|---|---|---|---|---|
0 | \n", "1 | \n", "alpha | \n", "1 | \n", "A | \n", "0.067791 | \n", "
1 | \n", "1 | \n", "alpha | \n", "1 | \n", "C | \n", "0.000000 | \n", "
2 | \n", "1 | \n", "alpha | \n", "1 | \n", "D | \n", "0.062615 | \n", "
3 | \n", "1 | \n", "alpha | \n", "1 | \n", "E | \n", "0.014527 | \n", "
4 | \n", "1 | \n", "alpha | \n", "1 | \n", "F | \n", "0.033228 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1195 | \n", "10 | \n", "beta | \n", "3 | \n", "S | \n", "0.181695 | \n", "
1196 | \n", "10 | \n", "beta | \n", "3 | \n", "T | \n", "0.073013 | \n", "
1197 | \n", "10 | \n", "beta | \n", "3 | \n", "V | \n", "0.021305 | \n", "
1198 | \n", "10 | \n", "beta | \n", "3 | \n", "W | \n", "0.006623 | \n", "
1199 | \n", "10 | \n", "beta | \n", "3 | \n", "Y | \n", "0.066949 | \n", "
1200 rows × 5 columns
\n", "\n", " | chain_type | \n", "cdr | \n", "residue | \n", "proportion | \n", "
---|---|---|---|---|
0 | \n", "alpha | \n", "1 | \n", "A | \n", "0.055028 | \n", "
1 | \n", "alpha | \n", "1 | \n", "C | \n", "0.000000 | \n", "
2 | \n", "alpha | \n", "1 | \n", "D | \n", "0.087287 | \n", "
3 | \n", "alpha | \n", "1 | \n", "E | \n", "0.017078 | \n", "
4 | \n", "alpha | \n", "1 | \n", "F | \n", "0.037951 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
115 | \n", "beta | \n", "3 | \n", "S | \n", "0.179537 | \n", "
116 | \n", "beta | \n", "3 | \n", "T | \n", "0.062741 | \n", "
117 | \n", "beta | \n", "3 | \n", "V | \n", "0.023166 | \n", "
118 | \n", "beta | \n", "3 | \n", "W | \n", "0.013514 | \n", "
119 | \n", "beta | \n", "3 | \n", "Y | \n", "0.076255 | \n", "
120 rows × 4 columns
\n", "\n", " | sample_num | \n", "species | \n", "proportion | \n", "
---|---|---|---|
0 | \n", "1 | \n", "human | \n", "0.993 | \n", "
1 | \n", "1 | \n", "mouse | \n", "0.007 | \n", "
2 | \n", "2 | \n", "human | \n", "0.983 | \n", "
3 | \n", "2 | \n", "mouse | \n", "0.017 | \n", "
4 | \n", "3 | \n", "human | \n", "0.979 | \n", "
5 | \n", "3 | \n", "mouse | \n", "0.021 | \n", "
6 | \n", "4 | \n", "human | \n", "0.982 | \n", "
7 | \n", "4 | \n", "mouse | \n", "0.018 | \n", "
8 | \n", "5 | \n", "human | \n", "0.981 | \n", "
9 | \n", "5 | \n", "mouse | \n", "0.019 | \n", "
10 | \n", "6 | \n", "human | \n", "0.985 | \n", "
11 | \n", "6 | \n", "mouse | \n", "0.015 | \n", "
12 | \n", "7 | \n", "human | \n", "0.984 | \n", "
13 | \n", "7 | \n", "mouse | \n", "0.016 | \n", "
14 | \n", "8 | \n", "human | \n", "0.975 | \n", "
15 | \n", "8 | \n", "mouse | \n", "0.025 | \n", "
16 | \n", "9 | \n", "human | \n", "0.991 | \n", "
17 | \n", "9 | \n", "mouse | \n", "0.009 | \n", "
18 | \n", "10 | \n", "human | \n", "0.986 | \n", "
19 | \n", "10 | \n", "mouse | \n", "0.014 | \n", "
\n", " | alpha_organism | \n", "beta_organism | \n", "
---|
\n", " | name | \n", "cluster | \n", "chain_type | \n", "cdr | \n", "sequence | \n", "cluster_type | \n", "
---|---|---|---|---|---|---|
0 | \n", "7zt2_DE | \n", "12 | \n", "alpha_chain | \n", "1 | \n", "TSGFNG | \n", "pseudo | \n", "
1 | \n", "7zt3_DE | \n", "12 | \n", "alpha_chain | \n", "1 | \n", "TSGFNG | \n", "pseudo | \n", "
2 | \n", "7zt4_DE | \n", "12 | \n", "alpha_chain | \n", "1 | \n", "TSGFNG | \n", "pseudo | \n", "
3 | \n", "7zt5_DE | \n", "12 | \n", "alpha_chain | \n", "1 | \n", "TSGFNG | \n", "pseudo | \n", "
4 | \n", "7zt7_DE | \n", "12 | \n", "alpha_chain | \n", "1 | \n", "TSGFNG | \n", "pseudo | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
4807 | \n", "6miv_CD | \n", "22 | \n", "beta_chain | \n", "3 | \n", "ASGDEGYTQY | \n", "canonical | \n", "
4808 | \n", "3rtq_CD | \n", "22 | \n", "beta_chain | \n", "3 | \n", "ASGDEGYTQY | \n", "canonical | \n", "
4809 | \n", "3dxa_NO | \n", "noise | \n", "beta_chain | \n", "3 | \n", "ASRYRDDSYNEQF | \n", "NaN | \n", "
4810 | \n", "1d9k_AB | \n", "noise | \n", "beta_chain | \n", "3 | \n", "ASGGQGRAEQF | \n", "NaN | \n", "
4811 | \n", "4gg6_GH | \n", "noise | \n", "beta_chain | \n", "3 | \n", "ASSVAVSAGTYEQY | \n", "NaN | \n", "
4812 rows × 6 columns
\n", "