TCRen potential & contact-statistics analysis#

Reproduces the core TCRen-manuscript analyses from the committed contact data (data/contact_maps_PDB.csv + data/summary_PDB_structures.csv):

the TCRen potential (re-derived) and its agreement with the published values;
the Miyazawa-Jernigan and Keskin reference potentials;
the distribution of TCR-peptide contacts per structure and per TCR region;
how contacts distribute over peptide and CDR3 positions as a function of length.

[1]:

# Environment + load the manuscript contact data
%matplotlib inline
import sys, numpy as np, polars as pl, matplotlib.pyplot as plt, tcren
from tcren import analysis as an
from tcren.potential import tcren as load_tcren, mj, keskin, derive_tcren
print('python', sys.version.split()[0], '| polars', pl.__version__, '| tcren', tcren.__version__)

CONTACTS, SUMMARY = 'legacy/data/contact_maps_PDB.csv', 'legacy/data/summary_PDB_structures.csv'
import os
if not os.path.exists(CONTACTS):  # allow running from notebooks/
    CONTACTS, SUMMARY = '../' + CONTACTS, '../' + SUMMARY
df = an.load_interface_contacts(CONTACTS, SUMMARY)
df.select('pdb.id', 'chain.type.from', 'region.type.from', 'residue.aa.from', 'residue.aa.to', 'peptide_pos', 'peptide_len', 'cdr3_len').head()

python 3.12.12 | polars 1.39.3 | tcren 0.1.0

[1]:

shape: (5, 8)

pdb.id	chain.type.from	region.type.from	residue.aa.from	residue.aa.to	peptide_pos	peptide_len	cdr3_len
str	str	str	str	str	i64	u32	u32
"5m01"	"TRA"	"CDR3"	"Y"	"K"	0	9	12
"5m01"	"TRA"	"CDR3"	"Y"	"K"	0	9	12
"5m01"	"TRA"	"CDR3"	"Y"	"A"	1	9	12
"5m01"	"TRA"	"CDR3"	"Y"	"A"	1	9	12
"5m01"	"TRA"	"CDR3"	"G"	"A"	3	9	12

[2]:

# Helper: plot a pairwise potential as a heatmap
def plot_potential(ax, potential, title, cmap='RdBu_r', vlim=None):
    m, froms, tos = an.potential_matrix(potential)
    vlim = vlim or np.nanmax(np.abs(m))
    im = ax.imshow(m, cmap=cmap, vmin=-vlim, vmax=vlim, aspect='equal')
    ax.set_xticks(range(len(tos))); ax.set_xticklabels(tos, fontsize=6)
    ax.set_yticks(range(len(froms))); ax.set_yticklabels(froms, fontsize=6)
    ax.set_xlabel('antigen residue'); ax.set_ylabel('TCR residue'); ax.set_title(title)
    return im

[3]:

# TCRen potential: re-derived from the non-redundant set, vs the published values, + difference
nonred = pl.read_csv(SUMMARY).filter(pl.col('nonred'))['pdb.id'].to_list()
derived = derive_tcren(pl.read_csv(CONTACTS), include=nonred)
cmp = an.compare_potentials(derived, load_tcren())
print('max |derived - published| =', round(cmp['diff'].abs().max(), 12))

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
im0 = plot_potential(axes[0], derived, 'TCRen (re-derived)')
im1 = plot_potential(axes[1], load_tcren(), 'TCRen (published)')
# difference heatmap
diff = derived.matrix.join(load_tcren().matrix, on=['residue.aa.from','residue.aa.to'], suffix='_pub')
froms = sorted(diff['residue.aa.from'].unique().to_list()); tos = sorted(diff['residue.aa.to'].unique().to_list())
M = np.full((len(froms), len(tos)), np.nan)
fi = {a:i for i,a in enumerate(froms)}; ti = {a:i for i,a in enumerate(tos)}
for r in diff.iter_rows(named=True): M[fi[r['residue.aa.from']], ti[r['residue.aa.to']]] = r['value']-r['value_pub']
im2 = axes[2].imshow(M, cmap='PiYG', vmin=-1e-9, vmax=1e-9); axes[2].set_title('difference (~0)')
fig.colorbar(im0, ax=axes[0], fraction=0.046); fig.colorbar(im1, ax=axes[1], fraction=0.046)
fig.colorbar(im2, ax=axes[2], fraction=0.046); plt.tight_layout()

max |derived - published| = 0.0

../_images/notebooks_tcren_analysis_3_1.png

[4]:

# Reference potentials: Miyazawa-Jernigan and Keskin
fig, axes = plt.subplots(1, 2, figsize=(11, 5))
for ax, pot, name in zip(axes, [mj(), keskin()], ['Miyazawa-Jernigan', 'Keskin']):
    im = plot_potential(ax, pot, name)
    fig.colorbar(im, ax=ax, fraction=0.046)
plt.tight_layout()

../_images/notebooks_tcren_analysis_4_0.png

[5]:

# Distribution of TCR-peptide contacts per structure (TRA vs TRB)
cps = an.contacts_per_structure(df)
fig, ax = plt.subplots(figsize=(7, 4))
for chain, color in [('TRA', '#0072B2'), ('TRB', '#56B4E9')]:
    vals = cps.filter(pl.col('chain.type.from') == chain)['n_contacts'].to_list()
    ax.hist(vals, bins=range(0, 40, 2), alpha=0.6, label=chain, color=color)
ax.set_xlabel('TCR-peptide contacts per structure'); ax.set_ylabel('structures'); ax.legend(); plt.tight_layout()

../_images/notebooks_tcren_analysis_5_0.png

[6]:

# Contacts by TCR region and chain (CDR3 dominates; TRB > TRA)
reg = an.region_contact_counts(df)
order = ['FR1', 'CDR1', 'FR2', 'CDR2', 'FR3', 'CDR3']
fig, ax = plt.subplots(figsize=(7, 4))
x = np.arange(len(order)); w = 0.4
for k, (chain, color) in enumerate([('TRA', '#0072B2'), ('TRB', '#56B4E9')]):
    d = {r['region.type.from']: r['n_contacts'] for r in reg.filter(pl.col('chain.type.from')==chain).iter_rows(named=True)}
    ax.bar(x + (k-0.5)*w, [d.get(r, 0) for r in order], w, label=chain, color=color)
ax.set_xticks(x); ax.set_xticklabels(order); ax.set_ylabel('contacts'); ax.legend(); plt.tight_layout()

../_images/notebooks_tcren_analysis_6_0.png

[7]:

# Contact distribution over position, stratified by length: peptide, CDR3a, CDR3b
def plot_position_heatmap(ax, dist, title):
    lengths = sorted(dist['length'].unique().to_list())
    maxpos = dist['position'].max() + 1
    M = np.zeros((len(lengths), int(maxpos)))
    li = {L: i for i, L in enumerate(lengths)}
    for r in dist.iter_rows(named=True): M[li[r['length']], int(r['position'])] = r['n_contacts']
    M = M / M.sum(axis=1, keepdims=True).clip(min=1)  # row-normalise to a per-length profile
    im = ax.imshow(M, aspect='auto', cmap='viridis')
    ax.set_yticks(range(len(lengths))); ax.set_yticklabels(lengths)
    ax.set_xlabel('position'); ax.set_ylabel('length'); ax.set_title(title); return im

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, side, title in zip(axes, ['peptide', 'cdr3a', 'cdr3b'],
                           ['peptide', 'CDR3α', 'CDR3β']):
    im = plot_position_heatmap(ax, an.position_distribution(df, side), f'{title} contacts')
    fig.colorbar(im, ax=ax, fraction=0.046)
plt.tight_layout()

../_images/notebooks_tcren_analysis_7_0.png