
‣

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colorbar as mcolorbar
import matplotlib.colors as mcolors
import seaborn as sns
from scipy.stats import zscore
#data loading
meta = pd.read_csv("metadata.tsv", sep="\t")
path_raw = pd.read_csv("merged_pathabundance_cpm.tsv", sep="\t")
path_raw.columns = path_raw.columns.str.replace("_Abundance", "", regex=False)
path_raw = path_raw.rename(columns={path_raw.columns[0]: "pathway"})
#Filter to community-level pathways only
df = path_raw[
~path_raw["pathway"].str.contains(r"\|", regex=True) &
~path_raw["pathway"].str.startswith("UNMAPPED") &
~path_raw["pathway"].str.startswith("UNINTEGRATED")
].copy()
#pathway names
df["pathway_label"] = df["pathway"].str.extract(r":\s*(.+)")[0].fillna(df["pathway"])
df = df.set_index("pathway_label").drop(columns=["pathway"], errors="ignore")
df = df[meta["SampleID"].tolist()]
#order samples.sludge,water(May first),biofilm
type_rank = {"Sludge": 0, "Water": 1, "Biofilm": 2}
time_rank = {"May2024": 0, "July2025": 1}
meta_sorted = (
meta
.assign(
type_rank=meta["Type"].map(type_rank),
time_rank=meta["TimePoint"].map(time_rank)
)
.sort_values(["type_rank", "time_rank", "SampleID"])
)
sample_order = meta_sorted["SampleID"].tolist()
df = df[sample_order]
#z score normalis
df_z = df.apply(zscore, axis=1, result_type="broadcast").fillna(0)
#colour bar
def color_group(row):
if row["Type"] == "Water" and row["TimePoint"] == "May2024":
return "Water (May 2024)"
elif row["Type"] == "Water":
return "Water (July 2025)"
else:
return row["Type"]
meta_sorted["color_group"] = meta_sorted.apply(color_group, axis=1)
type_palette = {
"Sludge": "#7B4F2E", # brown
"Water (May 2024)": "#2EC4B6", # turquoise
"Water (July 2025)": "#2166AC", # dark blue
"Biofilm": "#6BAB6E", # green
}
# name=None removes the "color_group" label from the strip
col_colors = meta_sorted.set_index("SampleID")["color_group"].map(type_palette)
col_colors.name = "Sample Type"
# x ax labels
label_map = {
row["SampleID"]: f"{row['Description']}\n({row['TimePoint']})"
for _, row in meta.iterrows()
}
col_labels = [label_map[s] for s in sample_order]
#clustermap
sns.set_theme(style="white", font="sans-serif")
g = sns.clustermap(
df_z,
col_cluster=False,
row_cluster=True,
col_colors=col_colors,
cmap="RdBu_r",
center=0,
vmin=-2, vmax=2,
linewidths=0.4,
linecolor="white",
figsize=(16, 11),
xticklabels=col_labels,
yticklabels=True,
cbar_pos=None, # disable built-in colorbar
dendrogram_ratio=(0.0, 0.10),
colors_ratio=0.02
)
#style
g.ax_heatmap.set_xlabel("")
g.ax_heatmap.set_ylabel("")
g.ax_heatmap.set_xticklabels(
col_labels, rotation=40, ha="right", fontsize=7.5, color="#333333"
)
g.ax_heatmap.set_yticklabels(
g.ax_heatmap.get_yticklabels(), rotation=0, fontsize=9, style="italic"
)
#zscore colorbar
g.fig.canvas.draw()
hm = g.ax_heatmap.get_position()
#colorbar
cbar_ax = g.fig.add_axes([
hm.x0 - 0.05,
hm.y0 + 0.02,
0.013,
hm.height * 0.28
])
norm = mcolors.Normalize(vmin=-2, vmax=2)
cb = mcolorbar.ColorbarBase(
cbar_ax, cmap=plt.cm.RdBu_r, norm=norm, orientation="vertical"
)
cb.set_ticks([-2, -1, 0, 1, 2])
cb.ax.tick_params(labelsize=8)
cb.set_label("z-score", fontsize=9, labelpad=5)
#sample type legend
legend_handles = [
mpatches.Patch(color=col, label=label)
for label, col in type_palette.items()
]
g.fig.legend(
handles=legend_handles,
title="Sample Type",
title_fontsize=9,
fontsize=8,
loc="upper left",
bbox_to_anchor=(-0.10, 0.92), # top-left empty corner, away from pathway names
frameon=True,
framealpha=0.9,
edgecolor="lightgrey"
)
#title look
g.fig.suptitle(
"Gowanus Biofilm BioBAT — Metabolic Pathway Abundance",
x=0.35, y=0.97,
ha="center", fontsize=14, fontweight="bold"
)
g.fig.text(
0.35, 0.94,
"HUMAnN 4 · MetaCyc pathways · z-score normalised | Red = enriched · Blue = depleted",
ha="center", fontsize=10, color="grey"
)
#save
plt.savefig("pathway_heatmap.pdf", dpi=300, bbox_inches="tight")
plt.savefig("pathway_heatmap.png", dpi=300, bbox_inches="tight")
print("✓ Saved: pathway_heatmap.pdf and pathway_heatmap.png")