Computational Pathology Research Framework

Logo

A tested PyTorch framework for computational pathology research with working benchmarks on PatchCamelyon and CAMELYON16

View on GitHub matthewvaishnav/computational-pathology-research

API Reference

Complete API documentation for the Computational Pathology Research Framework.


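Table of Contents

- Data Loading
- Model Architectures
- Pretrained Models
- Training
- Evaluation
- Utilities
- Configuration
- Examples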


Data Loading

PatchCamelyonDataset

Loads 96x96 pixel patches from the PatchCamelyon dataset.

from src.data import PatchCamelyonDataset

dataset = PatchCamelyonDataset(
    root_dir="data/pcam",
    split="train",  # "train", "val", or "test"
    transform=None
)

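Parameters:

- root_dir: directory containing the PatchCamelyon data (e.g. "data/pcam"). First positional argument.
- split: which split to load; one of "train", "val", or "test". Second positional argument.
- transform: optional transform; the example above passes None.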

Returns:


CAMELYONSlideDataset

Loads pre-extracted features for slide-level classification.

from src.data import CAMELYONSlideDataset

dataset = CAMELYONSlideDataset(
    root_dir="data/camelyon/features",
    split="train",
    max_patches=None
)

Parameters:

Returns:


collate_slide_bags

Collates variable-length slides into batched tensors with masking.

from src.data import collate_slide_bags

batch = collate_slide_bags(samples)

Parameters:

Returns:


Model Architectures

SimpleClassifier

Basic CNN classifier for patch-level classification.

from src.models import SimpleClassifier

model = SimpleClassifier(
    num_classes=2,
    dropout=0.5
)

Parameters:

Forward:

output = model(images)  # images: (batch, 3, 96, 96)
# output: (batch, num_classes)

SimpleSlideClassifier

Slide-level classifier with attention-based aggregation.

from src.models import SimpleSlideClassifier

model = SimpleSlideClassifier(
    feature_dim=2048,
    hidden_dim=256,
    num_classes=2,
    pooling="attention",  # "mean", "max", or "attention"
    dropout=0.5
)

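Parameters:

- feature_dim: size of each patch feature vector, i.e. the last dimension of the `features` input (2048 in the example).
- hidden_dim: hidden dimension (256 in the example).
- num_classes: number of output classes, i.e. the last dimension of the output (2 in the example).
- pooling: how patch features are aggregated; one of "mean", "max", or "attention".
- dropout: dropout setting (0.5 in the example).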

Forward:

output = model(features, num_patches)
# features: (batch, max_patches, feature_dim)
# num_patches: (batch,)
# output: (batch, num_classes)

Pretrained Models

load_pretrained_encoder

Loads pretrained encoders from torchvision or timm.

from src.models.pretrained import load_pretrained_encoder

encoder = load_pretrained_encoder(
    model_name="resnet50",
    source="torchvision",  # "torchvision" or "timm"
    pretrained=True,
    num_classes=2
)

# Access feature dimension
feature_dim = encoder.feature_dim

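Parameters:

- model_name: name of the encoder architecture to load (e.g. "resnet50").
- source: library the model comes from; "torchvision" or "timm".
- pretrained: whether to load pretrained weights (True in the example).
- num_classes: number of output classes (2 in the example).

The returned encoder exposes its feature dimension as `encoder.feature_dim`.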

Supported Models:

torchvision:

timm:


Training

train_epoch

Trains model for one epoch.

from src.training import train_epoch

metrics = train_epoch(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epoch=epoch
)

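Parameters:

- model: the model to train.
- train_loader: loader that yields the training batches (a `torch.utils.data.DataLoader` in the training example below).
- criterion: loss function (e.g. `torch.nn.CrossEntropyLoss()`).
- optimizer: optimizer that updates the model parameters (e.g. `torch.optim.Adam`).
- device: `torch.device` to train on.
- epoch: index of the current epoch; the training example below passes the zero-based loop counter.

Returns:

- metrics: a mapping of metric names to values for the epoch; the training example below reads its "loss" and "accuracy" entries.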


Evaluation

evaluate

Evaluates model on validation/test set.

from src.training import evaluate

metrics = evaluate(
    model=model,
    val_loader=val_loader,
    criterion=criterion,
    device=device
)

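Parameters:

- model: the model to evaluate.
- val_loader: loader that yields the validation or test batches (a `torch.utils.data.DataLoader` in the training example below).
- criterion: loss function (e.g. `torch.nn.CrossEntropyLoss()`).
- device: `torch.device` to evaluate on.

Returns:

- metrics: a mapping of metric names to values; the training example below reads its "loss" and "accuracy" entries.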


Utilities

set_seed

Sets random seed for reproducibility.

from src.utils import set_seed

set_seed(42)

Parameters:


save_checkpoint

Saves model checkpoint.

from src.utils import save_checkpoint

save_checkpoint(
    model=model,
    optimizer=optimizer,
    epoch=epoch,
    metrics=metrics,
    path="checkpoints/model.pth"
)

Parameters:


load_checkpoint

Loads model checkpoint.

from src.utils import load_checkpoint

checkpoint = load_checkpoint(
    path="checkpoints/model.pth",
    model=model,
    optimizer=optimizer
)

Parameters:

Returns:


Configuration

Training Configuration

Example YAML configuration for training:

# experiments/configs/pcam.yaml
data:
  root_dir: "data/pcam"
  batch_size: 32
  num_workers: 4

model:
  architecture: "resnet18"
  num_classes: 2
  dropout: 0.5

training:
  epochs: 10
  learning_rate: 0.001
  weight_decay: 0.0001
  optimizer: "adam"
  scheduler: "step"
  step_size: 5
  gamma: 0.1

logging:
  log_interval: 10
  checkpoint_dir: "checkpoints/pcam"
  save_best: true

Examples

Training a PCam Model

import torch
from torch.utils.data import DataLoader
from src.data import PatchCamelyonDataset
from src.models import SimpleClassifier
from src.training import train_epoch, evaluate
from src.utils import set_seed

# Set seed for reproducibility
set_seed(42)

# Create datasets
train_dataset = PatchCamelyonDataset("data/pcam", "train")
val_dataset = PatchCamelyonDataset("data/pcam", "val")

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Create model
model = SimpleClassifier(num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Training setup
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):
    train_metrics = train_epoch(model, train_loader, criterion, optimizer, device, epoch)
    val_metrics = evaluate(model, val_loader, criterion, device)
    
    print(f"Epoch {epoch+1}/10")
    print(f"Train Loss: {train_metrics['loss']:.4f}, Acc: {train_metrics['accuracy']:.4f}")
    print(f"Val Loss: {val_metrics['loss']:.4f}, Acc: {val_metrics['accuracy']:.4f}")

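Using Pretrained Models

This example continues from "Training a PCam Model" above: it relies on `import torch` and on the `device` object defined there. Run that setup first, or add `import torch` and the `device = torch.device(...)` line when using this snippet on its own.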

from src.models.pretrained import load_pretrained_encoder
import torch.nn as nn

# Load pretrained ResNet50
encoder = load_pretrained_encoder(
    model_name="resnet50",
    source="torchvision",
    pretrained=True,
    num_classes=2
)

# Get feature dimension
print(f"Feature dimension: {encoder.feature_dim}")

# Use in training
model = encoder.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)