语义分割实战:PyTorch 实现 U-Net 在医学影像数据集上达到 0.85+ MIoU
2026/7/5 8:07:12 网站建设 项目流程

语义分割实战:PyTorch 实现 U-Net 在医学影像数据集上达到 0.85+ MIoU

医学影像分析正经历一场由深度学习驱动的革命。当放射科医生需要从数千张CT扫描片中定位肿瘤区域,或皮肤科医师要评估病变范围时,像素级精确的分割结果能显著提升诊断效率。本文将手把手带您实现一个能在ISIC皮肤病变数据集上达到0.85+ MIoU的U-Net模型,从数据预处理到模型调优全程实战。

1. 环境配置与数据准备

1.1 基础环境搭建

推荐使用Python 3.8+和PyTorch 1.12+环境,以下是关键依赖:

pip install torch torchvision torchaudio pip install opencv-python albumentations pandas tqdm

对于GPU加速,建议配置CUDA 11.3:

import torch print(f"PyTorch版本: {torch.__version__}") print(f"CUDA可用: {torch.cuda.is_available()}") print(f"GPU数量: {torch.cuda.device_count()}")

1.2 ISIC数据集处理

ISIC数据集包含皮肤镜图像及其对应的病变区域标注,我们需要特别处理类别不平衡问题:

from glob import glob import cv2 import numpy as np class ISICDataset(torch.utils.data.Dataset): def __init__(self, img_dir, mask_dir, transform=None): self.img_paths = sorted(glob(f"{img_dir}/*.jpg")) self.mask_paths = sorted(glob(f"{mask_dir}/*.png")) self.transform = transform def __getitem__(self, idx): img = cv2.cvtColor(cv2.imread(self.img_paths[idx]), cv2.COLOR_BGR2RGB) mask = cv2.imread(self.mask_paths[idx], cv2.IMREAD_GRAYSCALE) if self.transform: augmented = self.transform(image=img, mask=mask) img, mask = augmented['image'], augmented['mask'] return img.float(), mask.long()

数据增强策略对医学影像尤为重要:

import albumentations as A train_transform = A.Compose([ A.RandomResizedCrop(256, 256, scale=(0.8, 1.2)), A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.RandomRotate90(p=0.5), A.GaussNoise(var_limit=(10, 50), p=0.3), A.GaussianBlur(blur_limit=3, p=0.2), A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) ])

2. U-Net模型深度解析

2.1 编码器-解码器架构

U-Net的核心在于其对称的收缩路径和扩展路径:

import torch.nn as nn class DoubleConv(nn.Module): """(卷积 => [BN] => ReLU) * 2""" def __init__(self, in_channels, out_channels): super().__init__() self.double_conv = nn.Sequential( nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True), nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), nn.BatchNorm2d(out_channels), nn.ReLU(inplace=True) ) def forward(self, x): return self.double_conv(x) class Down(nn.Module): """下采样模块""" def __init__(self, in_channels, out_channels): super().__init__() self.maxpool_conv = nn.Sequential( nn.MaxPool2d(2), DoubleConv(in_channels, out_channels) ) def forward(self, x): return self.maxpool_conv(x)

2.2 跳跃连接与上采样

跳跃连接能保留低级特征信息,解决梯度消失问题:

class Up(nn.Module): """上采样模块""" def __init__(self, in_channels, out_channels, bilinear=True): super().__init__() if bilinear: self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) else: self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2) self.conv = DoubleConv(in_channels, out_channels) def forward(self, x1, x2): x1 = self.up(x1) diffY = x2.size()[2] - x1.size()[2] diffX = x2.size()[3] - x1.size()[3] x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]) x = torch.cat([x2, x1], dim=1) return self.conv(x)

2.3 完整U-Net实现

整合各组件构建最终模型:

class UNet(nn.Module): def __init__(self, n_channels, n_classes, bilinear=True): super(UNet, self).__init__() self.n_channels = n_channels self.n_classes = n_classes self.bilinear = bilinear self.inc = DoubleConv(n_channels, 64) self.down1 = Down(64, 128) self.down2 = Down(128, 256) self.down3 = Down(256, 512) self.down4 = Down(512, 1024) self.up1 = Up(1024, 512, bilinear) self.up2 = Up(512, 256, bilinear) self.up3 = Up(256, 128, bilinear) self.up4 = Up(128, 64, bilinear) self.outc = nn.Conv2d(64, n_classes, kernel_size=1) def forward(self, x): x1 = self.inc(x) x2 = self.down1(x1) x3 = self.down2(x2) x4 = self.down3(x3) x5 = self.down4(x4) x = self.up1(x5, x4) x = self.up2(x, x3) x = self.up3(x, x2) x = self.up4(x, x1) logits = self.outc(x) return logits

3. 训练策略与损失函数

3.1 混合损失函数

医学影像中常采用组合损失应对类别不平衡:

class DiceBCELoss(nn.Module): def __init__(self, weight=None, size_average=True): super(DiceBCELoss, self).__init__() def forward(self, inputs, targets, smooth=1): inputs = torch.sigmoid(inputs) # 展平预测和真实标签 inputs = inputs.view(-1) targets = targets.view(-1) intersection = (inputs * targets).sum() dice_loss = 1 - (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth) BCE = F.binary_cross_entropy(inputs, targets, reduction='mean') return BCE + dice_loss

3.2 优化器配置

采用AdamW优化器配合余弦退火学习率调度:

model = UNet(n_channels=3, n_classes=1).to(device) optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6) # 早停机制 early_stopping = EarlyStopping(patience=15, verbose=True)

3.3 训练循环实现

完整的训练流程包含验证阶段和指标计算:

def train_epoch(model, loader, optimizer, criterion, device): model.train() running_loss = 0.0 for images, masks in tqdm(loader): images = images.to(device) masks = masks.float().unsqueeze(1).to(device) optimizer.zero_grad() outputs = model(images) loss = criterion(outputs, masks) loss.backward() optimizer.step() running_loss += loss.item() * images.size(0) return running_loss / len(loader.dataset) def validate(model, loader, criterion, device): model.eval() val_loss = 0.0 iou_scores = [] with torch.no_grad(): for images, masks in loader: images = images.to(device) masks = masks.float().unsqueeze(1).to(device) outputs = model(images) loss = criterion(outputs, masks) val_loss += loss.item() * images.size(0) preds = torch.sigmoid(outputs) > 0.5 iou_scores.append(calculate_iou(preds, masks)) return val_loss / len(loader.dataset), np.mean(iou_scores)

4. 模型评估与性能优化

4.1 评估指标实现

MIoU(平均交并比)是语义分割的核心指标:

def calculate_iou(pred, target, smooth=1e-6): intersection = (pred & target).float().sum((1, 2)) union = (pred | target).float().sum((1, 2)) iou = (intersection + smooth) / (union + smooth) return iou.mean().item() def calculate_dice(pred, target, smooth=1e-6): intersection = (pred * target).sum() return (2. * intersection + smooth) / (pred.sum() + target.sum() + smooth)

4.2 测试集评估流程

使用多尺度推理提升模型鲁棒性:

def evaluate_multiscale(model, test_loader, scales=[0.75, 1.0, 1.25], device='cuda'): model.eval() total_iou = 0 with torch.no_grad(): for img, mask in test_loader: img = img.to(device) mask = mask.to(device).unsqueeze(1) preds = [] for scale in scales: h, w = int(img.shape[2]*scale), int(img.shape[3]*scale) scaled_img = F.interpolate(img, size=(h,w), mode='bilinear') scaled_pred = model(scaled_img) scaled_pred = F.interpolate(scaled_pred, size=img.shape[2:], mode='bilinear') preds.append(scaled_pred) final_pred = torch.sigmoid(torch.mean(torch.stack(preds), dim=0)) > 0.5 total_iou += calculate_iou(final_pred, mask) return total_iou / len(test_loader)

4.3 性能优化技巧

通过以下策略可进一步提升MIoU:

  1. 深度监督:在解码器的每个阶段添加辅助损失
  2. 注意力机制:在跳跃连接处添加CBAM模块
  3. 数据增强:使用弹性变形等医学影像专用增强
  4. 后处理:采用CRF(条件随机场)细化边缘
# 示例:CBAM注意力模块 class CBAM(nn.Module): def __init__(self, channels, reduction_ratio=16): super(CBAM, self).__init__() self.channel_attention = nn.Sequential( nn.AdaptiveAvgPool2d(1), nn.Conv2d(channels, channels//reduction_ratio, kernel_size=1), nn.ReLU(inplace=True), nn.Conv2d(channels//reduction_ratio, channels, kernel_size=1), nn.Sigmoid() ) self.spatial_attention = nn.Sequential( nn.Conv2d(2, 1, kernel_size=7, padding=3), nn.Sigmoid() ) def forward(self, x): channel_att = self.channel_attention(x) x_channel = x * channel_att max_pool = torch.max(x_channel, dim=1, keepdim=True)[0] avg_pool = torch.mean(x_channel, dim=1, keepdim=True) spatial_att = self.spatial_attention(torch.cat([max_pool, avg_pool], dim=1)) return x_channel * spatial_att

5. 部署与可视化

5.1 模型导出与部署

将训练好的模型导出为ONNX格式:

dummy_input = torch.randn(1, 3, 256, 256, device='cuda') torch.onnx.export(model, dummy_input, "unet_medical.onnx", input_names=["input"], output_names=["output"], dynamic_axes={ "input": {0: "batch_size"}, "output": {0: "batch_size"} })

5.2 结果可视化

使用Matplotlib对比预测结果与真实标注:

def plot_results(image, mask, pred, save_path=None): plt.figure(figsize=(15,5)) plt.subplot(1,3,1) plt.imshow(image.permute(1,2,0).cpu().numpy()) plt.title("Input Image") plt.subplot(1,3,2) plt.imshow(mask.squeeze().cpu().numpy(), cmap='gray') plt.title("Ground Truth") plt.subplot(1,3,3) plt.imshow(pred.squeeze().cpu().numpy() > 0.5, cmap='gray') plt.title("Prediction") if save_path: plt.savefig(save_path, bbox_inches='tight') plt.show()

5.3 实际应用示例

在DICOM格式的医学影像上应用模型:

import pydicom def process_dicom(dicom_path, model, device): dicom = pydicom.dcmread(dicom_path) img = dicom.pixel_array.astype(np.float32) img = (img - img.min()) / (img.max() - img.min()) # 转换为3通道RGB if len(img.shape) == 2: img = np.stack([img]*3, axis=-1) # 预处理 transform = A.Compose([ A.Resize(256, 256), A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) ]) augmented = transform(image=img) img_tensor = torch.from_numpy(augmented['image']).permute(2,0,1).unsqueeze(0).to(device) # 推理 with torch.no_grad(): pred = torch.sigmoid(model(img_tensor)) > 0.5 return img, pred.squeeze().cpu().numpy()

需要专业的网站建设服务?

联系我们获取免费的网站建设咨询和方案报价,让我们帮助您实现业务目标

立即咨询