1. USGS EarthExplorer
https://earthexplorer.usgs.gov/https://earthexplorer.usgs.gov/USGS EarthExplorer 是全球遥感研究的核心数据源,提供了包括 **Landsat (1-9)**, **Sentinel-2**, **Hyperion (高光谱)** 等多种免费影像。对于多时相研究,这里是获取历史长序列数据的最佳场所。
2. 数据筛选与下载流程
2.1 区域定位 (Search Criteria)
1. 地址筛选:
在 `Address/Place` 输入研究区域(如 DSNY 或特定的经纬度);也可以手动查找具体的区域。以botswana地区为例,找到这个地方之后单击,就可以在地图上打点:
然后点击左边use map,就会出现可以选定的区域,调节四个角可以调范围
![]()
2. 日期范围:
设置 `Date Range`。做多时相研究时,建议选择一个能被多次拍摄的地点,然后选尽量长的时间,后面便于筛选
2.2 数据集选择 (Data Sets)
多光谱 (Multispectral): 选择 `Landsat -> Landsat Collection 2 Level-2`(已做过大气校正的地表反射率)。
高光谱 (Hyperspectral): 选择 `EO-1 -> Hyperion`(注:这是目前公开最常用的卫星高光谱数据)。
然后点results,就可以看有哪些数据集了
2.3 关键:筛选“同一区域”
点击左边数据集上的脚印标志可以查看它在图中的位置:
多点几个,如果他们重合就说明是同一地点的不同时间拍摄得到的。
找到合适的几个多时相数据集后,可以点中间的下载图标,得到:
下载L1R版本就行。但是由于这些数据并不是完全配准对齐的,因此还需要处理
3. Python 自动化预处理流水线
下载后的数据通常是 `.tar.gz` 压缩包,内含多个单波段的 `.TIF` 文件。我们需要将它们合成为多维张量。
# ============================================================ # 1. 导入依赖 # ============================================================ import os, json import numpy as np import tifffile as tiff import matplotlib.pyplot as plt from scipy import ndimage from PIL import Image Image.MAX_IMAGE_PIXELS = None# ============================================================ # 2. 所有处理函数 # ============================================================ # 论文中 145 个有效波段 (1-based) VALID_BANDS = [b for s, e in [(10,55),(82,97),(102,119),(134,164),(187,220)] for b in range(s, e+1)] RGB_IDX = (40, 20, 10) # 从 145 个波段中挑选 R/G/B def load_cube(data_dir, bands=VALID_BANDS): """加载指定波段并堆叠为 (H, W, C)""" base = os.path.basename(data_dir.rstrip('/')).replace('_1T', '') pattern = os.path.join(data_dir, base + "_B{:03d}_L1T.TIF") imgs = [tiff.imread(pattern.format(b)) for b in bands] return np.transpose(np.array(imgs), (1, 2, 0)) def read_map_info(data_dir): """从 B001 TIFF 读取 GeoTIFF 坐标, 返回 (x0, y0, dx, dy) - (x0, y0) 左上角 UTM - dx, dy 像素大小 (m) """ base = os.path.basename(data_dir.rstrip('/')).replace('_1T', '') p = os.path.join(data_dir, base + "_B001_L1T.TIF") im = Image.open(p) pix = im.tag_v2.get(33550) tie = im.tag_v2.get(33922) if pix is None or tie is None: raise ValueError(f"{p} 无 GeoTIFF 坐标标签") return float(tie[3]), float(tie[4]), float(pix[0]), float(pix[1]) def align_by_utm(results, map_infos): """按 UTM 公共重叠区裁剪多个结果, 返回裁剪后的 results 与公共 map_info。 要求 results 为 process() 的输出 (原始未旋转或已旋转都可以, 这里建议传未旋转版本). """ dx, dy = map_infos[0][2], map_infos[0][3] exts = [] for r, (x0, y0, _, _) in zip(results, map_infos): H, W = r['cube'].shape[:2] exts.append((x0, x0 + W*dx, y0 - H*dy, y0)) X0 = max(e[0] for e in exts); X1 = min(e[1] for e in exts) Y0 = max(e[2] for e in exts); Y1 = min(e[3] for e in exts) if X1 <= X0 or Y1 <= Y0: raise ValueError("无公共重叠区") print(f" 公共 UTM: X=[{X0:.0f},{X1:.0f}], Y=[{Y0:.0f},{Y1:.0f}] " f"→ {int((X1-X0)/dx)} x {int((Y1-Y0)/dy)} 像素") for r, (x0, y0, _, _) in zip(results, map_infos): c0 = int(round((X0 - x0) / dx)); c1 = int(round((X1 - x0) / dx)) r0 = int(round((y0 - Y1) / dy)); r1 = int(round((y0 - Y0) / dy)) r['cube'] = r['cube'][r0:r1, c0:c1] r['rgb'] = r['rgb'][r0:r1, c0:c1] H = min(r['cube'].shape[0] for r in results) W = min(r['cube'].shape[1] for r in results) for r in results: r['cube'] = r['cube'][:H, :W] r['rgb'] = r['rgb'][:H, :W] print(f" 裁剪后统一形状: {results[0]['cube'].shape}") return results, (X0, Y1, dx, dy) def normalize(band, lo=2, hi=98): a, b = np.percentile(band, (lo, hi)) return np.clip((band - a) / (b - a + 1e-8), 0, 1) def make_rgb(cube, idx=RGB_IDX): """生成归一化的伪彩色 RGB""" r, g, b = idx rgb = np.stack([cube[..., r], cube[..., g], cube[..., b]], axis=-1) return np.stack([normalize(rgb[..., i]) for i in range(3)], axis=-1) def crop_by_mask(cube, rgb, mask): """按 mask 的有效区域裁剪 cube / rgb""" rows = np.where(np.any(mask, axis=1))[0] cols = np.where(np.any(mask, axis=0))[0] if len(rows) == 0 or len(cols) == 0: return cube, rgb r0, r1, c0, c1 = rows[0], rows[-1]+1, cols[0], cols[-1]+1 return cube[r0:r1, c0:c1], rgb[r0:r1, c0:c1] def detect_angle(rgb, thr=0.1): """基于最大连通区域的主轴估计旋转角度""" binary = np.mean(rgb, axis=-1) > thr labels, n = ndimage.label(binary) if n == 0: return 0.0 region = (labels == np.bincount(labels[binary]).argmax()) ys, xs = np.where(region) if len(xs) < 3: return 0.0 _, vecs = np.linalg.eig(np.cov(xs, ys)) angle = np.degrees(np.arctan2(vecs[1, 0], vecs[0, 0])) if angle > 45: angle -= 90 if angle < -45: angle += 90 return float(angle) def rotate_cube(cube, rgb, angle): out = np.zeros_like(cube) for i in range(cube.shape[-1]): out[..., i] = ndimage.rotate(cube[..., i], angle, order=1, reshape=False) return out, ndimage.rotate(rgb, angle, order=1, reshape=False) def _max_rect_in_histogram(h): """单调栈, 返回 (area, height, l, r): 最大直方图矩形""" stack = [] best = (0, 0, 0, 0) # area, height, l, r for i, x in enumerate(list(h) + [0]): start = i while stack and stack[-1][1] > x: j, hj = stack.pop() area = hj * (i - j) if area > best[0]: best = (area, hj, j, i) start = j stack.append((start, x)) return best def largest_valid_rect(mask): """mask: bool 2D, 返回最大内接矩形 (r0, r1, c0, c1)""" H, W = mask.shape heights = np.zeros(W, dtype=int) best = (0, 0, 0, 0, 0) # area, r0, r1, c0, c1 for r in range(H): heights = np.where(mask[r], heights + 1, 0) area, h, l, rr = _max_rect_in_histogram(heights) if area > best[0]: best = (area, r - h + 1, r + 1, l, rr) _, r0, r1, c0, c1 = best return r0, r1, c0, c1 def crop_common_valid_rect(results, angle=None, thr=0.02): """UTM 对齐后: 旋转 + 求公共有效区最大内接矩形 + 裁剪。 - 公共有效区 = 各景 RGB 亮度 > thr 的逻辑与 - 若未给 angle, 用公共 mask 估计 """ # 1. 每景有效 mask (UTM 对齐后形状已经一致) masks = [np.mean(r['rgb'], axis=-1) > thr for r in results] common = np.logical_and.reduce(masks) print(f" 公共有效区占比: {common.mean()*100:.1f}%") # 2. 估计旋转角 (从公共 mask 主轴) if angle is None: rgb_proxy = np.dstack([common.astype(np.float32)] * 3) angle = detect_angle(rgb_proxy, thr=0.5) print(f" 旋转角度: {angle:.2f}°") # 3. 旋转所有 cube/rgb 和公共 mask (reshape=False 保持尺寸一致) for r in results: r['cube'], r['rgb'] = rotate_cube(r['cube'], r['rgb'], angle) common_rot = ndimage.rotate(common.astype(np.uint8), angle, order=0, reshape=False) > 0 # 4. 最大内接矩形 r0, r1, c0, c1 = largest_valid_rect(common_rot) print(f" 最大内接矩形: rows [{r0},{r1}), cols [{c0},{c1}) " f"→ {r1-r0} x {c1-c0}") for r in results: r['cube'] = r['cube'][r0:r1, c0:c1] r['rgb'] = r['rgb'][r0:r1, c0:c1] r['angle'] = angle return results, angle def process(data_dir, bands=VALID_BANDS, rotate=True): """完整处理流程: 加载 → 伪彩色 → 裁剪 → (可选)旋转 → 再裁剪 - rotate=True 用于单景可视化 (失去 UTM 网格对应) - rotate=False 保留原始 UTM 网格, 便于按坐标对齐 """ print(f"[{os.path.basename(data_dir)}] 加载...") cube = load_cube(data_dir, bands) rgb = make_rgb(cube) print(f" 原始: {cube.shape}") angle = 0.0 if rotate: # 仅做轻量去黑边 + 旋转矫正 (可视化) cube, rgb = crop_by_mask(cube, rgb, np.mean(cube, axis=-1) > 1) cube, rgb = crop_by_mask(cube, rgb, np.mean(rgb, axis=-1) > 0.05) print(f" 裁剪: {cube.shape}") angle = detect_angle(rgb) cube, rgb = rotate_cube(cube, rgb, angle) cube, rgb = crop_by_mask(cube, rgb, np.mean(rgb, axis=-1) > 0.05) print(f" 旋转 {angle:.2f}° 后: {cube.shape}") else: print(f" (保留 UTM 网格, 不旋转不裁剪)") return {'cube': cube, 'rgb': rgb, 'angle': angle} def show(rgb, title=''): plt.figure(figsize=(10, 8)) plt.imshow(rgb); plt.title(title); plt.axis('off'); plt.show() def save_result(cube, rgb, out_dir, tag, extra=None): """统一保存: npz + rgb tif + cube tif + metadata""" os.makedirs(out_dir, exist_ok=True) np.savez_compressed(os.path.join(out_dir, f'hyperion_{tag}.npz'), hyperspectral_cube=cube, valid_bands=VALID_BANDS) tiff.imwrite(os.path.join(out_dir, f'hyperion_rgb_{tag}.tif'), (rgb * 255).astype(np.uint8)) tiff.imwrite(os.path.join(out_dir, f'hyperion_{tag}.tif'), np.transpose(cube, (2, 0, 1)).astype(np.float32)) meta = {'shape': list(cube.shape), 'bands': len(VALID_BANDS), 'rgb_idx': RGB_IDX} if extra: meta.update(extra) with open(os.path.join(out_dir, f'meta_{tag}.json'), 'w', encoding='utf-8') as f: json.dump(meta, f, indent=2, ensure_ascii=False) print(f" ✓ 已保存到 {out_dir} (tag={tag})") def align_shapes(results): """将多个结果裁剪到相同 (H, W)""" h = min(r['cube'].shape[0] for r in results) w = min(r['cube'].shape[1] for r in results) for r in results: r['cube'] = r['cube'][:h, :w] r['rgb'] = r['rgb'][:h, :w] print(f"对齐到 ({h}, {w})") return results# ============================================================ # 4. 批量处理所有数据集 # ============================================================ results = [process(d) for d in DATA_DIRS] for d, r in zip(DATA_DIRS, results): show(r['rgb'], title=os.path.basename(d)) # ============================================================ # 5. 对齐形状并保存 # ============================================================ results = align_shapes(results) fig, axes = plt.subplots(1, len(results), figsize=(7*len(results), 6)) if len(results) == 1: axes = [axes] for ax, d, r in zip(axes, DATA_DIRS, results): ax.imshow(r['rgb']); ax.set_title(os.path.basename(d)); ax.axis('off') plt.tight_layout(); plt.show() for d, r in zip(DATA_DIRS, results): tag = os.path.basename(d).replace('_1T', '') save_result(r['cube'], r['rgb'], OUTPUT_DIR, tag, extra={'rotation_angle': r['angle']}) print(f"\n✅ 全部完成,输出目录: {OUTPUT_DIR}")# ============================================================ # 6. (数据集2) UTM 对齐 → 旋转 → 去黑边 → 公共最大内接矩形 # ============================================================ # 流程: # (a) 读 GeoTIFF 坐标, 把三景裁到 UTM 公共重叠区 (shape 一致) # (b) 用公共 mask 估计旋转角, 整体旋转 # (c) 在公共有效区里取最大内接矩形, 得到三张完全同尺寸的无黑边影像 print(">> 读取 GeoTIFF 坐标:") map_infos2 = [read_map_info(d) for d in DATA_DIRS2] for d, mi in zip(DATA_DIRS2, map_infos2): print(f" [{os.path.basename(d)}] 左上角 UTM=({mi[0]:.0f}, {mi[1]:.0f}), " f"pix={mi[2]:.2f} x {mi[3]:.2f} m") print("\n>> (a) 加载 (不旋转) 并按 UTM 对齐:") results2 = [process(d, rotate=False) for d in DATA_DIRS2] results2, ref_mapinfo2 = align_by_utm(results2, map_infos2) print("\n>> (b)(c) 旋转 + 公共最大内接矩形:") results2, angle2 = crop_common_valid_rect(results2) # 可视化 fig, axes = plt.subplots(1, len(results2), figsize=(7*len(results2), 6)) if len(results2) == 1: axes = [axes] for ax, d, r in zip(axes, DATA_DIRS2, results2): ax.imshow(r['rgb']); ax.set_title(os.path.basename(d)); ax.axis('off') plt.tight_layout(); plt.show() # 保存 OUTPUT_DIR2_ALIGNED = OUTPUT_DIR2 + '_utm_aligned' X_left, Y_top, dx, dy = ref_mapinfo2 for d, r in zip(DATA_DIRS2, results2): tag = os.path.basename(d).replace('_1T', '') save_result(r['cube'], r['rgb'], OUTPUT_DIR2_ALIGNED, tag, extra={'map_info': {'x_left_utm': X_left, 'y_top_utm': Y_top, 'pixel_size_m': [dx, dy]}, 'rotation_angle': float(angle2)}) print(f"\n✅ 完成: UTM 对齐 + 旋转 {angle2:.2f}° + 公共矩形裁剪") print(f" 统一形状: {results2[0]['cube'].shape}, 输出: {OUTPUT_DIR2_ALIGNED}")按照坐标对齐之后就可以得到多时相高光谱数据集了:
有需要还可以自行裁剪: