# -*- coding: utf-8 -*-
"""
===============================================================================
This module uses Radiometric Terrain Corrected Sentinel-1 (RTC-S1) satellite
data as the synthetic aperture radar (SAR) backscatter source, replacing the
preprocessing pipeline for the original Sentinel-1 L1 products. RTC-S1 is a
professionally preprocessed standard product that greatly simplifies data
preparation: only mosaicking and spatial clipping are required to obtain
standardized inputs for soil moisture retrieval.

Product description:
The RTC-S1 product is produced by the OPERA project at the NASA Jet Propulsion
Laboratory (JPL). It is a radar backscatter coefficient product generated from
Sentinel-1 SLC L1 data through a standardized processing chain.
- Official data portal: https://www.jpl.nasa.gov/go/opera/products/rtc-product/
- Official usage example: https://github.com/OPERA-Cal-Val/OPERA_Applications/blob/main/RTC/notebooks/RTC_notebook.ipynb
- Standard processing chain:
    1. Multilooking
    2. Precise orbit correction
    3. Thermal denoising
    4. Radiometric calibration
    5. Terrain flattening
    6. Resampling to UTM projection
- Product characteristics:
    - Temporal coverage: since 2021-01-01 (continuously updated)
    - Spatial resolution: 30 m
    - Data formats: GeoTIFF/HDF5
- Further preprocessing in this module:
    - Mosaic the bursts covering the same area on the same day into a single
      scene
    - Convert backscatter coefficients to decibel (dB) units
    - Clip to sub-region images following the UTM Military Grid Reference
      System (UTM-MGRS) tiles
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-03-10
===============================================================================
"""

import os
import sys
import glob
import json
import time
import logging
import warnings
from datetime import datetime

import dask.distributed
import earthaccess
import numpy as np
import xarray as xr
from geopandas import GeoDataFrame, read_file
from rioxarray import open_rasterio

sys.path.append("D:/NASA_EarthData_Script")
from utils.common_utils import setup_dask_environment, clip_image, mosaic_images
from HLS_SuPER.HLS_Su import earthdata_search


def download_granule(
    asset_name: list[str], granule_urls: list[str], output_dir: str
) -> bool:
    """
    Download a single batch of OPERA RTC-S1 granules.

    Parameters
    ----------
    asset_name: list[str]
        Dataset (collection) names; only the first entry is used.
    granule_urls: list[str]
        Normalized RTC-S1 data URLs returned by the search.
    output_dir: str
        Output directory.

    Returns
    -------
    download_state: bool
        True if the data were downloaded (or already exist), False otherwise.
    """
    asset_name = asset_name[0]
    if "STATIC" not in asset_name:
        # SAR polarization file, e.g.:
        # OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
        granule_urls = [url for url in granule_urls if ".h5" not in url]
        # Parse the acquisition date from the file name (not the full URL,
        # whose directory part may also contain underscores)
        date = os.path.basename(granule_urls[0]).split("_")[4].split("T")[0]
        # Reformat the date as YYYYDOY
        date = datetime.strptime(date, "%Y%m%d").strftime("%Y%j")
        download_dir = os.path.join(output_dir, date, "burst")
    else:
        # Static metadata file, e.g.:
        # OPERA_L2_RTC-S1-STATIC_T113-240749-IW1_20140403_S1A_30_v1.0_local_incidence_angle.tif
        granule_urls = [url for url in granule_urls if "mask" not in url]
        download_dir = os.path.join(output_dir, "burst")
    os.makedirs(download_dir, exist_ok=True)
    # Download only if some of the files are missing locally
    if not all(
        os.path.isfile(os.path.join(download_dir, url.split("/")[-1]))
        for url in granule_urls
    ):
        try:
            earthaccess.download(granule_urls, download_dir)
        except Exception as e:
            logging.error(f"Error downloading RTC-S1 data: {e}. Skipping.")
            return False
    else:
        logging.info("RTC-S1 data already downloaded.")
    return True


def convert_to_db(image: xr.DataArray) -> xr.DataArray:
    """
    Convert backscatter coefficients to decibels (dB).

    The conversion:
    1. Handles zero and negative outliers
    2. Explicitly handles invalid values
    3. Preserves the coordinate system attributes of the input

    Parameters
    ----------
    image: xr.DataArray
        Raw backscatter coefficient array.

    Returns
    -------
    db: xr.DataArray
        Converted dB array; invalid values are set to -9999.
    """
    # Valid-value mask: exclude zeros, negative values, and nulls
    valid_mask = (image > 0) & image.notnull()
    # Guarded log10 conversion
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        db_values = 10 * np.log10(image.where(valid_mask))
    # Fill masked pixels with -9999 and preserve the original metadata
    db = xr.DataArray(
        db_values.fillna(-9999),
        coords=image.coords,
        dims=image.dims,
        attrs=image.attrs,
    )
    db.rio.write_nodata(-9999, inplace=True)
    # Record the conversion in the metadata
    db.attrs["processing_history"] = (
        "converted to dB with zero/negative value filtering"
    )
    return db
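
# ---------------------------------------------------------------------------
# Illustrative example (not executed by the pipeline): the conversion above is
# sigma0_dB = 10 * log10(sigma0_linear), with zero/negative pixels mapped to
# the -9999 nodata value.
#
#   import numpy as np
#   import xarray as xr
#   demo = xr.DataArray(np.array([[0.1, 0.01, 0.0, -1.0]]), dims=("y", "x"))
#   convert_to_db(demo).values  # -> [[-10.0, -20.0, -9999.0, -9999.0]]
# ---------------------------------------------------------------------------
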

def process_sar_granule(
    url_basenames: list[str],
    output_dir: str,
    date: str,
    tile_id: str,
    roi: list,
    clip=True,
) -> bool:
    """
    OPERA RTC-S1 preprocessing.

    Mosaic and clip all OPERA RTC-S1 bursts acquired on the same day.

    Parameters
    ----------
    url_basenames : list[str]
        File names of the RTC-S1 download URLs, used for local file matching.
    output_dir : str
        Output root directory.
    date : str
        Date in YYYYDOY (%Y%j) format.
    tile_id : str
        Output grid tile id.
    roi : list
        Region of interest (ROI) used for clipping.
    clip : bool
        Whether to clip to the ROI, default True.

    Returns
    -------
    process_state: bool
        True if processed successfully.
    """
    if not os.path.isdir(os.path.join(output_dir, date)):
        return False
    final_output_dir = os.path.join(output_dir, date)
    burst_dir = os.path.join(output_dir, date, "burst")
    # Select the local files that match this batch of download URLs
    vv_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*_VV.tif"))
        if os.path.basename(f) in url_basenames
    ]
    vh_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*_VH.tif"))
        if os.path.basename(f) in url_basenames
    ]
    if not vv_tif_list or not vh_tif_list:
        return False
    vv_file_name = f"SAR.RTC-S1.{tile_id}.{date}.VV.tif"
    vh_file_name = f"SAR.RTC-S1.{tile_id}.{date}.VH.tif"
    vv_file_path = os.path.join(final_output_dir, vv_file_name)
    vh_file_path = os.path.join(final_output_dir, vh_file_name)
    if os.path.isfile(vv_file_path) and os.path.isfile(vh_file_path):
        logging.info(f"{vv_file_name} and {vh_file_name} already processed.")
        return True
    with open_rasterio(vv_tif_list[0]) as ds:
        crs = ds.rio.crs
    # Mosaic the same-day bursts into a (1, y, x) array with its transform
    vv_ds = mosaic_images(vv_tif_list, tif_crs=crs)
    vv_ds = vv_ds.assign_attrs(
        DOY=date,
        file_name=vv_file_name,
    )
    vh_ds = mosaic_images(vh_tif_list, tif_crs=crs)
    vh_ds = vh_ds.assign_attrs(
        DOY=date,
        file_name=vh_file_name,
    )
    # Clip to the ROI
    if clip:
        vv_ds = clip_image(vv_ds, roi)
        vh_ds = clip_image(vh_ds, roi)
    # Convert to dB
    vv_ds = convert_to_db(vv_ds)
    vh_ds = convert_to_db(vh_ds)
    # A full tile yields a final image of shape (1, 3661, 3661).
    # TODO: Some images come out smaller, e.g. (1, 2089, 3661); they should be
    # padded to (1, 3661, 3661), filling the missing region with -9999 (see
    # the sketch after this function).
    if vv_ds.shape[1] != 3661 or vh_ds.shape[1] != 3661:
        logging.warning(
            f"{vv_file_name} and {vh_file_name} do not fully match the UTM-MGRS grid {tile_id}."
        )
    # Save as Cloud Optimized GeoTIFFs
    vv_ds.rio.to_raster(
        vv_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    vh_ds.rio.to_raster(
        vh_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    logging.info(f"{date} RTC-S1 VV and VH data saved.")
    return True
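
# ---------------------------------------------------------------------------
# Sketch for the TODO in `process_sar_granule`: one way to grow an undersized
# mosaic to the full tile extent is rioxarray's `rio.pad_box`, which pads the
# array to the given bounds with a constant fill value. The helper name and
# the `tile_bounds` argument are hypothetical; this sketch is not called
# anywhere in this module.
def _pad_to_tile_extent(
    image: xr.DataArray, tile_bounds: tuple[float, float, float, float]
) -> xr.DataArray:
    """Pad `image` to `tile_bounds` (minx, miny, maxx, maxy) with -9999 fill."""
    padded = image.rio.pad_box(*tile_bounds, constant_values=-9999)
    padded.rio.write_nodata(-9999, inplace=True)
    return padded
# ---------------------------------------------------------------------------
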

def process_sar_static_granule(
    url_basenames: list[str], output_dir: str, tile_id: str, roi: list, clip=True
) -> bool:
    """
    OPERA RTC-S1-STATIC preprocessing.

    Mosaic and clip the OPERA RTC-S1-STATIC layers covering the same area.

    Parameters
    ----------
    url_basenames : list[str]
        File names of the RTC-S1-STATIC download URLs, used for local file
        matching.
    output_dir: str
        Output root directory.
    tile_id: str
        Tile id.
    roi: list
        Region of interest (ROI) used for clipping.
    clip: bool
        Whether to clip to the ROI, default True.

    Returns
    -------
    process_state: bool
        True if processed successfully.
    """
    burst_dir = os.path.join(output_dir, "burst")
    # Select the local files that match this batch of download URLs
    lia_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*local_incidence_angle.tif"))
        if os.path.basename(f) in url_basenames
    ]
    # "*incidence_angle.tif" also matches the local incidence angle files,
    # so exclude them explicitly
    ia_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*incidence_angle.tif"))
        if os.path.basename(f) in url_basenames
        and "local" not in os.path.basename(f)
    ]
    if not lia_tif_list or not ia_tif_list:
        return False
    # Local incidence angle (LIA) data
    lia_file_name = f"SAR.RTC-S1.{tile_id}.2015.LIA.tif"
    # Incidence angle (IA) data
    ia_file_name = f"SAR.RTC-S1.{tile_id}.2015.IA.tif"
    lia_file_path = os.path.join(output_dir, lia_file_name)
    ia_file_path = os.path.join(output_dir, ia_file_name)
    if os.path.isfile(lia_file_path) and os.path.isfile(ia_file_path):
        logging.info(f"{lia_file_name} and {ia_file_name} already processed.")
        return True
    with open_rasterio(lia_tif_list[0]) as ds:
        crs = ds.rio.crs
    lia_ds = mosaic_images(lia_tif_list)
    lia_ds = lia_ds.assign_attrs(
        file_name=lia_file_name,
    )
    lia_ds.rio.write_crs(crs, inplace=True)
    ia_ds = mosaic_images(ia_tif_list)
    ia_ds = ia_ds.assign_attrs(
        file_name=ia_file_name,
    )
    ia_ds.rio.write_crs(crs, inplace=True)
    if clip:
        lia_ds = clip_image(lia_ds, roi)
        ia_ds = clip_image(ia_ds, roi)
    lia_ds.rio.to_raster(
        lia_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    ia_ds.rio.to_raster(
        ia_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    logging.info(f"{tile_id} SAR.RTC-S1 static data saved.")
    return True
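
# ---------------------------------------------------------------------------
# Output layout produced by `main` below (paths as constructed in the code):
#
#   <output_dir>/
#       RTC_S1/<year>/<YYYYDOY>/SAR.RTC-S1.<tile_id>.<YYYYDOY>.VV.tif
#       RTC_S1/<year>/<YYYYDOY>/SAR.RTC-S1.<tile_id>.<YYYYDOY>.VH.tif
#       RTC_S1_STATIC/SAR.RTC-S1.<tile_id>.2015.LIA.tif
#       RTC_S1_STATIC/SAR.RTC-S1.<tile_id>.2015.IA.tif
# ---------------------------------------------------------------------------
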

def main(
    asset_name: list[str],
    region: GeoDataFrame,
    years: list[str],
    output_dir: str,
    tile_id: str,
):
    """
    Search, download, and preprocess RTC-S1 and RTC-S1-STATIC data.

    Parameters
    ----------
    asset_name: list[str]
        RTC-S1 collection name(s) used for the search.
    region: GeoDataFrame
        ROI polygon; its total bounds are used as the search bounding box.
    years: list[str]
        Years to process; June-August of each year is searched.
    output_dir: str
        Output root directory.
    tile_id: str
        UTM-MGRS tile id used in output file names.
    """
    bbox = tuple(list(region.total_bounds))
    # Example file name:
    # OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
    static_name = ["OPERA_L2_RTC-S1-STATIC_V1"]
    sar_output_dir = os.path.join(output_dir, "RTC_S1")
    static_output_dir = os.path.join(output_dir, "RTC_S1_STATIC")
    os.makedirs(sar_output_dir, exist_ok=True)
    os.makedirs(static_output_dir, exist_ok=True)
    results_urls = []
    results_urls_file = os.path.join(
        sar_output_dir, f"RTC_S1_{tile_id}_results_urls.json"
    )
    static_urls_file = os.path.join(
        static_output_dir, f"RTC_S1_STATIC_{tile_id}_results_urls.json"
    )
    for year in years:
        year_results_dir = os.path.join(sar_output_dir, year)
        os.makedirs(year_results_dir, exist_ok=True)
        year_results_file = os.path.join(
            year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
        )
        year_temporal = (f"{year}-06-01T00:00:00", f"{year}-08-31T23:59:59")
        year_results = earthdata_search(asset_name, year_temporal, roi=bbox)
        results_urls.extend(year_results)
        with open(year_results_file, "w") as f:
            json.dump(year_results, f)
    with open(results_urls_file, "w") as f:
        json.dump(results_urls, f)
    static_granule_urls = earthdata_search(static_name, roi=bbox)
    static_url_basenames = [
        os.path.basename(url) for sublist in static_granule_urls for url in sublist
    ]
    with open(static_urls_file, "w") as f:
        json.dump(static_granule_urls, f)
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s:%(asctime)s ||| %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),
            logging.FileHandler(
                os.path.join(
                    output_dir, f"RTC_S1_SuPER_{tile_id}_{'_'.join(years)}.log"
                )
            ),
        ],
    )
    logging.info(f"Found {len(results_urls)} RTC-S1 granules.")
    logging.info(f"Found {len(static_granule_urls)} RTC-S1 static granules.")
    client = dask.distributed.Client(timeout=60, memory_limit="8GB")
    client.run(setup_dask_environment)
    all_start_time = time.time()
    logging.info("Start downloading and processing RTC-S1 ...")
    for year in years:
        year_results_dir = os.path.join(sar_output_dir, year)
        year_results_file = os.path.join(
            year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
        )
        with open(year_results_file) as f:
            year_results = json.load(f)
        url_basenames = [
            os.path.basename(url) for sublist in year_results for url in sublist
        ]
        client.scatter(year_results)
        start_time = time.time()
        logging.info(f"Start {year}...")
        download_tasks = [
            dask.delayed(download_granule)(asset_name, granule_urls, year_results_dir)
            for granule_urls in year_results
        ]
        # Step 1: download the RTC-S1 granules
        dask.compute(*download_tasks)
        # Step 2: process the RTC-S1 granules. The per-date directories are
        # created by the downloads, so they are listed only after Step 1.
        process_tasks = [
            dask.delayed(process_sar_granule)(
                url_basenames, year_results_dir, date, tile_id, region
            )
            for date in os.listdir(year_results_dir)
            if os.path.isdir(os.path.join(year_results_dir, date))
        ]
        dask.compute(*process_tasks)
        total_time = time.time() - start_time
        logging.info(
            f"{year} RTC-S1 downloading complete and processed. Total time: {total_time:.2f} seconds"
        )
    # Step 3: download and process the RTC-S1-STATIC granules
    start_time = time.time()
    logging.info("Start downloading and processing RTC-S1-STATIC ...")
    download_tasks = [
        dask.delayed(download_granule)(static_name, granule_urls, static_output_dir)
        for granule_urls in static_granule_urls
    ]
    process_task = dask.delayed(process_sar_static_granule)(
        static_url_basenames, static_output_dir, tile_id, region
    )
    dask.compute(*download_tasks)
    dask.compute(process_task)
    total_time = time.time() - start_time
    logging.info(
        f"RTC-S1-STATIC downloading complete and processed. Total time: {total_time:.2f} seconds"
    )
    client.close(timeout=30)  # Allow extra time for a clean shutdown
    all_total_time = time.time() - all_start_time
    logging.info(
        f"All RTC-S1 downloading complete and processed. Total time: {all_total_time:.2f} seconds"
    )


if __name__ == "__main__":
    earthaccess.login(persist=True)
    asset_name = ["OPERA_L2_RTC-S1_V1"]
    tile_id = "49REL"
    region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
    # region = read_file("./data/vectors/wuling_guanqu_polygon.geojson")
    # years = ["2024", "2023", "2022"]
    years = ["2024"]
    output_dir = ".\\data\\SAR"
    os.makedirs(output_dir, exist_ok=True)
    main(asset_name, region, years, output_dir, tile_id)
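
# ---------------------------------------------------------------------------
# Illustrative post-run check (the path below is hypothetical; substitute a
# real output file): open one generated COG and confirm the nodata tag
# written by `convert_to_db`.
#
#   from rioxarray import open_rasterio
#   vv = open_rasterio("./data/SAR/RTC_S1/2024/2024153/SAR.RTC-S1.49REL.2024153.VV.tif")
#   print(vv.rio.nodata)  # expected: -9999.0
# ---------------------------------------------------------------------------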