# -*- coding: utf-8 -*-
"""
===============================================================================
This module uses Radiometric Terrain Corrected Sentinel-1 (RTC-S1) products as
the synthetic aperture radar (SAR) backscatter data source, replacing the
preprocessing pipeline that would otherwise start from Sentinel-1 Level-1
data. RTC-S1 is a professionally preprocessed standard product, so data
preparation reduces to mosaicking and spatial clipping, quickly yielding
standardized inputs for soil moisture retrieval.

Product description:
RTC-S1 is produced by the OPERA project at NASA's Jet Propulsion Laboratory
(JPL). It is a radar backscatter coefficient product generated from Sentinel-1
SLC Level-1 data through a standardized processing chain.
- Official product page: https://www.jpl.nasa.gov/go/opera/products/rtc-product/
- Official usage example: https://github.com/OPERA-Cal-Val/OPERA_Applications/blob/main/RTC/notebooks/RTC_notebook.ipynb
- Standard processing chain:
    1. Multilooking
    2. Precise orbit correction
    3. Thermal denoising
    4. Radiometric calibration
    5. Terrain flattening
    6. Resampling to UTM projection
- Product characteristics:
    - Temporal coverage: from 2021-01-01 (continuously updated)
    - Spatial resolution: 30 m
    - Data formats: GeoTIFF/HDF5
- Further preprocessing done by this module:
    - Mosaic the bursts covering the same area on the same day into a single
      scene
    - Convert backscatter coefficients to decibels (dB)
    - Clip to sub-images on the UTM Military Grid Reference System (UTM-MGRS)
      tiles
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-03-10
===============================================================================
"""
import os
import sys
import glob
import json
import time
from datetime import datetime
import warnings
import dask.distributed
import logging
import earthaccess
from geopandas import read_file
import numpy as np
import xarray as xr
from rioxarray import open_rasterio
# Resolve the project root directory dynamically
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_root)
from utils.common_utils import setup_dask_environment, clip_image, mosaic_images
from HLS_SuPER.HLS_Su import earthdata_search


def download_granule(
    asset_name: list[str], granule_urls: list[str], output_dir: str
) -> bool:
    """
    Download a single batch of OPERA RTC-S1 granule files.

    Parameters
    ----------
    asset_name: list[str]
        Dataset collection name(s).
    granule_urls: list[str]
        Normalized RTC-S1 data URLs returned by the search.
    output_dir: str
        Output directory.

    Returns
    -------
    download_state: bool
        Download state, True or False.
    """
    asset_name = asset_name[0]
    if "STATIC" not in asset_name:
        # SAR polarization file, e.g.:
        # OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
        granule_urls = [url for url in granule_urls if ".h5" not in url]
        # Parse the acquisition date from the file name (not the full URL, in
        # case the URL path itself contains underscores)
        date = os.path.basename(granule_urls[0]).split("_")[4].split("T")[0]
        # Reformat the date as YYYYDOY
        date = datetime.strptime(date, "%Y%m%d").strftime("%Y%j")
        download_dir = os.path.join(output_dir, date, "burst")
    else:
        # Static metadata file, e.g.:
        # OPERA_L2_RTC-S1-STATIC_T113-240749-IW1_20140403_S1A_30_v1.0_local_incidence_angle.tif
        granule_urls = [url for url in granule_urls if "mask" not in url]
        download_dir = os.path.join(output_dir, "burst")
    os.makedirs(download_dir, exist_ok=True)
    # Download only if any of the files is missing locally
    if not all(
        os.path.isfile(os.path.join(download_dir, url.split("/")[-1]))
        for url in granule_urls
    ):
        try:
            earthaccess.download(granule_urls, download_dir)
        except Exception as e:
            logging.error(f"Error downloading RTC-S1 data: {e}. Skipping.")
            return False
    else:
        logging.info("RTC-S1 data already downloaded.")
    return True
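

# Illustration of the date parsing above, using the hypothetical file name
# from the comment in download_granule: splitting
# "OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif"
# on "_" yields the acquisition timestamp "20240605T102012Z" at index 4; its
# date part "20240605" is then reformatted to the day-of-year form "2024157"
# (YYYYDOY), which names the per-day download directory.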


def convert_to_db(image: xr.DataArray) -> xr.DataArray:
    """
    Convert backscatter coefficients to decibels (dB). The conversion:
    1. Handles zero and negative outliers
    2. Masks invalid values explicitly
    3. Preserves the coordinate system attributes of the input

    Parameters
    ----------
    image: xr.DataArray
        Raw backscatter coefficient array.

    Returns
    -------
    db: xr.DataArray
        Converted dB array; invalid cells are masked and the nodata tag is
        set to -9999.
    """
    # Build a valid-data mask that excludes zeros, negatives, and nulls
    valid_mask = (image > 0) & image.notnull()
    # log10 conversion, guarded against RuntimeWarnings from masked cells
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        db_values = 10 * np.log10(image.where(valid_mask))
    # Preserve the original coordinates, dimensions, and attributes
    db = xr.DataArray(
        db_values, coords=image.coords, dims=image.dims, attrs=image.attrs
    )
    db.rio.write_nodata(-9999, inplace=True)
    # Record the conversion in the processing history
    db.attrs["processing_history"] = (
        "converted to dB with zero/negative value filtering"
    )
    return db
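

# A minimal usage sketch for convert_to_db (illustrative only; the 2x2 array
# and its values are hypothetical test data, and this helper is not called by
# the pipeline): 1.0 maps to 0 dB and 0.5 to about -3.01 dB, while the zero
# and negative cells are masked to NaN with the nodata tag set to -9999.
def _demo_convert_to_db() -> xr.DataArray:
    demo = xr.DataArray(
        np.array([[1.0, 0.5], [0.0, -0.1]], dtype="float32"),
        dims=("y", "x"),
        coords={"y": [0.0, 1.0], "x": [0.0, 1.0]},
    )
    return convert_to_db(demo)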


def process_sar_granule(
    url_basenames: list[str],
    output_dir: str,
    date: str,
    tile_id: str,
    roi,
    clip=True,
) -> bool:
    """
    OPERA RTC-S1 preprocessing.

    Mosaic and clip all OPERA RTC-S1 data acquired on the same day.

    Parameters
    ----------
    url_basenames : list[str]
        File names of the RTC-S1 download URLs, used to match local files.
    output_dir : str
        Output root directory.
    date : str
        Date in YYYYDOY format.
    tile_id : str
        Output grid tile id.
    roi
        Region of interest (ROI) used for clipping.
    clip : bool
        Whether to clip to the ROI. Defaults to True.

    Returns
    -------
    process_state: bool
        Whether processing succeeded.
    """
    if not os.path.isdir(os.path.join(output_dir, date)):
        return False
    final_output_dir = os.path.join(output_dir, date)
    burst_dir = os.path.join(output_dir, date, "burst")
    # Keep only the local files that match this batch of download URLs
    vv_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*_VV.tif"))
        if os.path.basename(f) in url_basenames
    ]
    vh_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*_VH.tif"))
        if os.path.basename(f) in url_basenames
    ]
    if not vv_tif_list or not vh_tif_list:
        return False
    vv_file_name = f"SAR.RTC-S1.{tile_id}.{date}.VV.tif"
    vh_file_name = f"SAR.RTC-S1.{tile_id}.{date}.VH.tif"
    vv_file_path = os.path.join(final_output_dir, vv_file_name)
    vh_file_path = os.path.join(final_output_dir, vh_file_name)
    if os.path.isfile(vv_file_path) and os.path.isfile(vh_file_path):
        logging.info(f"{vv_file_name} and {vh_file_name} already processed.")
        return False
    with open_rasterio(vv_tif_list[0]) as ds:
        crs = ds.rio.crs
    # Mosaic the same-day bursts into a (1, y, x) array with its transform
    vv_ds = mosaic_images(vv_tif_list, tif_crs=crs)
    vv_ds = vv_ds.assign_attrs(
        DOY=date,
        file_name=vv_file_name,
    )
    vh_ds = mosaic_images(vh_tif_list, tif_crs=crs)
    vh_ds = vh_ds.assign_attrs(
        DOY=date,
        file_name=vh_file_name,
    )
    # Clip to the ROI
    if clip:
        vv_ds = clip_image(vv_ds, roi)
        vh_ds = clip_image(vh_ds, roi)
    # Convert to dB
    vv_ds = convert_to_db(vv_ds)
    vh_ds = convert_to_db(vh_ds)
    # The final image for a tile should have shape (1, 3661, 3661).
    # TODO: some images come out smaller, e.g. (1, 2089, 3661); they should be
    # padded to (1, 3661, 3661) with -9999 in the missing area (see the sketch
    # after this function).
    if vv_ds.shape[1] != 3661 or vh_ds.shape[1] != 3661:
        logging.warning(
            f"{vv_file_name} and {vh_file_name} do not match the UTM-MGRS grid {tile_id}."
        )
    # Save as Cloud Optimized GeoTIFFs
    vv_ds.rio.to_raster(
        vv_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    vh_ds.rio.to_raster(
        vh_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    logging.info(f"{date} RTC-S1 VV and VH data saved.")
    return True
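

# A possible approach to the padding TODO above (a sketch, not wired into the
# pipeline): extend a partially covered tile to the full MGRS tile extent,
# filling new cells with the nodata value. `tile_bounds` (minx, miny, maxx,
# maxy in the tile's UTM CRS) is a hypothetical input that would have to come
# from the MGRS tile definition.
def _pad_to_tile_grid(
    image: xr.DataArray, tile_bounds: tuple[float, float, float, float]
) -> xr.DataArray:
    minx, miny, maxx, maxy = tile_bounds
    # rioxarray's pad_box grows the raster to the requested bounds; cells
    # outside the original footprint receive constant_values
    return image.rio.pad_box(minx, miny, maxx, maxy, constant_values=-9999)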


def process_sar_static_granule(
    url_basenames: list[str], output_dir: str, tile_id: str, roi, clip=True
) -> bool:
    """
    OPERA RTC-S1-STATIC preprocessing.

    Mosaic and clip the OPERA RTC-S1-STATIC layers covering the same area.

    Parameters
    ----------
    url_basenames : list[str]
        File names of the RTC-S1-STATIC download URLs, used to match local
        files.
    output_dir: str
        Output root directory.
    tile_id: str
        Tile id.
    roi
        Region of interest (ROI) used for clipping.
    clip: bool
        Whether to clip to the ROI. Defaults to True.

    Returns
    -------
    process_state: bool
        Whether processing succeeded.
    """
    burst_dir = os.path.join(output_dir, "burst")
    # Keep only the local files that match this batch of download URLs
    lia_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*local_incidence_angle.tif"))
        if os.path.basename(f) in url_basenames
    ]
    # Note: "*incidence_angle.tif" also matches the local incidence angle
    # files, so those are filtered out explicitly
    ia_tif_list = [
        f
        for f in glob.glob(os.path.join(burst_dir, "*incidence_angle.tif"))
        if os.path.basename(f) in url_basenames
        and "local_incidence_angle" not in os.path.basename(f)
    ]
    if not lia_tif_list or not ia_tif_list:
        return False
    # Local incidence angle (LIA) data
    lia_file_name = f"SAR.RTC-S1.{tile_id}.2015.LIA.tif"
    # Incidence angle (IA) data
    ia_file_name = f"SAR.RTC-S1.{tile_id}.2015.IA.tif"
    lia_file_path = os.path.join(output_dir, lia_file_name)
    ia_file_path = os.path.join(output_dir, ia_file_name)
    if os.path.isfile(lia_file_path) and os.path.isfile(ia_file_path):
        logging.info(f"{lia_file_name} and {ia_file_name} already processed.")
        return True
    with open_rasterio(lia_tif_list[0]) as ds:
        crs = ds.rio.crs
    lia_ds = mosaic_images(lia_tif_list)
    lia_ds = lia_ds.assign_attrs(
        file_name=lia_file_name,
    )
    lia_ds.rio.write_crs(crs, inplace=True)
    ia_ds = mosaic_images(ia_tif_list)
    ia_ds = ia_ds.assign_attrs(
        file_name=ia_file_name,
    )
    ia_ds.rio.write_crs(crs, inplace=True)
    if clip:
        lia_ds = clip_image(lia_ds, roi)
        ia_ds = clip_image(ia_ds, roi)
    lia_ds.rio.to_raster(
        lia_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    ia_ds.rio.to_raster(
        ia_file_path,
        driver="COG",
        compress="DEFLATE",
        dtype="float32",
    )
    logging.info(f"{tile_id} SAR.RTC-S1 static data saved.")
    return True


def main(
    asset_name: list[str],
    region,
    years: list[str | int],
    output_dir: str,
    tile_id: str,
):
    # `region` is a GeoDataFrame; its total bounds serve as the search bbox
    bbox = tuple(region.total_bounds)
    # Example granule file name:
    # OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
    static_name = ["OPERA_L2_RTC-S1-STATIC_V1"]
    sar_output_dir = os.path.join(output_dir, "RTC_S1")
    static_output_dir = os.path.join(output_dir, "RTC_S1_STATIC")
    os.makedirs(sar_output_dir, exist_ok=True)
    os.makedirs(static_output_dir, exist_ok=True)
    results_urls = []
    results_urls_file = os.path.join(
        sar_output_dir, f"RTC_S1_{tile_id}_results_urls.json"
    )
    static_urls_file = os.path.join(
        static_output_dir, f"RTC_S1_STATIC_{tile_id}_results_urls.json"
    )
    for year in years:
        year_results_dir = os.path.join(sar_output_dir, str(year))
        os.makedirs(year_results_dir, exist_ok=True)
        year_results_file = os.path.join(
            year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
        )
        # Restrict each year's search to June through August
        year_temporal = (f"{year}-06-01T00:00:00", f"{year}-08-31T23:59:59")
        year_results = earthdata_search(asset_name, year_temporal, roi=bbox)
        results_urls.extend(year_results)
        with open(year_results_file, "w") as f:
            json.dump(year_results, f)
    with open(results_urls_file, "w") as f:
        json.dump(results_urls, f)
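    # `results_urls` and `static_granule_urls` are assumed to be lists of
    # per-granule URL lists, e.g. [["...VV.tif", "...VH.tif", ...], ...];
    # the basename flattening below and in the per-year loop relies on that
    # two-level structure.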
    static_granule_urls = earthdata_search(static_name, roi=bbox)
    static_url_basenames = [
        os.path.basename(url) for sublist in static_granule_urls for url in sublist
    ]
    with open(static_urls_file, "w") as f:
        json.dump(static_granule_urls, f)
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s:%(asctime)s ||| %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),
            logging.FileHandler(
                os.path.join(output_dir, f"RTC_S1_SuPER_{tile_id}.log")
            ),
        ],
    )
    logging.info(f"Found {len(results_urls)} RTC-S1 granules.")
    logging.info(f"Found {len(static_granule_urls)} RTC-S1 static granules.")
    client = dask.distributed.Client(timeout=60, memory_limit="8GB")
    client.run(setup_dask_environment)
    all_start_time = time.time()
    logging.info("Start downloading and processing RTC-S1 ...")
    for year in years:
        year_results_dir = os.path.join(sar_output_dir, str(year))
        year_results_file = os.path.join(
            year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
        )
        with open(year_results_file) as f:
            year_results = json.load(f)
        url_basenames = [
            os.path.basename(url) for sublist in year_results for url in sublist
        ]
        client.scatter(year_results)
        start_time = time.time()
        logging.info(f"Start {year}...")
        download_tasks = [
            dask.delayed(download_granule)(asset_name, granule_urls, year_results_dir)
            for granule_urls in year_results
        ]
        # Non-directory entries (e.g. the results JSON file) are skipped
        # inside process_sar_granule
        process_tasks = [
            dask.delayed(process_sar_granule)(
                url_basenames, year_results_dir, date, tile_id, region
            )
            for date in os.listdir(year_results_dir)
        ]
        # Step 1: download the RTC-S1 data
        dask.compute(*download_tasks)
        # Step 2: process the RTC-S1 data
        dask.compute(*process_tasks)
        total_time = time.time() - start_time
        logging.info(
            f"{year} RTC-S1 downloading and processing complete. Total time: {total_time:.2f} seconds"
        )
    # Step 3: download and process the RTC-S1-STATIC data
    start_time = time.time()
    logging.info("Start downloading and processing RTC-S1-STATIC ...")
    download_tasks = [
        dask.delayed(download_granule)(static_name, granule_urls, static_output_dir)
        for granule_urls in static_granule_urls
    ]
    process_task = dask.delayed(process_sar_static_granule)(
        static_url_basenames, static_output_dir, tile_id, region
    )
    dask.compute(*download_tasks)
    dask.compute(process_task)
    total_time = time.time() - start_time
    logging.info(
        f"RTC-S1-STATIC downloading and processing complete. Total time: {total_time:.2f} seconds"
    )
    client.close(timeout=30)  # allow extra time for a clean shutdown
    all_total_time = time.time() - all_start_time
    logging.info(
        f"All RTC-S1 downloading and processing complete. Total time: {all_total_time:.2f} seconds"
    )


if __name__ == "__main__":
    earthaccess.login(persist=True)
    asset_name = ["OPERA_L2_RTC-S1_V1"]
    tile_id = "49REL"
    region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
    # region = read_file("./data/vectors/wuling_guanqu_polygon.geojson")
    # years = ["2024", "2023", "2022"]
    years = ["2024"]
    output_dir = ".\\data\\SAR"
    os.makedirs(output_dir, exist_ok=True)
    main(asset_name, region, years, output_dir, tile_id)