366 lines
13 KiB
Python
366 lines
13 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
===============================================================================
|
||
This module contains functions related to preprocessing DEM data.
|
||
For example, elevation, slope, aspect
|
||
|
||
Step1: Use earthaccess search and download DEM Data
|
||
- NASADEM_HGT
|
||
- includes 30m DEM, based on SRTM data
|
||
- https://lpdaac.usgs.gov/products/nasadem_hgtv001/
|
||
- NASADEM_SC
|
||
- includes 30m slope, aspect, based on NASADEM_HGT
|
||
- https://lpdaac.usgs.gov/products/nasadem_scv001/
|
||
- ALOS PALSAR RTC Project
|
||
- includes 12.5, 30m DEM, based on ALOS PALSAR data
|
||
- https://www.earthdata.nasa.gov/data/projects/alos-palsar-rtc-project
|
||
|
||
Step2a: Process NASADEM data
|
||
- 下载的 NASADEM 均为 *.zip 文件, 需先进行解压
|
||
- NASADEM 文件名称结构为: NASADEM_类型_网格编号/网格编号.数据类型
|
||
- 高程示例: NASADEM_HGT_n30e113/n30e113.hgt
|
||
- 坡度示例: NASADEM_SC_n30e113/n30e113.slope
|
||
- 坡向示例: NASADEM_SC_n30e113/n30e113.aspect
|
||
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
|
||
|
||
Step2b: Process ALOS PALSAR RTC Project data
|
||
- 下载的 ALOS PALSAR RTC Project 均为 *.zip 文件, 需先进行解压
|
||
- ALOS PALSAR RTC Project 文件名称结构为: AP_轨道号_CCC_DDDD_RT1.数据类型.tif
|
||
- 高程示例: AP_16112_FBS_F0620_RT1.dem.tif
|
||
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
|
||
|
||
|
||
-------------------------------------------------------------------------------
|
||
Authors: Hong Xie
|
||
Last Updated: 2025-10-20
|
||
===============================================================================
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import glob
|
||
from pathlib import Path
|
||
import json
|
||
import requests
|
||
import zipfile
|
||
import time
|
||
import dask.distributed
|
||
import logging
|
||
import earthaccess
|
||
import numpy as np
|
||
from rioxarray import open_rasterio
|
||
|
||
sys.path.append(str(Path(".").parent))
|
||
|
||
from utils.common_utils import (
|
||
setup_dask_environment,
|
||
setup_logging,
|
||
clip_image,
|
||
mosaic_images,
|
||
)
|
||
from HLS_SuPER.HLS_Su import earthdata_search
|
||
|
||
|
||
def reorganize_nasadem_urls(dem_results_urls: list):
|
||
"""
|
||
重组 NASADEM 下载链接
|
||
|
||
将同一格网内的高程, 坡度坡向数据链接进行组合
|
||
|
||
Parameters
|
||
----------
|
||
dem_results_urls: list
|
||
查询返回的 NASADEM 数据 URL 列表
|
||
|
||
Returns
|
||
-------
|
||
grouped_results_urls: list
|
||
重组后的 NASADEM 数据 URL 列表
|
||
"""
|
||
tile_ids = []
|
||
for granule in dem_results_urls:
|
||
tile_id = granule[0].split("/")[-2].split("_")[-1]
|
||
tile_ids.append(tile_id)
|
||
|
||
tile_ids = np.array(tile_ids)
|
||
# 根据瓦片ID找到对应的索引
|
||
tile_id_indices = np.where(tile_ids == tile_id)
|
||
# 根据索引过滤结果
|
||
return [dem_results_urls[i] for i in tile_id_indices[0]]
|
||
|
||
|
||
def download_granule(granule_urls: list[str], output_dir: str) -> bool:
|
||
"""
|
||
下载单批数据
|
||
|
||
Parameters
|
||
----------
|
||
granule_urls: list
|
||
查询返回的规范化待下载数据 URL 列表
|
||
output_dir: str
|
||
下载目录
|
||
|
||
Returns
|
||
-------
|
||
download_state: bool
|
||
下载状态 True or False
|
||
"""
|
||
# 检查是否已下载
|
||
if not all(
|
||
os.path.isfile(os.path.join(output_dir, os.path.basename(url)))
|
||
for url in granule_urls
|
||
):
|
||
try:
|
||
earthaccess.download(granule_urls, output_dir)
|
||
except Exception as e:
|
||
# 下载失败时, 先尝试使用 requests 库下载
|
||
for url in granule_urls:
|
||
response = requests.get(url, timeout=30)
|
||
if response.status_code == 200:
|
||
with open(
|
||
os.path.join(output_dir, os.path.basename(url)), "wb"
|
||
) as f:
|
||
f.write(response.content)
|
||
else:
|
||
logging.error(
|
||
f"Error downloading data: {response.status_code}. Skipping."
|
||
)
|
||
return False
|
||
logging.info("All Data already downloaded.")
|
||
return True
|
||
|
||
|
||
def process_dem_zip(
|
||
zip_file_list: list[str], output_dir: Path, mode_str: str = "NASADEM"
|
||
) -> None:
|
||
"""
|
||
处理下载的 DEM ZIP 文件,
|
||
|
||
若为 NASADEM, 则将其解压后的文件统一为可读写的 .hgt 格式;
|
||
若为 ALOSDEM, 则直接读取 dem 并转储为 DEFLATE 压缩后的 tif 文件.
|
||
|
||
Parameters
|
||
----------
|
||
zip_file_list: list
|
||
待解压的 ZIP 文件列表
|
||
output_dir: Path
|
||
输出目录
|
||
mode_str: str, optional
|
||
解压模式, 默认为 "NASADEM", 可选 "NASADEM", "ALOSDEM"
|
||
"""
|
||
try:
|
||
for zip_path in zip_file_list:
|
||
if not os.path.isfile(zip_path) or not zipfile.is_zipfile(zip_path):
|
||
continue
|
||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||
if mode_str == "NASADEM":
|
||
# 示例ZIP文件名称: NASADEM_HGT_n30e113.zip
|
||
# 仅解压包含 .hgt, .slope, .aspect 的文件
|
||
for tif_file in [
|
||
f
|
||
for f in zip_ref.namelist()
|
||
if f.endswith((".hgt", ".slope", ".aspect"))
|
||
]:
|
||
# 解压时重命名文件
|
||
new_name = (
|
||
tif_file.replace(".hgt", ".elevation.hgt")
|
||
if tif_file.endswith(".hgt")
|
||
else f"{tif_file}.hgt"
|
||
)
|
||
# 解压文件到指定目录
|
||
unzip_file_path = os.path.join(output_dir, new_name)
|
||
if os.path.exists(unzip_file_path):
|
||
continue
|
||
with zip_ref.open(tif_file) as source_file:
|
||
with open(unzip_file_path, "wb") as unzip_file:
|
||
unzip_file.write(source_file.read())
|
||
elif mode_str == "ALOSDEM":
|
||
# 示例ZIP文件名称: AP_14354_FBS_F3000_RT1.zip
|
||
# 仅解压包含 dem.tif 的文件, 其内部路径为 AP_14354_FBS_F3000_RT1/AP_14354_FBS_F3000_RT1.dem.tif
|
||
tif_file = [f for f in zip_ref.namelist() if f.endswith("dem.tif")][0]
|
||
# 读取压缩包内的文件并转储到指定目录
|
||
dem = open_rasterio(zip_ref.open(tif_file))
|
||
# 转储为 INT32 DEFLATE 压缩后的 .tif 文件
|
||
dem.rio.to_raster(
|
||
output_dir / os.path.basename(tif_file),
|
||
compress="DEFLATE",
|
||
)
|
||
except Exception as e:
|
||
logging.error(f"Error unzipping {mode_str} to {output_dir}: {e}")
|
||
return
|
||
|
||
|
||
def process_granule(
|
||
unzip_dir: Path,
|
||
output_dir: Path,
|
||
mode_str: str,
|
||
name: str,
|
||
roi: list,
|
||
clip=True,
|
||
tile_id: str = None,
|
||
) -> bool:
|
||
"""
|
||
读取解压并重命名处理后的指定类型 DEM 数据并进行预处理, 包括读取, 裁剪, 镶嵌, 并对坡度坡向进行缩放
|
||
|
||
Parameters
|
||
----------
|
||
unzip_dir: Path
|
||
解压后的 DEM 文件根目录
|
||
output_dir: Path
|
||
输出根目录
|
||
mode_str: str
|
||
数据模式, 可选 NASADEM, ALOSDEM
|
||
name: str
|
||
数据类型, 可选 elevation, slope, aspect
|
||
roi: list
|
||
网格范围
|
||
clip: bool
|
||
是否裁剪
|
||
tile_id: str
|
||
网格编号
|
||
|
||
Returns
|
||
-------
|
||
process_state: bool
|
||
处理状态 True or False
|
||
"""
|
||
|
||
if mode_str == "NASADEM":
|
||
dem_file_list = glob.glob(os.path.join(unzip_dir, f"*{name}.hgt"))
|
||
out_tif_name = f"DEM.{mode_str}.{tile_id}.2000.{name}.tif"
|
||
elif mode_str == "ALOSDEM":
|
||
dem_file_list = glob.glob(os.path.join(unzip_dir, "*.dem.tif"))
|
||
# 示例ZIP文件名称: AP_14354_FBS_F3000_RT1.zip
|
||
# 仅解压包含 dem.tif 的文件, 其内部路径为 AP_14354_FBS_F3000_RT1/AP_14354_FBS_F3000_RT1.dem.tif
|
||
tif_file = [f for f in zip_ref.namelist() if f.endswith("dem.tif")][0]
|
||
out_tif_name = f"DEM.{mode_str}.{tile_id}.2011.{name}.tif"
|
||
output_file = os.path.join(output_dir, out_tif_name)
|
||
if not os.path.isfile(output_file):
|
||
try:
|
||
dem_raster_list = []
|
||
for dem_path in dem_file_list:
|
||
raster = (
|
||
open_rasterio(dem_path).squeeze(dim="band", drop=True).rename(name)
|
||
)
|
||
if name == "slope" or name == "aspect":
|
||
org_attrs = raster.attrs
|
||
raster = raster * 0.01
|
||
# 恢复源数据属性信息
|
||
raster.attrs = org_attrs.copy()
|
||
raster.rio.write_crs("EPSG:4326", inplace=True)
|
||
raster.attrs["scale_factor"] = 1
|
||
dem_raster_list.append(raster)
|
||
if len(dem_raster_list) >= 1:
|
||
# 先逐一裁剪再镶嵌合成(仅在提供ROI且要求裁剪时执行)
|
||
if roi is not None and clip:
|
||
clipped_list = []
|
||
for raster in dem_raster_list:
|
||
clipped = clip_image(raster, roi, clip_by_box=True)
|
||
clipped_list.append(clipped)
|
||
# 镶嵌合成
|
||
if name == "slope" or name == "aspect":
|
||
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-9999)
|
||
else:
|
||
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-32768)
|
||
dem_mosaiced.rio.to_raster(
|
||
output_file, driver="COG", compress="DEFLATE"
|
||
)
|
||
except Exception as e:
|
||
logging.error(f"Error processing files in {name}: {e}")
|
||
return False
|
||
logging.info(f"Processed {output_file} successfully.")
|
||
else:
|
||
logging.warning(f"{output_file} already exists. Skipping.")
|
||
return True
|
||
|
||
|
||
def main(
|
||
output_root_dir: Path,
|
||
region_file: Path,
|
||
tile_id: str,
|
||
mode_str: str = "NASADEM",
|
||
):
|
||
results_urls = []
|
||
mode_str = mode_str.upper()
|
||
output_root_dir = output_root_dir / mode_str
|
||
# 放置下载的 ZIP 文件
|
||
download_dir = output_root_dir / "ZIP"
|
||
# 放置解压并预处理后的文件
|
||
unzip_dir = download_dir / "UNZIP"
|
||
output_dir = output_root_dir / "TIF" / tile_id
|
||
os.makedirs(download_dir, exist_ok=True)
|
||
os.makedirs(unzip_dir, exist_ok=True)
|
||
os.makedirs(output_dir, exist_ok=True)
|
||
results_urls_file = output_root_dir / f"{mode_str}_{tile_id}_results_urls.json"
|
||
|
||
# 配置日志
|
||
logs_file = output_root_dir / f"{mode_str}_{tile_id}_SuPER.log"
|
||
setup_logging(logs_file)
|
||
|
||
# 检索数据并存储, 默认覆盖上一次检索记录
|
||
if mode_str == "NASADEM":
|
||
asset_name = ["NASADEM_HGT", "NASADEM_SC"]
|
||
results_urls = earthdata_search(asset_name, region_file=region_file)
|
||
elif mode_str == "ALOSDEM":
|
||
asset_name = ["ALOS_PSR_RTC_HIGH"]
|
||
results_urls = earthdata_search(asset_name, region_file=region_file)
|
||
# ALOSDEM 数据仅保存含 "FBS" 字符串的 URL
|
||
results_urls = [url for url in results_urls if "FBS" in url[0]]
|
||
else:
|
||
results_urls = []
|
||
with open(results_urls_file, "w") as f:
|
||
json.dump(results_urls, f)
|
||
logging.info(f"Found {len(results_urls)} {mode_str} granules after filtering.")
|
||
if len(results_urls) == 0:
|
||
return
|
||
|
||
# 构造待解压的文件列表
|
||
zip_file_list = [
|
||
download_dir / os.path.basename(result[0])
|
||
for result in results_urls
|
||
]
|
||
|
||
client = dask.distributed.Client(timeout=60, memory_limit="8GB")
|
||
client.run(setup_dask_environment)
|
||
all_start_time = time.time()
|
||
client.scatter(results_urls)
|
||
|
||
logging.info(f"Start processing {mode_str} ...")
|
||
download_tasks = [
|
||
dask.delayed(download_granule)(granule_url, download_dir)
|
||
for granule_url in results_urls
|
||
]
|
||
# 根据模式传递正确的解压标识
|
||
unzip_tasks = dask.delayed(process_dem_zip)(zip_file_list, unzip_dir, mode_str)
|
||
if mode_str == "NASADEM":
|
||
process_tasks = [
|
||
dask.delayed(process_granule)(
|
||
unzip_dir, output_dir, mode_str, name, region, True, tile_id
|
||
)
|
||
for name in ["elevation", "slope", "aspect"]
|
||
]
|
||
|
||
dask.compute(*download_tasks)
|
||
dask.compute(unzip_tasks)
|
||
|
||
if mode_str == "NASADEM":
|
||
dask.compute(*process_tasks)
|
||
|
||
client.close()
|
||
all_total_time = time.time() - all_start_time
|
||
logging.info(
|
||
f"All {mode_str} Downloading complete and processed. Total time: {all_total_time} seconds"
|
||
)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
earthaccess.login(strategy="netrc", persist=True)
|
||
# tile_id = "49REL"
|
||
# tile_id = "Wuhan"
|
||
tile_id = "Hubei"
|
||
region_file = Path(f"./data/vectors/{tile_id}.geojson")
|
||
# mode_str = "NASADEM"
|
||
mode_str = "ALOSDEM"
|
||
output_root_dir = Path(".\\data\\DEM")
|
||
main(output_root_dir, region_file, tile_id, mode_str)
|