366 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
===============================================================================
This module contains functions related to preprocessing DEM data.
For example, elevation, slope, aspect
Step1: Use earthaccess search and download DEM Data
- NASADEM_HGT
- includes 30m DEM, based on SRTM data
- https://lpdaac.usgs.gov/products/nasadem_hgtv001/
- NASADEM_SC
- includes 30m slope, aspect, based on NASADEM_HGT
- https://lpdaac.usgs.gov/products/nasadem_scv001/
- ALOS PALSAR RTC Project
- includes 12.5, 30m DEM, based on ALOS PALSAR data
- https://www.earthdata.nasa.gov/data/projects/alos-palsar-rtc-project
Step2a: Process NASADEM data
- 下载的 NASADEM 均为 *.zip 文件, 需先进行解压
- NASADEM 文件名称结构为: NASADEM_类型_网格编号/网格编号.数据类型
- 高程示例: NASADEM_HGT_n30e113/n30e113.hgt
- 坡度示例: NASADEM_SC_n30e113/n30e113.slope
- 坡向示例: NASADEM_SC_n30e113/n30e113.aspect
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
Step2b: Process ALOS PALSAR RTC Project data
- 下载的 ALOS PALSAR RTC Project 均为 *.zip 文件, 需先进行解压
- ALOS PALSAR RTC Project 文件名称结构为: AP_轨道号_CCC_DDDD_RT1.数据类型.tif
- 高程示例: AP_16112_FBS_F0620_RT1.dem.tif
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-10-20
===============================================================================
"""
import os
import sys
import glob
from pathlib import Path
import json
import requests
import zipfile
import time
import dask.distributed
import logging
import earthaccess
import numpy as np
from rioxarray import open_rasterio
sys.path.append(str(Path(".").parent))
from utils.common_utils import (
setup_dask_environment,
setup_logging,
clip_image,
mosaic_images,
)
from HLS_SuPER.HLS_Su import earthdata_search
def reorganize_nasadem_urls(dem_results_urls: list):
"""
重组 NASADEM 下载链接
将同一格网内的高程, 坡度坡向数据链接进行组合
Parameters
----------
dem_results_urls: list
查询返回的 NASADEM 数据 URL 列表
Returns
-------
grouped_results_urls: list
重组后的 NASADEM 数据 URL 列表
"""
tile_ids = []
for granule in dem_results_urls:
tile_id = granule[0].split("/")[-2].split("_")[-1]
tile_ids.append(tile_id)
tile_ids = np.array(tile_ids)
# 根据瓦片ID找到对应的索引
tile_id_indices = np.where(tile_ids == tile_id)
# 根据索引过滤结果
return [dem_results_urls[i] for i in tile_id_indices[0]]
def download_granule(granule_urls: list[str], output_dir: str) -> bool:
"""
下载单批数据
Parameters
----------
granule_urls: list
查询返回的规范化待下载数据 URL 列表
output_dir: str
下载目录
Returns
-------
download_state: bool
下载状态 True or False
"""
# 检查是否已下载
if not all(
os.path.isfile(os.path.join(output_dir, os.path.basename(url)))
for url in granule_urls
):
try:
earthaccess.download(granule_urls, output_dir)
except Exception as e:
# 下载失败时, 先尝试使用 requests 库下载
for url in granule_urls:
response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(
os.path.join(output_dir, os.path.basename(url)), "wb"
) as f:
f.write(response.content)
else:
logging.error(
f"Error downloading data: {response.status_code}. Skipping."
)
return False
logging.info("All Data already downloaded.")
return True
def process_dem_zip(
zip_file_list: list[str], output_dir: Path, mode_str: str = "NASADEM"
) -> None:
"""
处理下载的 DEM ZIP 文件,
若为 NASADEM, 则将其解压后的文件统一为可读写的 .hgt 格式;
若为 ALOSDEM, 则直接读取 dem 并转储为 DEFLATE 压缩后的 tif 文件.
Parameters
----------
zip_file_list: list
待解压的 ZIP 文件列表
output_dir: Path
输出目录
mode_str: str, optional
解压模式, 默认为 "NASADEM", 可选 "NASADEM", "ALOSDEM"
"""
try:
for zip_path in zip_file_list:
if not os.path.isfile(zip_path) or not zipfile.is_zipfile(zip_path):
continue
with zipfile.ZipFile(zip_path, "r") as zip_ref:
if mode_str == "NASADEM":
# 示例ZIP文件名称: NASADEM_HGT_n30e113.zip
# 仅解压包含 .hgt, .slope, .aspect 的文件
for tif_file in [
f
for f in zip_ref.namelist()
if f.endswith((".hgt", ".slope", ".aspect"))
]:
# 解压时重命名文件
new_name = (
tif_file.replace(".hgt", ".elevation.hgt")
if tif_file.endswith(".hgt")
else f"{tif_file}.hgt"
)
# 解压文件到指定目录
unzip_file_path = os.path.join(output_dir, new_name)
if os.path.exists(unzip_file_path):
continue
with zip_ref.open(tif_file) as source_file:
with open(unzip_file_path, "wb") as unzip_file:
unzip_file.write(source_file.read())
elif mode_str == "ALOSDEM":
# 示例ZIP文件名称: AP_14354_FBS_F3000_RT1.zip
# 仅解压包含 dem.tif 的文件, 其内部路径为 AP_14354_FBS_F3000_RT1/AP_14354_FBS_F3000_RT1.dem.tif
tif_file = [f for f in zip_ref.namelist() if f.endswith("dem.tif")][0]
# 读取压缩包内的文件并转储到指定目录
dem = open_rasterio(zip_ref.open(tif_file))
# 转储为 INT32 DEFLATE 压缩后的 .tif 文件
dem.rio.to_raster(
output_dir / os.path.basename(tif_file),
compress="DEFLATE",
)
except Exception as e:
logging.error(f"Error unzipping {mode_str} to {output_dir}: {e}")
return
def process_granule(
unzip_dir: Path,
output_dir: Path,
mode_str: str,
name: str,
roi: list,
clip=True,
tile_id: str = None,
) -> bool:
"""
读取解压并重命名处理后的指定类型 DEM 数据并进行预处理, 包括读取, 裁剪, 镶嵌, 并对坡度坡向进行缩放
Parameters
----------
unzip_dir: Path
解压后的 DEM 文件根目录
output_dir: Path
输出根目录
mode_str: str
数据模式, 可选 NASADEM, ALOSDEM
name: str
数据类型, 可选 elevation, slope, aspect
roi: list
网格范围
clip: bool
是否裁剪
tile_id: str
网格编号
Returns
-------
process_state: bool
处理状态 True or False
"""
if mode_str == "NASADEM":
dem_file_list = glob.glob(os.path.join(unzip_dir, f"*{name}.hgt"))
out_tif_name = f"DEM.{mode_str}.{tile_id}.2000.{name}.tif"
elif mode_str == "ALOSDEM":
dem_file_list = glob.glob(os.path.join(unzip_dir, "*.dem.tif"))
# 示例ZIP文件名称: AP_14354_FBS_F3000_RT1.zip
# 仅解压包含 dem.tif 的文件, 其内部路径为 AP_14354_FBS_F3000_RT1/AP_14354_FBS_F3000_RT1.dem.tif
tif_file = [f for f in zip_ref.namelist() if f.endswith("dem.tif")][0]
out_tif_name = f"DEM.{mode_str}.{tile_id}.2011.{name}.tif"
output_file = os.path.join(output_dir, out_tif_name)
if not os.path.isfile(output_file):
try:
dem_raster_list = []
for dem_path in dem_file_list:
raster = (
open_rasterio(dem_path).squeeze(dim="band", drop=True).rename(name)
)
if name == "slope" or name == "aspect":
org_attrs = raster.attrs
raster = raster * 0.01
# 恢复源数据属性信息
raster.attrs = org_attrs.copy()
raster.rio.write_crs("EPSG:4326", inplace=True)
raster.attrs["scale_factor"] = 1
dem_raster_list.append(raster)
if len(dem_raster_list) >= 1:
# 先逐一裁剪再镶嵌合成仅在提供ROI且要求裁剪时执行
if roi is not None and clip:
clipped_list = []
for raster in dem_raster_list:
clipped = clip_image(raster, roi, clip_by_box=True)
clipped_list.append(clipped)
# 镶嵌合成
if name == "slope" or name == "aspect":
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-9999)
else:
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-32768)
dem_mosaiced.rio.to_raster(
output_file, driver="COG", compress="DEFLATE"
)
except Exception as e:
logging.error(f"Error processing files in {name}: {e}")
return False
logging.info(f"Processed {output_file} successfully.")
else:
logging.warning(f"{output_file} already exists. Skipping.")
return True
def main(
output_root_dir: Path,
region_file: Path,
tile_id: str,
mode_str: str = "NASADEM",
):
results_urls = []
mode_str = mode_str.upper()
output_root_dir = output_root_dir / mode_str
# 放置下载的 ZIP 文件
download_dir = output_root_dir / "ZIP"
# 放置解压并预处理后的文件
unzip_dir = download_dir / "UNZIP"
output_dir = output_root_dir / "TIF" / tile_id
os.makedirs(download_dir, exist_ok=True)
os.makedirs(unzip_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
results_urls_file = output_root_dir / f"{mode_str}_{tile_id}_results_urls.json"
# 配置日志
logs_file = output_root_dir / f"{mode_str}_{tile_id}_SuPER.log"
setup_logging(logs_file)
# 检索数据并存储, 默认覆盖上一次检索记录
if mode_str == "NASADEM":
asset_name = ["NASADEM_HGT", "NASADEM_SC"]
results_urls = earthdata_search(asset_name, region_file=region_file)
elif mode_str == "ALOSDEM":
asset_name = ["ALOS_PSR_RTC_HIGH"]
results_urls = earthdata_search(asset_name, region_file=region_file)
# ALOSDEM 数据仅保存含 "FBS" 字符串的 URL
results_urls = [url for url in results_urls if "FBS" in url[0]]
else:
results_urls = []
with open(results_urls_file, "w") as f:
json.dump(results_urls, f)
logging.info(f"Found {len(results_urls)} {mode_str} granules after filtering.")
if len(results_urls) == 0:
return
# 构造待解压的文件列表
zip_file_list = [
download_dir / os.path.basename(result[0])
for result in results_urls
]
client = dask.distributed.Client(timeout=60, memory_limit="8GB")
client.run(setup_dask_environment)
all_start_time = time.time()
client.scatter(results_urls)
logging.info(f"Start processing {mode_str} ...")
download_tasks = [
dask.delayed(download_granule)(granule_url, download_dir)
for granule_url in results_urls
]
# 根据模式传递正确的解压标识
unzip_tasks = dask.delayed(process_dem_zip)(zip_file_list, unzip_dir, mode_str)
if mode_str == "NASADEM":
process_tasks = [
dask.delayed(process_granule)(
unzip_dir, output_dir, mode_str, name, region, True, tile_id
)
for name in ["elevation", "slope", "aspect"]
]
dask.compute(*download_tasks)
dask.compute(unzip_tasks)
if mode_str == "NASADEM":
dask.compute(*process_tasks)
client.close()
all_total_time = time.time() - all_start_time
logging.info(
f"All {mode_str} Downloading complete and processed. Total time: {all_total_time} seconds"
)
if __name__ == "__main__":
earthaccess.login(strategy="netrc", persist=True)
# tile_id = "49REL"
# tile_id = "Wuhan"
tile_id = "Hubei"
region_file = Path(f"./data/vectors/{tile_id}.geojson")
# mode_str = "NASADEM"
mode_str = "ALOSDEM"
output_root_dir = Path(".\\data\\DEM")
main(output_root_dir, region_file, tile_id, mode_str)