refactor(HLS_SuPER): 修复HLS数据下载与处理代码存在的问题,并优化代码结构
- 统一调整所有文件的导入顺序,清理冗余导入项 - 修复format_roi函数中误用convex_hull获取顶点的问题,改用exterior.coords - 修正dask并行任务中的参数传递错误 - 新增非交互运行模式支持,优化confirm_action函数 - 添加无搜索结果时提前退出的检查逻辑 - 优化日志配置,新增netrc认证支持 - 修复裸露的except语句,改用明确的Exception捕获 - 更新所有文件的最后更新日期
This commit is contained in:
parent
cc0791f1ba
commit
339755a42c
@ -3,24 +3,24 @@
|
||||
===============================================================================
|
||||
HLS Processing and Exporting Reformatted Data (HLS_PER)
|
||||
|
||||
This module contains functions to conduct subsetting and quality filtering of
|
||||
This module contains functions to conduct subsetting and quality filtering of
|
||||
search results.
|
||||
-------------------------------------------------------------------------------
|
||||
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
|
||||
Editor: Hong Xie
|
||||
Last Updated: 2025-03-30
|
||||
Last Updated: 2026-05-17
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
from datetime import datetime as dt
|
||||
import xarray as xr
|
||||
import rioxarray as rxr
|
||||
|
||||
import dask.distributed
|
||||
import numpy as np
|
||||
import rioxarray as rxr
|
||||
import xarray as xr
|
||||
|
||||
|
||||
def create_output_name(url, band_dict):
|
||||
@ -181,7 +181,6 @@ def process_granule(
|
||||
os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
|
||||
for url in granule_urls
|
||||
):
|
||||
|
||||
# First Handle Quality Layer
|
||||
# (Add) 简化原有的冗余处理, 仅处理质量层, 并最后移除质量层下载url
|
||||
if quality_filter:
|
||||
|
||||
@ -3,21 +3,23 @@
|
||||
===============================================================================
|
||||
This module contains functions related to searching and preprocessing HLS data.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-------------------------------------------------------------------------------
|
||||
Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
|
||||
Contact: lpdaac@usgs.gov
|
||||
Contact: lpdaac@usgs.gov
|
||||
Editor: Hong Xie
|
||||
Last Updated: 2025-10-16
|
||||
Last Updated: 2026-05-17
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
# Import necessary packages
|
||||
import os
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
import earthaccess
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
from shapely.geometry import box
|
||||
from shapely.geometry.polygon import orient
|
||||
|
||||
@ -77,7 +79,7 @@ def format_roi(roi: Path):
|
||||
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
|
||||
roi["geometry"] = roi["geometry"].apply(ensure_ccw)
|
||||
|
||||
vertices_list = list(roi.geometry[0].convex_hull.coords)
|
||||
vertices_list = list(roi.geometry[0].exterior.coords)
|
||||
|
||||
return (roi, vertices_list)
|
||||
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
===============================================================================
|
||||
HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
|
||||
HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
|
||||
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
|
||||
Contact: lpdaac@usgs.gov
|
||||
Editor: Hong Xie
|
||||
Last Updated: 2025-10-16
|
||||
Last Updated: 2026-05-17
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
@ -14,22 +14,25 @@ Last Updated: 2025-10-16
|
||||
# TODO Improve behavior around deletion of cogs when a netcdf is requested
|
||||
# TODO Add ZARR as output option
|
||||
|
||||
from HLS_PER import process_granule, create_timeseries_dataset
|
||||
from HLS_Su import hls_search, format_roi
|
||||
from utils.common_utils import setup_dask_environment
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import shutil
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime as dt
|
||||
import earthaccess
|
||||
|
||||
import dask.distributed
|
||||
import earthaccess
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from HLS_PER import create_timeseries_dataset, process_granule
|
||||
from HLS_Su import format_roi, hls_search
|
||||
|
||||
from utils.common_utils import setup_dask_environment
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
"""
|
||||
@ -349,6 +352,21 @@ def confirm_action(prompt):
|
||||
"""
|
||||
Prompts the user to confirm an action.
|
||||
"""
|
||||
non_interactive = not sys.stdin.isatty() or os.environ.get(
|
||||
"HLS_SUPER_NON_INTERACTIVE", ""
|
||||
).lower() in {"1", "true", "yes", "y"}
|
||||
if non_interactive:
|
||||
prompt_l = prompt.lower()
|
||||
if "use the existing results file" in prompt_l:
|
||||
return False
|
||||
if "overwrite the existing results file" in prompt_l:
|
||||
return True
|
||||
if "proceed with processing" in prompt_l:
|
||||
return True
|
||||
if "temporary directory" in prompt_l:
|
||||
return True
|
||||
return True
|
||||
|
||||
while True:
|
||||
response = input(prompt).lower()
|
||||
if response in ["y", "yes"]:
|
||||
@ -515,6 +533,10 @@ def main():
|
||||
)
|
||||
logging.info(filter_log)
|
||||
|
||||
if results_count == 0:
|
||||
logging.warning("No data found matching the search criteria. Exiting.")
|
||||
sys.exit("No data found. Processing aborted.")
|
||||
|
||||
# Confirm Processing
|
||||
if not confirm_action("Do you want to proceed with processing? (y/n)"):
|
||||
sys.exit("Processing aborted.")
|
||||
@ -553,7 +575,7 @@ def main():
|
||||
logging.info("Processing...")
|
||||
tasks = [
|
||||
dask.delayed(process_granule)(
|
||||
granule_url,
|
||||
granule_urls,
|
||||
roi=roi,
|
||||
clip=clip,
|
||||
quality_filter=qf,
|
||||
@ -563,15 +585,14 @@ def main():
|
||||
bit_nums=[1, 3],
|
||||
chunk_size=chunk_size,
|
||||
)
|
||||
for granule_url in results_urls
|
||||
for granule_urls in results_urls
|
||||
]
|
||||
dask.compute(*tasks)
|
||||
|
||||
# Create Timeseries Dataset if NC4
|
||||
if args.of == "NC4":
|
||||
logging.info("Creating timeseries dataset...")
|
||||
create_timeseries_dataset(
|
||||
cog_dir, output_type=args.of, output_dir=output_dir)
|
||||
create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir)
|
||||
|
||||
# Close Dask Client
|
||||
client.close()
|
||||
@ -588,7 +609,7 @@ def main():
|
||||
|
||||
# End Timer
|
||||
total_time = time.time() - start_time
|
||||
logging.info(f"Processing complete. Total time: {round(total_time, 2)}s, ")
|
||||
logging.info(f"Processing complete. Total time: {round(total_time, 2)}s.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -9,25 +9,25 @@ Last Updated: 2025-09-11
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import earthaccess
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import xarray as xr
|
||||
from affine import Affine
|
||||
from osgeo import gdal, gdal_array
|
||||
from shapely import box
|
||||
import xarray as xr
|
||||
from rasterio.enums import Resampling
|
||||
from rasterio.merge import merge
|
||||
from rioxarray.merge import merge_arrays
|
||||
from rioxarray import open_rasterio
|
||||
import geopandas as gpd
|
||||
import matplotlib.pyplot as plt
|
||||
from rioxarray.merge import merge_arrays
|
||||
from shapely import box
|
||||
|
||||
gdal.UseExceptions()
|
||||
|
||||
@ -229,16 +229,32 @@ def setup_dask_environment():
|
||||
"""
|
||||
Passes RIO environment variables to dask workers for authentication.
|
||||
"""
|
||||
import os
|
||||
|
||||
import rasterio
|
||||
|
||||
cookie_file_path = os.path.expanduser("~/cookies.txt")
|
||||
candidate_cookie_paths = [
|
||||
os.environ.get("EARTHDATA_COOKIE_FILE"),
|
||||
os.path.expanduser("~/.urs_cookies"),
|
||||
os.path.expanduser("~/.cookies"),
|
||||
os.path.expanduser("~/cookies.txt"),
|
||||
]
|
||||
cookie_file_path = next(
|
||||
(p for p in candidate_cookie_paths if p and os.path.exists(p)),
|
||||
os.path.expanduser("~/cookies.txt"),
|
||||
)
|
||||
|
||||
netrc_path = os.environ.get("EARTHDATA_NETRC_FILE") or os.path.expanduser(
|
||||
"~/.netrc"
|
||||
)
|
||||
enable_netrc = "YES" if os.path.exists(netrc_path) else "NO"
|
||||
|
||||
global env
|
||||
gdal_config = {
|
||||
"GDAL_HTTP_UNSAFESSL": "YES",
|
||||
"GDAL_HTTP_COOKIEFILE": cookie_file_path,
|
||||
"GDAL_HTTP_COOKIEJAR": cookie_file_path,
|
||||
"GDAL_HTTP_NETRC": enable_netrc,
|
||||
"GDAL_HTTP_NETRC_FILE": netrc_path,
|
||||
"GDAL_DISABLE_READDIR_ON_OPEN": "YES",
|
||||
"CPL_VSIL_CURL_ALLOWED_EXTENSIONS": "TIF",
|
||||
"GDAL_HTTP_MAX_RETRY": "10",
|
||||
@ -250,25 +266,34 @@ def setup_dask_environment():
|
||||
env.__enter__()
|
||||
|
||||
|
||||
def setup_logging(log_file: str = "dask_worker.log"):
|
||||
def setup_logging(log_file: str = None):
|
||||
"""
|
||||
在Dask工作进程中设置logging
|
||||
设置logging
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
log_file : str, optional
|
||||
日志文件路径, by default "dask_worker.log"
|
||||
日志文件路径, by default None
|
||||
"""
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(levelname)s:%(asctime)s ||| %(message)s",
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
logging.FileHandler(log_file),
|
||||
],
|
||||
)
|
||||
if log_file is None:
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(levelname)s:%(asctime)s ||| %(message)s",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
encoding="utf-8", # Python 3.9+ 支持此参数
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(levelname)s:%(asctime)s ||| %(message)s",
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
logging.FileHandler(log_file, encoding="utf-8"),
|
||||
],
|
||||
encoding="utf-8", # Python 3.9+ 支持此参数
|
||||
)
|
||||
|
||||
|
||||
def load_band_as_arr(org_tif_path, band_num=1):
|
||||
@ -386,7 +411,7 @@ def array_to_raster(
|
||||
except AttributeError:
|
||||
# For backwards compatibility with older version of GDAL
|
||||
rast = gdal.Open(gdal_array.GetArrayFilename(data))
|
||||
except:
|
||||
except Exception:
|
||||
rast = gdal_array.OpenArray(data)
|
||||
rast.SetGeoTransform(transform)
|
||||
rast.SetProjection(wkt)
|
||||
@ -479,7 +504,7 @@ def clip_roi_image(
|
||||
raster = open_rasterio(file_path)
|
||||
try:
|
||||
doy = os.path.basename(file_path).split(".")[3]
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
doy = None
|
||||
if doy:
|
||||
raster.attrs["DOY"] = doy
|
||||
@ -815,6 +840,8 @@ def plot(data, title=None, cmap="gray"):
|
||||
title (str): 标题
|
||||
cmap (str): 颜色映射
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
plt.imshow(data)
|
||||
plt.title(title)
|
||||
plt.axis("off") # 关闭坐标轴
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user