From 339755a42cb3f7532e7bfe226fceb163837d3efb Mon Sep 17 00:00:00 2001 From: xhong Date: Mon, 18 May 2026 14:06:06 +0800 Subject: [PATCH] =?UTF-8?q?refactor(HLS=5FSuPER):=20=E4=BF=AE=E5=A4=8DHLS?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E4=B8=8B=E8=BD=BD=E4=B8=8E=E5=A4=84=E7=90=86?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E5=AD=98=E5=9C=A8=E7=9A=84=E9=97=AE=E9=A2=98?= =?UTF-8?q?=EF=BC=8C=E5=B9=B6=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81=E7=BB=93?= =?UTF-8?q?=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 统一调整所有文件的导入顺序,清理冗余导入项 - 修复format_roi函数中误用convex_hull获取顶点的问题,改用exterior.coords - 修正dask并行任务中的参数传递错误 - 新增非交互运行模式支持,优化confirm_action函数 - 添加无搜索结果时提前退出的检查逻辑 - 优化日志配置,新增netrc认证支持 - 修复裸露的except语句,改用明确的Exception捕获 - 更新所有文件的最后更新日期 --- HLS_SuPER/HLS_PER.py | 15 +++++---- HLS_SuPER/HLS_Su.py | 14 +++++---- HLS_SuPER/HLS_SuPER.py | 53 +++++++++++++++++++++---------- utils/common_utils.py | 71 +++++++++++++++++++++++++++++------------- 4 files changed, 101 insertions(+), 52 deletions(-) diff --git a/HLS_SuPER/HLS_PER.py b/HLS_SuPER/HLS_PER.py index 3639778..eb89938 100644 --- a/HLS_SuPER/HLS_PER.py +++ b/HLS_SuPER/HLS_PER.py @@ -3,24 +3,24 @@ =============================================================================== HLS Processing and Exporting Reformatted Data (HLS_PER) -This module contains functions to conduct subsetting and quality filtering of +This module contains functions to conduct subsetting and quality filtering of search results. ------------------------------------------------------------------------------- Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch Editor: Hong Xie -Last Updated: 2025-03-30 +Last Updated: 2026-05-17 =============================================================================== """ +import logging import os import sys -import logging - -import numpy as np from datetime import datetime as dt -import xarray as xr -import rioxarray as rxr + import dask.distributed +import numpy as np +import rioxarray as rxr +import xarray as xr def create_output_name(url, band_dict): @@ -181,7 +181,6 @@ def process_granule( os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}") for url in granule_urls ): - # First Handle Quality Layer # (Add) 简化原有的冗余处理, 仅处理质量层, 并最后移除质量层下载url if quality_filter: diff --git a/HLS_SuPER/HLS_Su.py b/HLS_SuPER/HLS_Su.py index 94714b5..04e8e95 100644 --- a/HLS_SuPER/HLS_Su.py +++ b/HLS_SuPER/HLS_Su.py @@ -3,21 +3,23 @@ =============================================================================== This module contains functions related to searching and preprocessing HLS data. -------------------------------------------------------------------------------- +------------------------------------------------------------------------------- Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch -Contact: lpdaac@usgs.gov +Contact: lpdaac@usgs.gov Editor: Hong Xie -Last Updated: 2025-10-16 +Last Updated: 2026-05-17 =============================================================================== """ # Import necessary packages -import os import logging +import os +import sys from pathlib import Path -import numpy as np + import earthaccess import geopandas as gpd +import numpy as np from shapely.geometry import box from shapely.geometry.polygon import orient @@ -77,7 +79,7 @@ def format_roi(roi: Path): roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326") roi["geometry"] = roi["geometry"].apply(ensure_ccw) - vertices_list = list(roi.geometry[0].convex_hull.coords) + vertices_list = list(roi.geometry[0].exterior.coords) return (roi, vertices_list) diff --git a/HLS_SuPER/HLS_SuPER.py b/HLS_SuPER/HLS_SuPER.py index 0cb12b8..4267624 100644 --- a/HLS_SuPER/HLS_SuPER.py +++ b/HLS_SuPER/HLS_SuPER.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- """ =============================================================================== -HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script +HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch Contact: lpdaac@usgs.gov Editor: Hong Xie -Last Updated: 2025-10-16 +Last Updated: 2026-05-17 =============================================================================== """ @@ -14,22 +14,25 @@ Last Updated: 2025-10-16 # TODO Improve behavior around deletion of cogs when a netcdf is requested # TODO Add ZARR as output option -from HLS_PER import process_granule, create_timeseries_dataset -from HLS_Su import hls_search, format_roi -from utils.common_utils import setup_dask_environment -import os -import sys import argparse -import shutil -import logging -import time import json +import logging +import os +import shutil +import sys +import time from datetime import datetime as dt -import earthaccess + import dask.distributed +import earthaccess sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from HLS_PER import create_timeseries_dataset, process_granule +from HLS_Su import format_roi, hls_search + +from utils.common_utils import setup_dask_environment + def parse_arguments(): """ @@ -349,6 +352,21 @@ def confirm_action(prompt): """ Prompts the user to confirm an action. """ + non_interactive = not sys.stdin.isatty() or os.environ.get( + "HLS_SUPER_NON_INTERACTIVE", "" + ).lower() in {"1", "true", "yes", "y"} + if non_interactive: + prompt_l = prompt.lower() + if "use the existing results file" in prompt_l: + return False + if "overwrite the existing results file" in prompt_l: + return True + if "proceed with processing" in prompt_l: + return True + if "temporary directory" in prompt_l: + return True + return True + while True: response = input(prompt).lower() if response in ["y", "yes"]: @@ -515,6 +533,10 @@ def main(): ) logging.info(filter_log) + if results_count == 0: + logging.warning("No data found matching the search criteria. Exiting.") + sys.exit("No data found. Processing aborted.") + # Confirm Processing if not confirm_action("Do you want to proceed with processing? (y/n)"): sys.exit("Processing aborted.") @@ -553,7 +575,7 @@ def main(): logging.info("Processing...") tasks = [ dask.delayed(process_granule)( - granule_url, + granule_urls, roi=roi, clip=clip, quality_filter=qf, @@ -563,15 +585,14 @@ def main(): bit_nums=[1, 3], chunk_size=chunk_size, ) - for granule_url in results_urls + for granule_urls in results_urls ] dask.compute(*tasks) # Create Timeseries Dataset if NC4 if args.of == "NC4": logging.info("Creating timeseries dataset...") - create_timeseries_dataset( - cog_dir, output_type=args.of, output_dir=output_dir) + create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir) # Close Dask Client client.close() @@ -588,7 +609,7 @@ def main(): # End Timer total_time = time.time() - start_time - logging.info(f"Processing complete. Total time: {round(total_time, 2)}s, ") + logging.info(f"Processing complete. Total time: {round(total_time, 2)}s.") if __name__ == "__main__": diff --git a/utils/common_utils.py b/utils/common_utils.py index f2f10d2..95dd4c4 100644 --- a/utils/common_utils.py +++ b/utils/common_utils.py @@ -9,25 +9,25 @@ Last Updated: 2025-09-11 =============================================================================== """ -import os -import sys import glob import json import logging +import os +import sys from datetime import datetime + import earthaccess +import geopandas as gpd import numpy as np import pandas as pd +import xarray as xr from affine import Affine from osgeo import gdal, gdal_array -from shapely import box -import xarray as xr from rasterio.enums import Resampling from rasterio.merge import merge -from rioxarray.merge import merge_arrays from rioxarray import open_rasterio -import geopandas as gpd -import matplotlib.pyplot as plt +from rioxarray.merge import merge_arrays +from shapely import box gdal.UseExceptions() @@ -229,16 +229,32 @@ def setup_dask_environment(): """ Passes RIO environment variables to dask workers for authentication. """ - import os + import rasterio - cookie_file_path = os.path.expanduser("~/cookies.txt") + candidate_cookie_paths = [ + os.environ.get("EARTHDATA_COOKIE_FILE"), + os.path.expanduser("~/.urs_cookies"), + os.path.expanduser("~/.cookies"), + os.path.expanduser("~/cookies.txt"), + ] + cookie_file_path = next( + (p for p in candidate_cookie_paths if p and os.path.exists(p)), + os.path.expanduser("~/cookies.txt"), + ) + + netrc_path = os.environ.get("EARTHDATA_NETRC_FILE") or os.path.expanduser( + "~/.netrc" + ) + enable_netrc = "YES" if os.path.exists(netrc_path) else "NO" global env gdal_config = { "GDAL_HTTP_UNSAFESSL": "YES", "GDAL_HTTP_COOKIEFILE": cookie_file_path, "GDAL_HTTP_COOKIEJAR": cookie_file_path, + "GDAL_HTTP_NETRC": enable_netrc, + "GDAL_HTTP_NETRC_FILE": netrc_path, "GDAL_DISABLE_READDIR_ON_OPEN": "YES", "CPL_VSIL_CURL_ALLOWED_EXTENSIONS": "TIF", "GDAL_HTTP_MAX_RETRY": "10", @@ -250,25 +266,34 @@ def setup_dask_environment(): env.__enter__() -def setup_logging(log_file: str = "dask_worker.log"): +def setup_logging(log_file: str = None): """ - 在Dask工作进程中设置logging + 设置logging Parameters ---------- log_file : str, optional - 日志文件路径, by default "dask_worker.log" + 日志文件路径, by default None """ - logging.basicConfig( - level=logging.INFO, - format="%(levelname)s:%(asctime)s ||| %(message)s", - handlers=[ - logging.StreamHandler(sys.stdout), - logging.FileHandler(log_file), - ], - ) + if log_file is None: + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s:%(asctime)s ||| %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], + encoding="utf-8", # Python 3.9+ 支持此参数 + ) + else: + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s:%(asctime)s ||| %(message)s", + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler(log_file, encoding="utf-8"), + ], + encoding="utf-8", # Python 3.9+ 支持此参数 + ) def load_band_as_arr(org_tif_path, band_num=1): @@ -386,7 +411,7 @@ def array_to_raster( except AttributeError: # For backwards compatibility with older version of GDAL rast = gdal.Open(gdal_array.GetArrayFilename(data)) - except: + except Exception: rast = gdal_array.OpenArray(data) rast.SetGeoTransform(transform) rast.SetProjection(wkt) @@ -479,7 +504,7 @@ def clip_roi_image( raster = open_rasterio(file_path) try: doy = os.path.basename(file_path).split(".")[3] - except Exception as e: + except Exception: doy = None if doy: raster.attrs["DOY"] = doy @@ -815,6 +840,8 @@ def plot(data, title=None, cmap="gray"): title (str): 标题 cmap (str): 颜色映射 """ + import matplotlib.pyplot as plt + plt.imshow(data) plt.title(title) plt.axis("off") # 关闭坐标轴