feat(HLS_SuPER): 使用最小外包简化检索时的ROI范围.

This commit is contained in:
谢泓 2025-10-13 10:00:56 +08:00
parent 116b964904
commit a3e4c7a172
3 changed files with 26 additions and 25 deletions

2
.gitignore vendored
View File

@ -11,3 +11,5 @@ data/
*.tiff
*.ipynb
*.log

View File

@ -42,7 +42,7 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: str) -> Path:
out = {}
for k, v in props.items():
# 移除嵌套对象
if k in ("parent","center", "centroid", "acroutes"):
if k in ("parent", "center", "centroid", "acroutes"):
continue
# 仅保留标量类型; 丢弃其他嵌套结构
if isinstance(v, (str, int, float, bool)) or v is None:

View File

@ -5,7 +5,7 @@ HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
Contact: lpdaac@usgs.gov
Editor: Hong Xie
Last Updated: 2025-09-05
Last Updated: 2025-10-13
===============================================================================
"""
@ -14,27 +14,25 @@ Last Updated: 2025-09-05
# TODO Improve behavior around deletion of cogs when a netcdf is requested
# TODO Add ZARR as output option
import argparse
import sys
from HLS_PER import process_granule, create_timeseries_dataset
from HLS_Su import hls_search
from utils.common_utils import setup_dask_environment
import os
import sys
import argparse
import shutil
import logging
import time
import json
from datetime import datetime as dt
import earthaccess
from shapely.geometry import box
from shapely.geometry.polygon import orient
import geopandas as gpd
from datetime import datetime as dt
import dask.distributed
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_utils import setup_dask_environment
from HLS_Su import hls_search
from HLS_PER import process_granule, create_timeseries_dataset
def parse_arguments():
"""
@ -170,6 +168,7 @@ def parse_arguments():
return parser.parse_args()
def ensure_ccw(geom):
"""
Ensure the exterior ring of the polygon is counterclockwise.
@ -178,7 +177,8 @@ def ensure_ccw(geom):
return geom # Already counterclockwise
else:
return orient(geom, sign=1.0) # Make it counterclockwise
def format_roi(roi):
"""
Determines if submitted ROI is a file or bbox coordinates.
@ -193,6 +193,12 @@ def format_roi(roi):
try:
# Open ROI if file
roi = gpd.read_file(roi)
# Check if ROI is in Geographic CRS, if not, convert to it
if not roi.crs.is_geographic:
roi = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
# Merge all Polygon geometries and create external boundary
@ -201,18 +207,10 @@ def format_roi(roi):
)
single_geometry = roi.union_all().convex_hull
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
# Check if ROI is in Geographic CRS, if not, convert to it
if not roi.crs.is_geographic:
roi = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
roi['geometry'] = roi['geometry'].apply(ensure_ccw)
# List Vertices in correct order for search
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标
minx, miny, maxx, maxy = roi.total_bounds
bounding_box = box(minx, miny, maxx, maxy)
vertices_list = list(bounding_box.exterior.coords)
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 检索时仅支持逆时针
vertices_list = list(roi.geometry[0].exterior.coords)
except (FileNotFoundError, ValueError):
sys.exit(
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
@ -224,9 +222,9 @@ def format_roi(roi):
# Convert bbox to a geodataframe for clipping
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
roi['geometry'] = roi['geometry'].apply(ensure_ccw)
roi['geometry'] = roi['geometry'].apply(ensure_ccw)
vertices_list = list(roi.geometry[0].exterior.coords)
vertices_list = list(roi.geometry[0].convex_hull.coords)
return (roi, vertices_list)
@ -635,7 +633,8 @@ def main():
# Create Timeseries Dataset if NC4
if args.of == "NC4":
logging.info("Creating timeseries dataset...")
create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir)
create_timeseries_dataset(
cog_dir, output_type=args.of, output_dir=output_dir)
# Close Dask Client
client.close()
@ -652,7 +651,7 @@ def main():
# End Timer
total_time = time.time() - start_time
logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
logging.info(f"Processing complete. Total time: {round(total_time, 2)}s, ")
if __name__ == "__main__":