feat(HLS_SuPER): 使用最小外包简化检索时的ROI范围.

This commit is contained in:
谢泓 2025-10-13 10:00:56 +08:00
parent 116b964904
commit a3e4c7a172
3 changed files with 26 additions and 25 deletions

2
.gitignore vendored
View File

@ -11,3 +11,5 @@ data/
*.tiff *.tiff
*.ipynb *.ipynb
*.log

View File

@ -42,7 +42,7 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: str) -> Path:
out = {} out = {}
for k, v in props.items(): for k, v in props.items():
# 移除嵌套对象 # 移除嵌套对象
if k in ("parent","center", "centroid", "acroutes"): if k in ("parent", "center", "centroid", "acroutes"):
continue continue
# 仅保留标量类型; 丢弃其他嵌套结构 # 仅保留标量类型; 丢弃其他嵌套结构
if isinstance(v, (str, int, float, bool)) or v is None: if isinstance(v, (str, int, float, bool)) or v is None:

View File

@ -5,7 +5,7 @@ HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
Contact: lpdaac@usgs.gov Contact: lpdaac@usgs.gov
Editor: Hong Xie Editor: Hong Xie
Last Updated: 2025-09-05 Last Updated: 2025-10-13
=============================================================================== ===============================================================================
""" """
@ -14,27 +14,25 @@ Last Updated: 2025-09-05
# TODO Improve behavior around deletion of cogs when a netcdf is requested # TODO Improve behavior around deletion of cogs when a netcdf is requested
# TODO Add ZARR as output option # TODO Add ZARR as output option
import argparse from HLS_PER import process_granule, create_timeseries_dataset
import sys from HLS_Su import hls_search
from utils.common_utils import setup_dask_environment
import os import os
import sys
import argparse
import shutil import shutil
import logging import logging
import time import time
import json import json
from datetime import datetime as dt
import earthaccess import earthaccess
from shapely.geometry import box from shapely.geometry import box
from shapely.geometry.polygon import orient from shapely.geometry.polygon import orient
import geopandas as gpd import geopandas as gpd
from datetime import datetime as dt
import dask.distributed import dask.distributed
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_utils import setup_dask_environment
from HLS_Su import hls_search
from HLS_PER import process_granule, create_timeseries_dataset
def parse_arguments(): def parse_arguments():
""" """
@ -170,6 +168,7 @@ def parse_arguments():
return parser.parse_args() return parser.parse_args()
def ensure_ccw(geom): def ensure_ccw(geom):
""" """
Ensure the exterior ring of the polygon is counterclockwise. Ensure the exterior ring of the polygon is counterclockwise.
@ -179,6 +178,7 @@ def ensure_ccw(geom):
else: else:
return orient(geom, sign=1.0) # Make it counterclockwise return orient(geom, sign=1.0) # Make it counterclockwise
def format_roi(roi): def format_roi(roi):
""" """
Determines if submitted ROI is a file or bbox coordinates. Determines if submitted ROI is a file or bbox coordinates.
@ -193,6 +193,12 @@ def format_roi(roi):
try: try:
# Open ROI if file # Open ROI if file
roi = gpd.read_file(roi) roi = gpd.read_file(roi)
# Check if ROI is in Geographic CRS, if not, convert to it
if not roi.crs.is_geographic:
roi = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon # (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon": if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
# Merge all Polygon geometries and create external boundary # Merge all Polygon geometries and create external boundary
@ -201,18 +207,10 @@ def format_roi(roi):
) )
single_geometry = roi.union_all().convex_hull single_geometry = roi.union_all().convex_hull
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs) roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
# Check if ROI is in Geographic CRS, if not, convert to it
if not roi.crs.is_geographic:
roi = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
roi['geometry'] = roi['geometry'].apply(ensure_ccw) roi['geometry'] = roi['geometry'].apply(ensure_ccw)
# List Vertices in correct order for search # List Vertices in correct order for search
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标 # (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 检索时仅支持逆时针
minx, miny, maxx, maxy = roi.total_bounds vertices_list = list(roi.geometry[0].exterior.coords)
bounding_box = box(minx, miny, maxx, maxy)
vertices_list = list(bounding_box.exterior.coords)
except (FileNotFoundError, ValueError): except (FileNotFoundError, ValueError):
sys.exit( sys.exit(
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}" f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
@ -226,7 +224,7 @@ def format_roi(roi):
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326") roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
roi['geometry'] = roi['geometry'].apply(ensure_ccw) roi['geometry'] = roi['geometry'].apply(ensure_ccw)
vertices_list = list(roi.geometry[0].exterior.coords) vertices_list = list(roi.geometry[0].convex_hull.coords)
return (roi, vertices_list) return (roi, vertices_list)
@ -635,7 +633,8 @@ def main():
# Create Timeseries Dataset if NC4 # Create Timeseries Dataset if NC4
if args.of == "NC4": if args.of == "NC4":
logging.info("Creating timeseries dataset...") logging.info("Creating timeseries dataset...")
create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir) create_timeseries_dataset(
cog_dir, output_type=args.of, output_dir=output_dir)
# Close Dask Client # Close Dask Client
client.close() client.close()
@ -652,7 +651,7 @@ def main():
# End Timer # End Timer
total_time = time.time() - start_time total_time = time.time() - start_time
logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ") logging.info(f"Processing complete. Total time: {round(total_time, 2)}s, ")
if __name__ == "__main__": if __name__ == "__main__":