diff --git a/DATA_SuPER/MODIS_SuPER.py b/DATA_SuPER/MODIS_SuPER.py index 7698f55..855b294 100644 --- a/DATA_SuPER/MODIS_SuPER.py +++ b/DATA_SuPER/MODIS_SuPER.py @@ -6,7 +6,7 @@ For example, MCD43A3, MCD43A4, MOD11A1. ------------------------------------------------------------------------------- Authors: Hong Xie -Last Updated: 2025-09-11 +Last Updated: 2025-10-16 =============================================================================== """ @@ -15,6 +15,7 @@ import sys import json import time import logging +from pathlib import Path import earthaccess import rioxarray as rxr import dask.distributed @@ -236,7 +237,7 @@ def process_granule( def main( - region: list, + region_file: Path, asset_name: str, modis_tile_id: str, years: list, @@ -245,7 +246,7 @@ def main( target_crs: str, output_root_dir: str, ): - bbox = tuple(list(region.total_bounds)) + region = gpd.read_file(region_file) results_urls = [] output_dir = os.path.join(output_root_dir, asset_name) os.makedirs(output_dir, exist_ok=True) @@ -260,7 +261,7 @@ def main( ) year_temporal = (f"{year}-{dates[0]}T00:00:00", f"{year}-{dates[1]}T23:59:59") year_results = earthdata_search( - [asset_name], year_temporal, bbox, modis_tile_id + [asset_name], year_temporal, region_file, modis_tile_id ) results_urls.extend(year_results) with open(year_results_file, "w") as f: @@ -327,7 +328,7 @@ if __name__ == "__main__": earthaccess.login(persist=True) # region = gpd.read_file("./data/vectors/wuling_guanqu_polygon.geojson") tile_id = "49REL" - region = gpd.read_file(f"./data/vectors/{tile_id}.geojson") + region_file = f"./data/vectors/{tile_id}.geojson" target_crs = "EPSG:32649" asset_name = "MOD11A1" # asset_name = "MCD43A3" @@ -338,7 +339,7 @@ if __name__ == "__main__": dates = ("03-01", "10-31") output_root_dir = ".\\data\\MODIS\\" main( - region, + region_file, asset_name, modis_tile_id, years, diff --git a/DATA_SuPER/S1_SAR_SuPER.py b/DATA_SuPER/S1_SAR_SuPER.py index 7f287c6..f9e4311 100644 --- 
a/DATA_SuPER/S1_SAR_SuPER.py +++ b/DATA_SuPER/S1_SAR_SuPER.py @@ -29,13 +29,14 @@ RTC-S1 数据产品由美国宇航局喷气推进实验室 (JPL) OPERA 项目组 ------------------------------------------------------------------------------- Authors: Hong Xie -Last Updated: 2025-03-10 +Last Updated: 2025-10-20 =============================================================================== """ import os import sys import glob +from pathlib import Path import json import time from datetime import datetime @@ -141,7 +142,7 @@ def convert_to_db(image: xr.DataArray) -> xr.DataArray: def process_sar_granule( url_basenames: list[str], - output_dir: str, + output_dir: Path, date: str, tile_id: str, roi: list, @@ -156,7 +157,7 @@ def process_sar_granule( ---------- url_basenames : list[str] 用于文件匹配的 RTC-S1 数据下载链接的文件名列表 - output_dir : str + output_dir : Path 输出根目录 date : str 日期, 格式为 YYYY%j @@ -243,7 +244,7 @@ def process_sar_granule( def process_sar_static_granule( - url_basenames: list[str], output_dir: str, tile_id: str, roi: list, clip=True + url_basenames: list[str], output_dir: Path, tile_id: str, roi: list, clip=True ) -> bool: """ OPERA RTC-S1-STATIC 静态数据预处理 @@ -254,7 +255,7 @@ def process_sar_static_granule( ---------- url_basenames : list[str] 用于文件匹配的 RTC-S1-STATIC 数据下载链接的文件名列表 - output_dir: str + output_dir: Path 输出根目录 tile_id: str tile id @@ -327,12 +328,12 @@ def process_sar_static_granule( def main( asset_name: list[str], - region: list, + region_file: Path, years: list[str | int], - output_dir: str, + output_dir: Path, tile_id: str, ): - bbox = tuple(list(region.total_bounds)) + region = read_file(region_file) # 示例文件名称: OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif static_name = ["OPERA_L2_RTC-S1-STATIC_V1"] sar_output_dir = os.path.join(output_dir, "RTC_S1") @@ -353,7 +354,7 @@ def main( year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json" ) year_temporal = (f"{year}-06-01T00:00:00", f"{year}-08-31T23:59:59") - year_results = 
earthdata_search(asset_name, year_temporal, roi=bbox) + year_results = earthdata_search(asset_name, year_temporal, region_file=region_file) results_urls.extend(year_results) with open(year_results_file, "w") as f: json.dump(year_results, f) @@ -361,7 +362,7 @@ def main( with open(results_urls_file, "w") as f: json.dump(results_urls, f) - static_granule_urls = earthdata_search(static_name, roi=bbox) + static_granule_urls = earthdata_search(static_name, region_file=region_file) static_url_basenames = [ os.path.basename(url) for sublist in static_granule_urls for url in sublist ] @@ -454,10 +455,11 @@ if __name__ == "__main__": earthaccess.login(persist=True) asset_name = ["OPERA_L2_RTC-S1_V1"] tile_id = "49REL" - region = read_file(f".\\data\\vectors\\{tile_id}.geojson") + # region = read_file(f".\\data\\vectors\\{tile_id}.geojson") + region_file = Path(f".\\data\\vectors\\{tile_id}.geojson") # region = read_file("./data/vectors/wuling_guanqu_polygon.geojson") # years = ["2024", "2023", "2022"] years = ["2024"] - output_dir = ".\\data\\SAR" - os.makedirs(output_dir, exist_ok=True) - main(asset_name, region, years, output_dir, tile_id) + output_dir = Path(".\\data\\SAR") + output_dir.mkdir(parents=True, exist_ok=True) + main(asset_name, region_file, years, output_dir, tile_id) diff --git a/HLS_SuPER/HLS_Su.py b/HLS_SuPER/HLS_Su.py index 7b97d81..94714b5 100644 --- a/HLS_SuPER/HLS_Su.py +++ b/HLS_SuPER/HLS_Su.py @@ -7,19 +7,85 @@ This module contains functions related to searching and preprocessing HLS data. 
Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch Contact: lpdaac@usgs.gov Editor: Hong Xie -Last Updated: 2025-02-20 +Last Updated: 2025-10-16 =============================================================================== """ # Import necessary packages +import os, sys +import logging +from pathlib import Path import numpy as np import earthaccess +import geopandas as gpd +from shapely.geometry import box +from shapely.geometry.polygon import orient + + +def ensure_ccw(geom): + """ + Ensure the exterior ring of the polygon is counterclockwise. + """ + if geom.exterior.is_ccw: + return geom # Already counterclockwise + else: + return orient(geom, sign=1.0) # Make it counterclockwise + + +def format_roi(roi: Path): + """ + Determines if submitted ROI is a file or bbox coordinates. + + If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds. + + If bbox coordinates, creates a geodataframe with a single Polygon geometry. + + Returns a geopandas dataframe for clipping and a list of vertices for searching. + """ + if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")): + try: + # Open ROI if file + roi = gpd.read_file(roi) + # Check if ROI is in Geographic CRS, if not, convert to it + if not roi.crs.is_geographic: + roi = roi.to_crs("EPSG:4326") + logging.info( + "Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)" + ) + # (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon + if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon": + # Merge all Polygon geometries and create external boundary + logging.info( + "Multiple polygons detected. Creating single geometry of external coordinates." 
+ ) + single_geometry = roi.union_all().convex_hull + roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs) + roi["geometry"] = roi["geometry"].apply(ensure_ccw) + # List Vertices in correct order for search + # (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 仅支持逆时针 + vertices_list = list(roi.geometry[0].exterior.coords) + except (FileNotFoundError, ValueError): + sys.exit( + f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}" + ) + else: + # If bbox coordinates are submitted + bbox = tuple(map(float, roi.strip("'\"").split(","))) + print(bbox) + + # Convert bbox to a geodataframe for clipping + roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326") + roi["geometry"] = roi["geometry"].apply(ensure_ccw) + + vertices_list = list(roi.geometry[0].convex_hull.exterior.coords) + + return (roi, vertices_list) def earthdata_search( asset_name: list, dates: tuple = None, - roi: list = None, + region_file: Path = None, tile_id: str = None, hours: tuple = None, log=False, @@ -30,28 +96,32 @@ For example: - MODIS: MCD43A3, MCD43A4, MOD11A1, MOD11A2, ... - SMAP: SPL3SMP_E, SPL4SMGP, ... - - DEM: NASADEM_HGT, NASADEM_SC, ... + - GPM: GPM_3IMERGDL, ... + - DEM: NASADEM_HGT, NASADEM_SC, ALOS_PSR_RTC_HIGH, ALOS_PSR_RTC_LOW, ... 
""" # Search for data - if dates and not roi: - # 全球范围的数据集不需要roi参数 + if not region_file: + # 全球范围的数据集不需要roi参数, 如 SMAP, GPM results = earthaccess.search_data( short_name=list(asset_name), temporal=dates, ) - elif roi and not dates: - # 适用非时间序列数据, 如 DEM - results = earthaccess.search_data( - short_name=list(asset_name), - bounding_box=roi, - ) else: - results = earthaccess.search_data( - short_name=list(asset_name), - bounding_box=roi, - temporal=dates, - ) + # 基于 roi 的外包多边形进行数据检索 + roi, vertices_list = format_roi(region_file) + if not dates: + # 适用非时间序列数据, 如 DEM + results = earthaccess.search_data( + short_name=list(asset_name), + polygon=vertices_list, + ) + else: + results = earthaccess.search_data( + short_name=list(asset_name), + polygon=vertices_list, + temporal=dates, + ) # 根据瓦片ID过滤影像 if tile_id: diff --git a/HLS_SuPER/HLS_SuPER.py b/HLS_SuPER/HLS_SuPER.py index 843dbb2..0cb12b8 100644 --- a/HLS_SuPER/HLS_SuPER.py +++ b/HLS_SuPER/HLS_SuPER.py @@ -5,7 +5,7 @@ HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch Contact: lpdaac@usgs.gov Editor: Hong Xie -Last Updated: 2025-10-13 +Last Updated: 2025-10-16 =============================================================================== """ @@ -15,7 +15,7 @@ Last Updated: 2025-10-13 # TODO Add ZARR as output option from HLS_PER import process_granule, create_timeseries_dataset -from HLS_Su import hls_search +from HLS_Su import hls_search, format_roi from utils.common_utils import setup_dask_environment import os import sys @@ -26,9 +26,6 @@ import time import json from datetime import datetime as dt import earthaccess -from shapely.geometry import box -from shapely.geometry.polygon import orient -import geopandas as gpd import dask.distributed sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -169,66 +166,6 @@ def parse_arguments(): return parser.parse_args() -def ensure_ccw(geom): - """ - Ensure the exterior ring of 
the polygon is counterclockwise. - """ - if geom.exterior.is_ccw: - return geom # Already counterclockwise - else: - return orient(geom, sign=1.0) # Make it counterclockwise - - -def format_roi(roi): - """ - Determines if submitted ROI is a file or bbox coordinates. - - If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds. - - If bbox coordinates, creates a geodataframe with a single Polygon geometry. - - Returns a geopandas dataframe for clipping and a list of vertices for searching. - """ - if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")): - try: - # Open ROI if file - roi = gpd.read_file(roi) - # Check if ROI is in Geographic CRS, if not, convert to it - if not roi.crs.is_geographic: - roi = roi.to_crs("EPSG:4326") - logging.info( - "Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)" - ) - # (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon - if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon": - # Merge all Polygon geometries and create external boundary - logging.info( - "Multiple polygons detected. Creating single geometry of external coordinates." 
- ) - single_geometry = roi.union_all().convex_hull - roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs) - roi['geometry'] = roi['geometry'].apply(ensure_ccw) - # List Vertices in correct order for search - # (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 检索时仅支持逆时针 - vertices_list = list(roi.geometry[0].exterior.coords) - except (FileNotFoundError, ValueError): - sys.exit( - f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}" - ) - else: - # If bbox coordinates are submitted - bbox = tuple(map(float, roi.strip("'\"").split(","))) - print(bbox) - - # Convert bbox to a geodataframe for clipping - roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326") - roi['geometry'] = roi['geometry'].apply(ensure_ccw) - - vertices_list = list(roi.geometry[0].convex_hull.coords) - - return (roi, vertices_list) - - def format_dates(start, end): # Strip Quotes start = start.strip("'").strip('"')