feat: 调整数据检索方式, 统一使用外包Polygon检索以减少所需处理的数据量.
This commit is contained in:
parent
c90432a815
commit
62b2370fd7
@ -6,7 +6,7 @@ For example, MCD43A3, MCD43A4, MOD11A1.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
Authors: Hong Xie
|
||||
Last Updated: 2025-09-11
|
||||
Last Updated: 2025-10-16
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
@ -15,6 +15,7 @@ import sys
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import earthaccess
|
||||
import rioxarray as rxr
|
||||
import dask.distributed
|
||||
@ -236,7 +237,7 @@ def process_granule(
|
||||
|
||||
|
||||
def main(
|
||||
region: list,
|
||||
region_file: Path,
|
||||
asset_name: str,
|
||||
modis_tile_id: str,
|
||||
years: list,
|
||||
@ -245,7 +246,7 @@ def main(
|
||||
target_crs: str,
|
||||
output_root_dir: str,
|
||||
):
|
||||
bbox = tuple(list(region.total_bounds))
|
||||
region = gpd.read_file(region_file)
|
||||
results_urls = []
|
||||
output_dir = os.path.join(output_root_dir, asset_name)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
@ -260,7 +261,7 @@ def main(
|
||||
)
|
||||
year_temporal = (f"{year}-{dates[0]}T00:00:00", f"{year}-{dates[1]}T23:59:59")
|
||||
year_results = earthdata_search(
|
||||
[asset_name], year_temporal, bbox, modis_tile_id
|
||||
[asset_name], year_temporal, region_file, modis_tile_id
|
||||
)
|
||||
results_urls.extend(year_results)
|
||||
with open(year_results_file, "w") as f:
|
||||
@ -327,7 +328,7 @@ if __name__ == "__main__":
|
||||
earthaccess.login(persist=True)
|
||||
# region = gpd.read_file("./data/vectors/wuling_guanqu_polygon.geojson")
|
||||
tile_id = "49REL"
|
||||
region = gpd.read_file(f"./data/vectors/{tile_id}.geojson")
|
||||
region_file = f"./data/vectors/{tile_id}.geojson"
|
||||
target_crs = "EPSG:32649"
|
||||
asset_name = "MOD11A1"
|
||||
# asset_name = "MCD43A3"
|
||||
@ -338,7 +339,7 @@ if __name__ == "__main__":
|
||||
dates = ("03-01", "10-31")
|
||||
output_root_dir = ".\\data\\MODIS\\"
|
||||
main(
|
||||
region,
|
||||
region_file,
|
||||
asset_name,
|
||||
modis_tile_id,
|
||||
years,
|
||||
|
||||
@ -29,13 +29,14 @@ RTC-S1 数据产品由美国宇航局喷气推进实验室 (JPL) OPERA 项目组
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
Authors: Hong Xie
|
||||
Last Updated: 2025-03-10
|
||||
Last Updated: 2025-10-20
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
from pathlib import Path
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
@ -141,7 +142,7 @@ def convert_to_db(image: xr.DataArray) -> xr.DataArray:
|
||||
|
||||
def process_sar_granule(
|
||||
url_basenames: list[str],
|
||||
output_dir: str,
|
||||
output_dir: Path,
|
||||
date: str,
|
||||
tile_id: str,
|
||||
roi: list,
|
||||
@ -156,7 +157,7 @@ def process_sar_granule(
|
||||
----------
|
||||
url_basenames : list[str]
|
||||
用于文件匹配的 RTC-S1 数据下载链接的文件名列表
|
||||
output_dir : str
|
||||
output_dir : Path
|
||||
输出根目录
|
||||
date : str
|
||||
日期, 格式为 YYYY%j
|
||||
@ -243,7 +244,7 @@ def process_sar_granule(
|
||||
|
||||
|
||||
def process_sar_static_granule(
|
||||
url_basenames: list[str], output_dir: str, tile_id: str, roi: list, clip=True
|
||||
url_basenames: list[str], output_dir: Path, tile_id: str, roi: list, clip=True
|
||||
) -> bool:
|
||||
"""
|
||||
OPERA RTC-S1-STATIC 静态数据预处理
|
||||
@ -254,7 +255,7 @@ def process_sar_static_granule(
|
||||
----------
|
||||
url_basenames : list[str]
|
||||
用于文件匹配的 RTC-S1-STATIC 数据下载链接的文件名列表
|
||||
output_dir: str
|
||||
output_dir: Path
|
||||
输出根目录
|
||||
tile_id: str
|
||||
tile id
|
||||
@ -327,12 +328,12 @@ def process_sar_static_granule(
|
||||
|
||||
def main(
|
||||
asset_name: list[str],
|
||||
region: list,
|
||||
region_file: Path,
|
||||
years: list[str | int],
|
||||
output_dir: str,
|
||||
output_dir: Path,
|
||||
tile_id: str,
|
||||
):
|
||||
bbox = tuple(list(region.total_bounds))
|
||||
region = read_file(region_file)
|
||||
# 示例文件名称: OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
|
||||
static_name = ["OPERA_L2_RTC-S1-STATIC_V1"]
|
||||
sar_output_dir = os.path.join(output_dir, "RTC_S1")
|
||||
@ -353,7 +354,7 @@ def main(
|
||||
year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
|
||||
)
|
||||
year_temporal = (f"{year}-06-01T00:00:00", f"{year}-08-31T23:59:59")
|
||||
year_results = earthdata_search(asset_name, year_temporal, roi=bbox)
|
||||
year_results = earthdata_search(asset_name, year_temporal, region_file=region_file)
|
||||
results_urls.extend(year_results)
|
||||
with open(year_results_file, "w") as f:
|
||||
json.dump(year_results, f)
|
||||
@ -361,7 +362,7 @@ def main(
|
||||
with open(results_urls_file, "w") as f:
|
||||
json.dump(results_urls, f)
|
||||
|
||||
static_granule_urls = earthdata_search(static_name, roi=bbox)
|
||||
static_granule_urls = earthdata_search(static_name, region_file=region_file)
|
||||
static_url_basenames = [
|
||||
os.path.basename(url) for sublist in static_granule_urls for url in sublist
|
||||
]
|
||||
@ -454,10 +455,11 @@ if __name__ == "__main__":
|
||||
earthaccess.login(persist=True)
|
||||
asset_name = ["OPERA_L2_RTC-S1_V1"]
|
||||
tile_id = "49REL"
|
||||
region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
|
||||
# region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
|
||||
region_file = Path(f".\\data\\vectors\\{tile_id}.geojson")
|
||||
# region = read_file("./data/vectors/wuling_guanqu_polygon.geojson")
|
||||
# years = ["2024", "2023", "2022"]
|
||||
years = ["2024"]
|
||||
output_dir = ".\\data\\SAR"
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
main(asset_name, region, years, output_dir, tile_id)
|
||||
output_dir = Path(".\\data\\SAR")
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
main(asset_name, region_file, years, output_dir, tile_id)
|
||||
|
||||
@ -7,19 +7,85 @@ This module contains functions related to searching and preprocessing HLS data.
|
||||
Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
|
||||
Contact: lpdaac@usgs.gov
|
||||
Editor: Hong Xie
|
||||
Last Updated: 2025-02-20
|
||||
Last Updated: 2025-10-16
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
# Import necessary packages
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import earthaccess
|
||||
import geopandas as gpd
|
||||
from shapely.geometry import box
|
||||
from shapely.geometry.polygon import orient
|
||||
|
||||
|
||||
def ensure_ccw(geom):
|
||||
"""
|
||||
Ensure the exterior ring of the polygon is counterclockwise.
|
||||
"""
|
||||
if geom.exterior.is_ccw:
|
||||
return geom # Already counterclockwise
|
||||
else:
|
||||
return orient(geom, sign=1.0) # Make it counterclockwise
|
||||
|
||||
|
||||
def format_roi(roi: Path):
|
||||
"""
|
||||
Determines if submitted ROI is a file or bbox coordinates.
|
||||
|
||||
If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
|
||||
|
||||
If bbox coordinates, creates a geodataframe with a single Polygon geometry.
|
||||
|
||||
Returns a geopandas dataframe for clipping and a list of vertices for searching.
|
||||
"""
|
||||
if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")):
|
||||
try:
|
||||
# Open ROI if file
|
||||
roi = gpd.read_file(roi)
|
||||
# Check if ROI is in Geographic CRS, if not, convert to it
|
||||
if not roi.crs.is_geographic:
|
||||
roi = roi.to_crs("EPSG:4326")
|
||||
logging.info(
|
||||
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
|
||||
)
|
||||
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
|
||||
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
|
||||
# Merge all Polygon geometries and create external boundary
|
||||
logging.info(
|
||||
"Multiple polygons detected. Creating single geometry of external coordinates."
|
||||
)
|
||||
single_geometry = roi.union_all().convex_hull
|
||||
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
|
||||
roi["geometry"] = roi["geometry"].apply(ensure_ccw)
|
||||
# List Vertices in correct order for search
|
||||
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 仅支持逆时针
|
||||
vertices_list = list(roi.geometry[0].exterior.coords)
|
||||
except (FileNotFoundError, ValueError):
|
||||
sys.exit(
|
||||
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
|
||||
)
|
||||
else:
|
||||
# If bbox coordinates are submitted
|
||||
bbox = tuple(map(float, roi.strip("'\"").split(",")))
|
||||
print(bbox)
|
||||
|
||||
# Convert bbox to a geodataframe for clipping
|
||||
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
|
||||
roi["geometry"] = roi["geometry"].apply(ensure_ccw)
|
||||
|
||||
vertices_list = list(roi.geometry[0].convex_hull.coords)
|
||||
|
||||
return (roi, vertices_list)
|
||||
|
||||
|
||||
def earthdata_search(
|
||||
asset_name: list,
|
||||
dates: tuple = None,
|
||||
roi: list = None,
|
||||
region_file: Path = None,
|
||||
tile_id: str = None,
|
||||
hours: tuple = None,
|
||||
log=False,
|
||||
@ -30,26 +96,30 @@ def earthdata_search(
|
||||
For example:
|
||||
- MODIS: MCD43A3, MCD43A4, MOD11A1, MOD11A2, ...
|
||||
- SMAP: SPL3SMP_E, SPL4SMGP, ...
|
||||
- DEM: NASADEM_HGT, NASADEM_SC, ...
|
||||
- GPM: GPM_3IMERGDL, ...
|
||||
- DEM: NASADEM_HGT, NASADEM_SC, ALOS_PSR_RTC_HIGH, ALOS_PSR_RTC_LOW, ...
|
||||
"""
|
||||
|
||||
# Search for data
|
||||
if dates and not roi:
|
||||
# 全球范围的数据集不需要roi参数
|
||||
if not region_file:
|
||||
# 全球范围的数据集不需要roi参数, 如 SMAP, GPM
|
||||
results = earthaccess.search_data(
|
||||
short_name=list(asset_name),
|
||||
temporal=dates,
|
||||
)
|
||||
elif roi and not dates:
|
||||
else:
|
||||
# 基于 roi 的外包多边形进行数据检索
|
||||
roi, vertices_list = format_roi(region_file)
|
||||
if not dates:
|
||||
# 适用非时间序列数据, 如 DEM
|
||||
results = earthaccess.search_data(
|
||||
short_name=list(asset_name),
|
||||
bounding_box=roi,
|
||||
polygon=vertices_list,
|
||||
)
|
||||
else:
|
||||
results = earthaccess.search_data(
|
||||
short_name=list(asset_name),
|
||||
bounding_box=roi,
|
||||
polygon=vertices_list,
|
||||
temporal=dates,
|
||||
)
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
|
||||
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
|
||||
Contact: lpdaac@usgs.gov
|
||||
Editor: Hong Xie
|
||||
Last Updated: 2025-10-13
|
||||
Last Updated: 2025-10-16
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
@ -15,7 +15,7 @@ Last Updated: 2025-10-13
|
||||
# TODO Add ZARR as output option
|
||||
|
||||
from HLS_PER import process_granule, create_timeseries_dataset
|
||||
from HLS_Su import hls_search
|
||||
from HLS_Su import hls_search, format_roi
|
||||
from utils.common_utils import setup_dask_environment
|
||||
import os
|
||||
import sys
|
||||
@ -26,9 +26,6 @@ import time
|
||||
import json
|
||||
from datetime import datetime as dt
|
||||
import earthaccess
|
||||
from shapely.geometry import box
|
||||
from shapely.geometry.polygon import orient
|
||||
import geopandas as gpd
|
||||
import dask.distributed
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@ -169,66 +166,6 @@ def parse_arguments():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def ensure_ccw(geom):
|
||||
"""
|
||||
Ensure the exterior ring of the polygon is counterclockwise.
|
||||
"""
|
||||
if geom.exterior.is_ccw:
|
||||
return geom # Already counterclockwise
|
||||
else:
|
||||
return orient(geom, sign=1.0) # Make it counterclockwise
|
||||
|
||||
|
||||
def format_roi(roi):
|
||||
"""
|
||||
Determines if submitted ROI is a file or bbox coordinates.
|
||||
|
||||
If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
|
||||
|
||||
If bbox coordinates, creates a geodataframe with a single Polygon geometry.
|
||||
|
||||
Returns a geopandas dataframe for clipping and a list of vertices for searching.
|
||||
"""
|
||||
if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")):
|
||||
try:
|
||||
# Open ROI if file
|
||||
roi = gpd.read_file(roi)
|
||||
# Check if ROI is in Geographic CRS, if not, convert to it
|
||||
if not roi.crs.is_geographic:
|
||||
roi = roi.to_crs("EPSG:4326")
|
||||
logging.info(
|
||||
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
|
||||
)
|
||||
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
|
||||
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
|
||||
# Merge all Polygon geometries and create external boundary
|
||||
logging.info(
|
||||
"Multiple polygons detected. Creating single geometry of external coordinates."
|
||||
)
|
||||
single_geometry = roi.union_all().convex_hull
|
||||
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
|
||||
roi['geometry'] = roi['geometry'].apply(ensure_ccw)
|
||||
# List Vertices in correct order for search
|
||||
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 检索时仅支持逆时针
|
||||
vertices_list = list(roi.geometry[0].exterior.coords)
|
||||
except (FileNotFoundError, ValueError):
|
||||
sys.exit(
|
||||
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
|
||||
)
|
||||
else:
|
||||
# If bbox coordinates are submitted
|
||||
bbox = tuple(map(float, roi.strip("'\"").split(",")))
|
||||
print(bbox)
|
||||
|
||||
# Convert bbox to a geodataframe for clipping
|
||||
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
|
||||
roi['geometry'] = roi['geometry'].apply(ensure_ccw)
|
||||
|
||||
vertices_list = list(roi.geometry[0].convex_hull.coords)
|
||||
|
||||
return (roi, vertices_list)
|
||||
|
||||
|
||||
def format_dates(start, end):
|
||||
# Strip Quotes
|
||||
start = start.strip("'").strip('"')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user