feat: Update to the stable version after multiple rounds of testing and use.
parent 6d16e32022
commit ac1a0a96f2
@@ -7,7 +7,8 @@ This module contains functions to conduct subsetting and quality filtering of
 search results.
 -------------------------------------------------------------------------------
 Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
-Last Updated: 2024-09-18
+Editor: Hong Xie
+Last Updated: 2025-01-06
 ===============================================================================
 """

@@ -21,6 +22,7 @@ import xarray as xr
 import rioxarray as rxr
+import dask.distributed


 def create_output_name(url, band_dict):
     """
     Uses HLS default naming scheme to generate an output name with common band names.
@@ -42,7 +44,9 @@ def create_output_name(url, band_dict):
     return output_name


-def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
+def open_hls(
+    url, roi=None, clip=False, scale=True, chunk_size=dict(band=1, x=512, y=512)
+):
     """
     Generic Function to open an HLS COG and clip to ROI. For consistent scaling, this must be done manually.
     Some HLS Landsat scenes have the metadata in the wrong location.
@@ -52,23 +56,40 @@ def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
         "band", drop=True
     )

-    # Reproject ROI and Clip if ROI is provided
-    if roi is not None:
+    # (Add) Read the asset and band names from the url
+    split_asset = url.split("/")[-1].split(".")
+    asset_name = split_asset[1]
+    band_name = split_asset[-2]
+
+    # Reproject ROI and Clip if ROI is provided and clip is True
+    if roi is not None and clip:
         roi = roi.to_crs(da.spatial_ref.crs_wkt)
         da = da.rio.clip(roi.geometry.values, roi.crs, all_touched=True)

+    # (Add) Although most scenes are already scaled with fill values handled, some may still require manual scaling and gap filling in local GIS software
     # Apply Scale Factor if desired for non-quality layer
-    if scale and "Fmask" not in url:
-        # Mask Fill Values
-        da = xr.where(da == -9999, np.nan, da)
-        # Scale Data
-        da = da * 0.0001
-        # Remove Scale Factor After Scaling - Prevents Double Scaling
-        da.attrs["scale_factor"] = 1.0
+    if band_name != "Fmask":
+        if scale:
+            # Mask Fill Values
+            da = xr.where(da == -9999, np.nan, da)
+            # Scale Data
+            # (Add) Except for the quality layer and the two L30 thermal infrared bands, every spectral band uses a scale factor of 0.0001
+            # (Add) The two L30 thermal infrared bands use a scale factor of 0.01
+            # NOTE: the thermal infrared bands have not been renamed at this point
+            if (band_name == "B10" or band_name == "B11") and (asset_name == "L30"):
+                da = da * 0.01
+            else:
+                da = da * 0.0001
+            # Remove Scale Factor After Scaling - Prevents Double Scaling
+            da.attrs["scale_factor"] = 1.0

-    # Add Scale Factor to Attributes Manually - This will overwrite/add if the data is missing.
-    if not scale and "Fmask" not in url:
-        da.attrs["scale_factor"] = 0.0001
+        # Add Scale Factor to Attributes Manually - This will overwrite/add if the data is missing.
+        # (Add) If scaling is to be done manually later, the scale factor must be added by hand
+        else:
+            if (band_name == "B10" or band_name == "B11") and (asset_name == "L30"):
+                da.attrs["scale_factor"] = 0.01
+            else:
+                da.attrs["scale_factor"] = 0.0001

     return da

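For reference, a minimal standalone sketch of the new scale-factor selection, assuming the standard HLS v2.0 file naming `HLS.<product>.<tile>.<datetime>.v2.0.<band>.tif`; the URL below is synthetic:

```python
# Synthetic HLS v2.0 asset URL, used only to illustrate the name parsing above.
url = "https://example.com/HLS.L30.T49RGQ.2024015T030720.v2.0.B10.tif"

split_asset = url.split("/")[-1].split(".")
asset_name = split_asset[1]  # product, e.g. "L30" or "S30"
band_name = split_asset[-2]  # band code, e.g. "B10" or "Fmask"

# L30 thermal infrared bands (B10/B11) are scaled by 0.01;
# all other non-Fmask spectral bands are scaled by 0.0001.
if band_name != "Fmask":
    if asset_name == "L30" and band_name in ("B10", "B11"):
        scale_factor = 0.01
    else:
        scale_factor = 0.0001
    print(asset_name, band_name, scale_factor)  # L30 B10 0.01
```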
@@ -91,6 +112,7 @@ def create_quality_mask(quality_data, bit_nums: list = [0, 1, 2, 3, 4, 5]):
 def process_granule(
     granule_urls,
     roi,
+    clip,
     quality_filter,
     scale,
     output_dir,
@@ -100,6 +122,31 @@ def process_granule(
 ):
     """
     Processes a list of HLS asset urls for a single granule.
+
+    args:
+        granule_urls (list): List of HLS asset urls to process.
+
+        roi (geopandas.GeoDataFrame): ROI to filter data.
+
+        clip (bool): If True, data will be clipped to the ROI.
+
+        quality_filter (bool): If True, quality layer will be used to mask data.
+
+        scale (bool): If True, data will be scaled to reflectance.
+
+        output_dir (str): Directory to save output files.
+
+        band_dict (dict): Dictionary of band names and asset names.
+
+        bit_nums (list): List of bit numbers to use for quality mask.
+            - 0: Cirrus
+            - 1: Cloud
+            - 2: Adjacent to cloud/shadow
+            - 3: Cloud shadow
+            - 4: Snow/ice
+            - 5: Water
+
+        chunk_size (dict): Dictionary of chunk sizes for dask.
     """

     # Setup Logging
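As background for `bit_nums`, a small sketch of the bitwise test such a quality mask performs on Fmask values, assuming the HLS v2.0 bit layout listed above (this is illustrative, not the repository's `create_quality_mask` itself):

```python
import numpy as np

def quality_mask_sketch(fmask: np.ndarray, bit_nums=(1, 3)) -> np.ndarray:
    """Return True wherever any of the requested Fmask bits is set."""
    mask = np.zeros(fmask.shape, dtype=bool)
    for bit in bit_nums:
        mask |= ((fmask >> bit) & 1).astype(bool)
    return mask

# 0b00001010 has bit 1 (cloud) and bit 3 (cloud shadow) set, so it is masked.
print(quality_mask_sketch(np.array([[0b00001010, 0]])))  # [[ True False]]
```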
@@ -116,6 +163,7 @@
     ):

         # First Handle Quality Layer
+        # (Add) Simplify the original redundant logic: handle only the quality layer here, then remove its download url from the list
         if quality_filter:
             # Generate Quality Layer URL
             split_asset = granule_urls[0].split("/")[-1].split(".")
@@ -123,31 +171,26 @@
             quality_url = (
                 f"{'/'.join(granule_urls[0].split('/')[:-1])}/{'.'.join(split_asset)}"
             )

-            # Check if File exists in Output Directory
-            output_name = create_output_name(quality_url, band_dict)
-            output_file = f"{output_dir}/{output_name}"
-
-            # Open Quality Layer
-            qa_da = open_hls(quality_url, roi, scale, chunk_size)
-
-            if not os.path.isfile(output_file):
+            quality_output_name = create_output_name(quality_url, band_dict)
+            quality_output_file = f"{output_dir}/{quality_output_name}"
+            # Check if quality asset is already processed
+            if not os.path.isfile(quality_output_file):
+                # Open Quality Layer
+                qa_da = open_hls(quality_url, roi, clip, scale, chunk_size)
                 # Write Output
-                qa_da.rio.to_raster(raster_path=output_file, driver="COG")
+                qa_da.rio.to_raster(raster_path=quality_output_file, driver="COG")
             else:
                 logging.info(
-                    f"Existing file {output_name} found in {output_dir}. Skipping."
+                    f"Existing quality file {quality_output_name} found in {output_dir}."
                 )

-            # Remove Quality Layer from Granule Asset List if Present
-            granule_urls = [asset for asset in granule_urls if asset != quality_url]
-
             # Create Quality Mask
+            # TODO: The mask array may cause Dask memory overflow; this needs optimization
             qa_mask = create_quality_mask(qa_da, bit_nums=bit_nums)
+            # (Add) When quality_filter=True, remove the quality layer after the quality mask is generated, to avoid reprocessing it later
+            granule_urls = [url for url in granule_urls if "Fmask" not in url]

         # Process Remaining Assets
         for url in granule_urls:
             # Check if File exists in Output Directory
             output_name = create_output_name(url, band_dict)
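To make the quality-url construction concrete, a sketch with a synthetic asset URL; the step that swaps the band field for `Fmask` is elided between the two hunks above, so it is shown here as an assumption:

```python
# Synthetic granule asset URL (hypothetical host), shaped like an HLS v2.0 asset.
granule_urls = ["https://example.com/HLS.L30.T49RGQ.2024015T030720.v2.0.B04.tif"]

split_asset = granule_urls[0].split("/")[-1].split(".")
split_asset[-2] = "Fmask"  # assumed step: replace the band field with "Fmask"
quality_url = f"{'/'.join(granule_urls[0].split('/')[:-1])}/{'.'.join(split_asset)}"
print(quality_url)  # https://example.com/HLS.L30.T49RGQ.2024015T030720.v2.0.Fmask.tif
```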
@@ -156,14 +199,20 @@
             # Check if scene is already processed
             if not os.path.isfile(output_file):
                 # Open Asset
-                da = open_hls(url, roi, scale, chunk_size)
+                da = open_hls(url, roi, clip, scale, chunk_size)

                 # Apply Quality Mask if Desired
                 if quality_filter:
                     da = da.where(~qa_mask)

                 # Write Output
-                da.rio.to_raster(raster_path=output_file, driver="COG")
+                if "FMASK" in output_name:
+                    da.rio.to_raster(raster_path=output_file, driver="COG")
+                else:
+                    # (Add) Fix the output dtype to float32; otherwise it defaults to float64
+                    da.rio.to_raster(
+                        raster_path=output_file, driver="COG", dtype="float32"
+                    )
             else:
                 logging.info(
                     f"Existing file {output_name} found in {output_dir}. Skipping."
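For context on `da.where(~qa_mask)` above, a toy example of how xarray drops masked pixels to NaN:

```python
import numpy as np
import xarray as xr

# Toy reflectance band and quality mask (True = contaminated pixel).
da = xr.DataArray(np.array([[0.12, 0.34], [0.56, 0.78]]), dims=("y", "x"))
qa_mask = xr.DataArray(np.array([[True, False], [False, True]]), dims=("y", "x"))

# Keep pixels where the mask is False; contaminated pixels become NaN.
print(da.where(~qa_mask).values)  # [[ nan 0.34] [0.56  nan]]
```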
@@ -5,8 +5,9 @@ This module contains functions related to searching and preprocessing HLS data.

 -------------------------------------------------------------------------------
 Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
 Contact: lpdaac@usgs.gov
-Last Updated: 2024-09-18
+Editor: Hong Xie
+Last Updated: 2025-01-06
 ===============================================================================
 """

@@ -16,7 +17,9 @@ import earthaccess


 # Main function to search and filter HLS data
-def hls_search(roi: list, band_dict: dict, dates=None, cloud_cover=None, log=False):
+def hls_search(
+    roi: list, band_dict: dict, dates=None, cloud_cover=None, tile_id=None, log=False
+):
     """
     This function uses earthaccess to search for HLS data using an roi and temporal parameter, filters by cloud cover, and delivers a list of results urls for the selected bands.
     """
@@ -27,6 +30,10 @@ def hls_search(roi: list, band_dict: dict, dates=None, cloud_cover=None, log=False):
         temporal=dates,
     )

+    # (Add) Filter scenes by tile ID
+    if tile_id:
+        results = hls_tileid_filter(results, tile_id)
+
     # Filter by cloud cover
     if cloud_cover:
         results = hls_cc_filter(results, cloud_cover)
@@ -45,6 +52,23 @@ def hls_search(roi: list, band_dict: dict, dates=None, cloud_cover=None, log=False):
     return selected_results_urls


+def hls_tileid_filter(results, tile_id):
+    """
+    (Add) Filter the earthaccess search results by the given tile ID.
+    """
+
+    tile_ids = []
+    for result in results:
+        # Extract the tile ID from the result json, convert it to a string, and collect it
+        tmp_id = str(result["meta"]["native-id"].split(".")[2])
+        tile_ids.append(tmp_id)
+    tile_ids = np.array(tile_ids)
+    # Find the indices matching the tile ID
+    tile_id_indices = np.where(tile_ids == tile_id)
+    # Filter the results by those indices
+    return [results[i] for i in tile_id_indices[0]]
+
+
 # Filter earthaccess results based on cloud cover threshold
 def hls_cc_filter(results, cc_threshold):
     """
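The `native-id` parsing above relies on the HLS granule naming convention; a quick sketch with a synthetic result object:

```python
# Hypothetical earthaccess result, shaped like what hls_tileid_filter reads.
result = {"meta": {"native-id": "HLS.L30.T49RGQ.2024015T030720.v2.0"}}
tile = str(result["meta"]["native-id"].split(".")[2])
print(tile)  # T49RGQ
```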
@@ -4,7 +4,8 @@
 HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
 Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
 Contact: lpdaac@usgs.gov
-Last Updated: 2024-09-18
+Editor: Hong Xie
+Last Updated: 2025-01-06
 ===============================================================================
 """

@@ -53,6 +54,23 @@ def parse_arguments():
         " ",
     )

+    # (Add) clip: clipping flag; defaults to False (no clipping), set True to clip
+    parser.add_argument(
+        "-clip",
+        choices=["True", "False"],
+        required=False,
+        help="(Optional) If provided, the script will clip the output to the ROI.",
+        default="False",
+    )
+
+    # (Add) tile: HLS tile ID
+    parser.add_argument(
+        "-tile",
+        type=str,
+        required=False,
+        help="(Optional) Tile ID for spatial subset. If provided, the script will search for the tile ID based on the ROI.",
+    )
+
     # dir: Directory to save the files to
     parser.add_argument(
         "-dir",
@@ -117,7 +135,7 @@ def parse_arguments():
         choices=["True", "False"],
         required=False,
         help="Flag to apply scale factor to layers before exporting output files. This is generally unecessary as most applications will scale automatically.",
-        default="False",
+        default="True",
     )

     # of: output file format
@@ -221,6 +239,14 @@ def format_dates(start, end):
     return dates


+def format_tile_id(tile_id):
+    """
+    (Add) Format the tile_id argument.
+    """
+    tile_id = tile_id.strip("'").strip('"')
+    return str(tile_id)
+
+
 def format_cloud_cover(cc):
     try:
         cc = int(cc.strip("'").strip('"'))
@@ -464,6 +490,10 @@ def main():
     roi, vl = format_roi(args.roi)
     logging.info("Region of Interest formatted successfully")

+    # (Add) Format the clip argument
+    clip = str_to_bool(args.clip)
+    logging.info(f"Clip to ROI: {clip}")
+
     # Set Output Directory
     if args.dir is not None:
         output_dir = os.path.normpath(args.dir.strip("'").strip('"')) + os.sep
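Because argparse receives `-clip` as the string "True" or "False", it must be converted explicitly. A hypothetical equivalent of the script's `str_to_bool` helper, whose real implementation is not shown in this diff:

```python
def str_to_bool(value: str) -> bool:
    # Strip stray quotes, then compare against the literal "True".
    return value.strip("'").strip('"') == "True"

print(str_to_bool("True"), str_to_bool("False"))  # True False
```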
@@ -485,6 +515,10 @@
     cc = format_cloud_cover(args.cc)
     logging.info(f"Cloud Cover Filter <= {cc}")

+    # (Add) Format the Tile ID argument
+    tile = format_tile_id(args.tile)
+    logging.info(f"Tile ID: {tile}")
+
     # Quality Filtering
     qf = str_to_bool(args.qf)
     logging.info(f"Quality Filtering: {qf}")
@@ -533,20 +567,27 @@
     else:
         logging.info("Searching for data...")
         results_urls = hls_search(
-            roi=vl, band_dict=band_dict, dates=dates, cloud_cover=cc
+            roi=vl, band_dict=band_dict, dates=dates, cloud_cover=cc, tile_id=tile
         )
         logging.info(f"Writing search results to {results_urls_file}")
         with open(results_urls_file, "w") as file:
             json.dump(results_urls, file)

+    results_count = len(results_urls)
     total_assets = sum(len(sublist) for sublist in results_urls)
+
+    filter_descriptions = []
     if cc:
-        logging.info(
-            f"{len(results_urls)} granules remain after cloud filtering. {total_assets} assets will be processed."
-        )
+        filter_descriptions.append("cloud")
+    if tile:
+        filter_descriptions.append("tile")
+    if filter_descriptions:
+        filter_log = f"{results_count} granules remain after {' and '.join(filter_descriptions)} filtering. {total_assets} assets will be processed."
     else:
-        logging.info(f"{total_assets} assets will be processed.")
+        filter_log = (
+            f"{results_count} granules remain. {total_assets} assets will be processed."
+        )
+    logging.info(filter_log)

     # Confirm Processing
     if not confirm_action("Do you want to proceed with processing? (y/n)"):
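A quick demonstration of the message the new `filter_descriptions` logic composes when both filters are active (the counts here are made up):

```python
results_count, total_assets = 12, 84  # hypothetical counts
filter_descriptions = ["cloud", "tile"]
filter_log = (
    f"{results_count} granules remain after {' and '.join(filter_descriptions)} "
    f"filtering. {total_assets} assets will be processed."
)
print(filter_log)
# 12 granules remain after cloud and tile filtering. 84 assets will be processed.
```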
@@ -588,11 +629,12 @@
             dask.delayed(process_granule)(
                 granule_url,
                 roi=roi,
+                clip=clip,
                 quality_filter=qf,
                 scale=scale,
                 output_dir=cog_dir,
                 band_dict=band_dict,
-                bit_nums=[0, 1, 2, 3, 4, 5],
+                bit_nums=[1, 3],
                 chunk_size=chunk_size,
             )
             for granule_url in results_urls
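The surrounding list comprehension builds one lazy `dask.delayed` task per granule and computes them together; a minimal sketch of that pattern:

```python
import dask

@dask.delayed
def work(x):
    return x * 2  # stand-in for process_granule

tasks = [work(i) for i in range(4)]  # builds the task graph lazily
print(dask.compute(*tasks))          # (0, 2, 4, 6)
```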
@@ -612,8 +654,8 @@
         logging.info("Timeseries Dataset Created. Removing Temporary Files...")
         shutil.rmtree(cog_dir)

-    # Add: Archive the downloaded imagery by DOY
-    logging.info("Start archiving the downloaded imagery by DOY date.")
+    # (Add) Archive the downloaded imagery by DOY
+    logging.info("Archiving the downloaded HLS imagery by DOY date")
     files_collection(output_dir)
     logging.info("Archiving complete!")

README.md
@@ -13,7 +13,7 @@

 ### 1.2 Configure the environment variables

 - To use conda commands directly from the console, add the relevant installation directories to the Path environment variable.

 ```
 D:\program\miniforge3
@@ -98,9 +98,11 @@ mamba activate lpdaac_windows
 - Note that the password should not contain symbols such as `@/#/$/%`, which can break the data requests.
 - A single user is limited to at most 100 requests per second; see https://forum.earthdata.nasa.gov/viewtopic.php?t=3734

-### 3.2 Fetch cloud-hosted data and preprocess it in memory
+### 3.2 Available script parameters

 - `-roi`: region of interest; set the point coordinates counterclockwise, in **lower-left, upper-right** order
+- `-clip`: whether to clip the imagery to the ROI, default `False`
+- `-tile`: HLS tile ID, e.g. `T49RGQ`
 - `-dir`: output directory; must be an existing directory
 - `-start`: start date, in `YYYY-MM-DD` format
 - `-end`: end date, in `YYYY-MM-DD` format
@@ -110,14 +112,28 @@ mamba activate lpdaac_windows
 - `-qf`: whether to use the quality band to mask cloud/cloud-shadow pixels, default `True`
 - `-scale`: whether to apply the scale factor to the imagery, default `True`

-- Fetch all spectral bands
+### 3.3 Examples: fetching cloud-hosted data and preprocessing in memory
+
+- Fetch the core spectral bands of L30 and S30: select scenes only by ROI, tile ID, date range, and product name; no cloud-cover filtering; apply cloud masking to the imagery

 ```sh
-python .\\HLS_SuPER\\HLS_SuPER.py -roi '113.10114,30.62845,114.24349,31.59081' -dir .\\data\\HLS\\L30 -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands COASTAL-AEROSOL,BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,CIRRUS,TIR1,TIR2,Fmask -cc 70 -qf True -scale True
+python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\ALL -start 2024-01-01 -end 2024-01-31 -prod both -bands BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,Fmask -scale True
 ```

-- Fetch only the necessary core bands
+- Fetch all bands of L30: select scenes by ROI, tile ID, date range, and product name; keep scenes with cloud cover below 70%; apply cloud masking

 ```sh
-python .\\HLS_SuPER\\HLS_SuPER.py -roi '113.10114,30.62845,114.24349,31.59081' -dir .\\data\\HLS\\L30\\subset -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands BLUE,GREEN,RED,NIR1,SWIR1,SWIR2 -cc 70 -qf True -scale True
-```
+python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\L30\\subset -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands COASTAL-AEROSOL,BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,CIRRUS,TIR1,TIR2,Fmask -cc 70 -scale True
+```
+
+- Fetch only the L30 thermal infrared bands: select scenes only by ROI, tile ID, date range, and product name; no cloud-cover filtering; apply cloud masking
+
+```sh
+python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\L30\\TIR -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands TIR1,TIR2,Fmask -scale True
+```
+
+- [For testing] Fetch the L30 spectral and thermal infrared bands for summer 2024, without cloud-cover filtering
+
+```sh
+python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\L30\\subset\\2024 -start 2024-06-01 -end 2024-08-31 -prod HLSL30 -bands BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,TIR1,TIR2,Fmask -scale True
+```