feat: optimize download and archive logic; archive images only after downloads complete.
parent 7ecd0378f1
commit 6d16e32022
@@ -21,7 +21,6 @@ import xarray as xr
 import rioxarray as rxr
 import dask.distributed
 
-
 def create_output_name(url, band_dict):
     """
     Uses HLS default naming scheme to generate an output name with common band names.
@@ -30,9 +29,6 @@ def create_output_name(url, band_dict):
     # Get Necessary Strings
     prod = url.split("/")[4].split(".")[0]
     asset = url.split("/")[-1].split(".")[-2]
-    # Add: get the image DOY for later archiving
-    time = url.split("/")[-1].split(".")[3]
-    file_doy = time[:8]
     # Hard-coded one-off for the Fmask name in case it is not in the band_dict but is needed for masking
     # Translation: hard-code an Fmask name in case it is not in band_dict but is needed for masking
     if asset == "Fmask":
@@ -43,7 +39,7 @@ def create_output_name(url, band_dict):
         output_name = (
             f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.{key}.subset.tif"
         )
-    return [output_name, file_doy]
+    return output_name
 
 
 def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
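A note on the naming scheme handled above: `prod`, `asset`, and the acquisition time are all sliced straight out of the HLS URL. Below is a minimal sketch of that parsing against a hypothetical HLS v2.0 URL (the host and path are illustrative, not taken from this repo):

```python
# Hypothetical HLS v2.0 URL shaped like the LP DAAC cloud archive paths.
url = (
    "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/"
    "HLSS30.020/HLS.S30.T49QGF.2024128T030531.v2.0/"
    "HLS.S30.T49QGF.2024128T030531.v2.0.B04.tif"
)
prod = url.split("/")[4].split(".")[0]     # "HLSS30"         (product collection)
asset = url.split("/")[-1].split(".")[-2]  # "B04"            (band/asset code)
time = url.split("/")[-1].split(".")[3]    # "2024128T030531" (acquisition timestamp)
print(time[:7])                            # "2024128", i.e. the YYYYDDD date
```

Incidentally, the removed `file_doy = time[:8]` would have kept a trailing `T` (e.g. `2024128T`); the new `files_collection` further below slices `[:7]` instead.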
@@ -115,9 +111,7 @@ def process_granule(
 
     # Check if all Outputs Exist for a Granule
     if not all(
-        os.path.isfile(
-            f"{output_dir}/{create_output_name(url, band_dict)[1]}/{create_output_name(url, band_dict)[0]}"
-        )
+        os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
         for url in granule_urls
     ):
 
@@ -131,12 +125,7 @@ def process_granule(
         )
 
         # Check if File exists in Output Directory
-        output_name = create_output_name(quality_url, band_dict)[0]
-        # Add: archive same-day images under a DOY subdirectory
-        file_doy = create_output_name(quality_url, band_dict)[1]
-        output_dir = f"{output_dir}/{file_doy}"
-        if not os.path.isdir(output_dir):
-            os.makedirs(output_dir)
+        output_name = create_output_name(quality_url, band_dict)
         output_file = f"{output_dir}/{output_name}"
 
         # Open Quality Layer
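With `create_output_name` back to returning a single string, the existence check composes into a one-line generator expression. A small sketch of the same skip-if-complete pattern (the wrapper function is hypothetical; `create_output_name` and `band_dict` are the script's own):

```python
import os

def granule_already_processed(granule_urls, output_dir, band_dict):
    # The granule is skipped only when every expected output file exists.
    return all(
        os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
        for url in granule_urls
    )
```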
@@ -408,6 +408,33 @@ def setup_dask_environment():
     env.__enter__()
 
 
+def files_collection(output_dir):
+    """
+    Archive downloaded HLS images into subdirectories named after the date in each file name.
+    """
+
+    # List all files in the output directory
+    files = os.listdir(output_dir)
+    # Iterate over the files
+    for file in files:
+        # Only process GeoTIFF files
+        if file.endswith(".tif"):
+            # Extract the acquisition date (YYYYDDD) from the file name
+            doy = file.split(".")[3][:7]
+
+            # Build the target directory path
+            target_dir = os.path.join(output_dir, doy)
+
+            # Create the target directory if it does not exist
+            if not os.path.exists(target_dir):
+                os.makedirs(target_dir)
+
+            # Move the file into the target directory
+            source_path = os.path.join(output_dir, file)
+            target_path = os.path.join(target_dir, file)
+            shutil.move(source_path, target_path)
+
+
 def main():
     """
     Main function to run the HLS SuPER script.
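A usage sketch for the new `files_collection` (directory and file names are hypothetical): it runs once after all downloads finish and regroups the flat output directory into date subfolders:

```python
# Before archiving, a flat output directory such as:
#   out/HLS.S30.T49QGF.2024128T030531.v2.0.B04.subset.tif
#   out/HLS.L30.T49QGF.2024129T031019.v2.0.NIR1.subset.tif
files_collection("out")
# After archiving, files are grouped by the 7-character YYYYDDD date
# parsed from the file name:
#   out/2024128/HLS.S30.T49QGF.2024128T030531.v2.0.B04.subset.tif
#   out/2024129/HLS.L30.T49QGF.2024129T031019.v2.0.NIR1.subset.tif
```

This is presumably why the in-download DOY subdirectories were removed above: the existence check in `process_granule` stays a simple flat-path lookup, and files are only moved once everything has downloaded.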
@@ -585,6 +612,11 @@ def main():
         logging.info("Timeseries Dataset Created. Removing Temporary Files...")
         shutil.rmtree(cog_dir)
 
+    # Add: archive downloaded images by DOY
+    logging.info("Archiving downloaded images by DOY date...")
+    files_collection(output_dir)
+    logging.info("Archiving complete!")
+
     # End Timer
     total_time = time.time() - start_time
     logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
@@ -96,6 +96,7 @@ mamba activate lpdaac_windows
 - Adapted from the official NASA demo: https://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
 - The first time you run the download command you will be prompted for a username and password, which you can obtain by registering at [Earthdata](https://urs.earthdata.nasa.gov/).
 - Note that the password should avoid symbols such as `@/#/$/%`, which may cause download errors.
+- Each user is limited to at most 100 requests per second; see https://forum.earthdata.nasa.gov/viewtopic.php?t=3734 (a throttling sketch follows below).
 
 ### 3.2 Download cloud data and preprocess it in memory
 
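Regarding the new rate-limit bullet: a minimal client-side throttle sketch, assuming a plain `urls` list and the `requests` library (none of this code is from the repo itself):

```python
import time
import requests

MAX_REQUESTS_PER_SECOND = 100  # per-user Earthdata limit cited in the README

def fetch_all(urls):
    """Fetch URLs sequentially while staying under the per-second request cap."""
    session = requests.Session()
    min_interval = 1.0 / MAX_REQUESTS_PER_SECOND
    responses = []
    for url in urls:
        start = time.monotonic()
        responses.append(session.get(url, timeout=30))
        # Sleep off whatever remains of this request's time slice.
        elapsed = time.monotonic() - start
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)
    return responses
```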