feat: optimize download and archiving logic; archive only after downloads complete.
parent 7ecd0378f1 · commit 6d16e32022
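In outline, the change removes per-file DOY bookkeeping from `create_output_name` and defers all archiving to a new `files_collection` pass that runs once downloads finish. Below is a minimal sketch of the resulting control flow with stand-in bodies; the real `process_granule` and `files_collection` are in the diff that follows, and everything else here is a hypothetical placeholder:

```python
# Minimal sketch of the reordered pipeline (stand-in bodies; hypothetical data).

def process_granule(granule_urls, output_dir):
    # Stand-in for the real downloader/subsetter in this script.
    print(f"downloading {len(granule_urls)} assets into {output_dir}")

def files_collection(output_dir):
    # Stand-in for the archiver added by this commit.
    print(f"archiving files in {output_dir} by DOY")

output_dir = "hls_outputs"                           # hypothetical path
granules = [["B04.tif", "Fmask.tif"], ["B04.tif"]]   # hypothetical URL lists

# Step 1: download every granule into a flat output directory.
for urls in granules:
    process_granule(urls, output_dir)

# Step 2: archive once, only after all downloads have finished.
files_collection(output_dir)
```

Archiving as a single pass at the end avoids the mid-download directory rewrites that the old code performed inside `process_granule`.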
@@ -21,7 +21,6 @@ import xarray as xr
 import rioxarray as rxr
 import dask.distributed
-
 
 def create_output_name(url, band_dict):
     """
     Uses HLS default naming scheme to generate an output name with common band names.
@@ -30,9 +29,6 @@ def create_output_name(url, band_dict):
     # Get Necessary Strings
     prod = url.split("/")[4].split(".")[0]
     asset = url.split("/")[-1].split(".")[-2]
-    # Add: get the image DOY for later archiving
-    time = url.split("/")[-1].split(".")[3]
-    file_doy = time[:8]
     # Hard-coded one off for Fmask name in case it is not in the band_dict but is needed for masking
     # Translation: hard-code a Fmask name in case it is not in band_dict but is needed for masking
     if asset == "Fmask":
@@ -43,7 +39,7 @@ def create_output_name(url, band_dict):
                 output_name = (
                     f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.{key}.subset.tif"
                 )
-    return [output_name, file_doy]
+    return output_name
 
 
 def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
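To make the slicing in `create_output_name` concrete, the self-contained sketch below walks a made-up HLS v2.0 asset URL through the same indexing; the URL and the `RED` → `B04` band mapping are illustrative assumptions, not values from this repo:

```python
# Hypothetical HLS v2.0 asset URL, following the LP DAAC naming scheme.
url = (
    "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/"
    "HLSS30.020/HLS.S30.T15TUH.2020273T163901.v2.0/"
    "HLS.S30.T15TUH.2020273T163901.v2.0.B04.tif"
)

prod = url.split("/")[4].split(".")[0]     # "HLSS30" (product collection)
asset = url.split("/")[-1].split(".")[-2]  # "B04"    (band code)
stem = ".".join(url.split("/")[-1].split(".")[:-2])
# stem == "HLS.S30.T15TUH.2020273T163901.v2.0"

# With a band_dict mapping like {"RED": "B04"}, the output name becomes:
print(f"{stem}.RED.subset.tif")
# HLS.S30.T15TUH.2020273T163901.v2.0.RED.subset.tif
```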
@@ -115,9 +111,7 @@ def process_granule(
 
     # Check if all Outputs Exist for a Granule
     if not all(
-        os.path.isfile(
-            f"{output_dir}/{create_output_name(url, band_dict)[1]}/{create_output_name(url, band_dict)[0]}"
-        )
+        os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
         for url in granule_urls
     ):
 
@@ -131,12 +125,7 @@ def process_granule(
         )
 
         # Check if File exists in Output Directory
-        output_name = create_output_name(quality_url, band_dict)[0]
-        # Add: archive same-day imagery into a subdirectory named after the image DOY
-        file_doy = create_output_name(quality_url, band_dict)[1]
-        output_dir = f"{output_dir}/{file_doy}"
-        if not os.path.isdir(output_dir):
-            os.makedirs(output_dir)
+        output_name = create_output_name(quality_url, band_dict)
         output_file = f"{output_dir}/{output_name}"
 
         # Open Quality Layer
@@ -408,6 +408,33 @@ def setup_dask_environment():
     env.__enter__()
 
 
+def files_collection(output_dir):
+    """
+    Archive downloaded HLS imagery into subdirectories named after the date in each filename.
+    """
+
+    # List all files in the output directory
+    files = os.listdir(output_dir)
+    # Iterate over the files
+    for file in files:
+        # Only handle GeoTIFF outputs
+        if file.endswith(".tif"):
+            # Extract the acquisition date (YYYYDDD) from the filename
+            doy = file.split(".")[3][:7]
+
+            # Build the target directory path
+            target_dir = os.path.join(output_dir, doy)
+
+            # Create the target directory if it does not exist
+            if not os.path.exists(target_dir):
+                os.makedirs(target_dir)
+
+            # Move the file into the target directory
+            source_path = os.path.join(output_dir, file)
+            target_path = os.path.join(target_dir, file)
+            shutil.move(source_path, target_path)
+
+
 def main():
     """
     Main function to run the HLS SuPER script.
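As a quick sanity check on the indexing inside `files_collection`, the snippet below shows how a subset output name yields its `YYYYDDD` archive key (the filename is a hypothetical example of what `create_output_name` produces):

```python
# Hypothetical output filename as produced by create_output_name.
file = "HLS.S30.T15TUH.2020273T163901.v2.0.RED.subset.tif"

# split(".")[3] is the acquisition timestamp "2020273T163901";
# its first seven characters are the year + day-of-year key.
doy = file.split(".")[3][:7]
print(doy)  # "2020273" -> the file lands in <output_dir>/2020273/
```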
@@ -585,6 +612,11 @@ def main():
         logging.info("Timeseries Dataset Created. Removing Temporary Files...")
         shutil.rmtree(cog_dir)
 
+    # Add: archive downloaded imagery by DOY
+    logging.info("Archiving downloaded imagery by DOY date.")
+    files_collection(output_dir)
+    logging.info("Archiving complete!")
+
     # End Timer
     total_time = time.time() - start_time
     logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
@@ -96,6 +96,7 @@ mamba activate lpdaac_windows
 - Based on the example demo from the NASA site: https://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
 - The first time the download command is run, you will be prompted for a username and password, which can be obtained by registering with [Earthdata](https://urs.earthdata.nasa.gov/).
 - Note that the password should preferably not contain symbols such as `@/#/$/%`, which may cause download errors.
+- A single user is limited to at most 100 requests per second; see: https://forum.earthdata.nasa.gov/viewtopic.php?t=3734
 
 ### 3.2 Download cloud data and preprocess it in memory
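On the Earthdata login mentioned in the bullets above: credentials can also be kept in a `~/.netrc` file (`_netrc` on Windows) so the script is not prompted interactively on each run. A minimal sketch using only the standard library; the username/password values are placeholders, and the prompting behavior of this particular script is an assumption:

```python
import netrc
from pathlib import Path

# Write a minimal Earthdata Login entry (placeholder credentials).
netrc_path = Path.home() / ".netrc"  # use "_netrc" on Windows
entry = "machine urs.earthdata.nasa.gov login YOUR_USERNAME password YOUR_PASSWORD\n"
if not netrc_path.exists():
    netrc_path.write_text(entry)
    netrc_path.chmod(0o600)  # the file must not be world-readable

# Verify that the entry parses.
auth = netrc.netrc(netrc_path).authenticators("urs.earthdata.nasa.gov")
print(auth[0] if auth else "no Earthdata entry found")
```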
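On the 100-requests-per-second limit in the last bullet: if downloads are parallelized, a small client-side throttle keeps the request rate under the cap. This is a generic sketch, not code from this repository:

```python
import threading
import time

class RateLimiter:
    """Allow at most `max_calls` acquisitions per rolling second."""

    def __init__(self, max_calls=100):
        self.max_calls = max_calls
        self.lock = threading.Lock()
        self.calls = []  # timestamps of recent acquisitions

    def acquire(self):
        while True:
            with self.lock:
                now = time.monotonic()
                # Drop timestamps older than one second.
                self.calls = [t for t in self.calls if now - t < 1.0]
                if len(self.calls) < self.max_calls:
                    self.calls.append(now)
                    return
            time.sleep(0.01)  # back off briefly and retry

limiter = RateLimiter(max_calls=100)
# Call limiter.acquire() before each HTTP request to the LP DAAC endpoint.
limiter.acquire()
```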