diff --git a/HLS_SuPER/HLS_PER.py b/HLS_SuPER/HLS_PER.py
index 240a772..7b2a6b7 100644
--- a/HLS_SuPER/HLS_PER.py
+++ b/HLS_SuPER/HLS_PER.py
@@ -21,7 +21,6 @@ import xarray as xr
 import rioxarray as rxr
 import dask.distributed
 
-
 def create_output_name(url, band_dict):
     """
     Uses HLS default naming scheme to generate an output name with common band names.
@@ -30,9 +29,6 @@ def create_output_name(url, band_dict):
     # Get Necessary Strings
     prod = url.split("/")[4].split(".")[0]
     asset = url.split("/")[-1].split(".")[-2]
-    # Add: get the image DOY for archiving
-    time = url.split("/")[-1].split(".")[3]
-    file_doy = time[:8]
     # Hard-coded one off for Fmask name incase it is not in the band_dict but is needed for masking
     # Translation: hard-code a Fmask name in case it is not in band_dict but is needed for masking
     if asset == "Fmask":
@@ -43,7 +39,7 @@ def create_output_name(url, band_dict):
                 output_name = (
                     f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.{key}.subset.tif"
                 )
-    return [output_name, file_doy]
+    return output_name
 
 
 def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
@@ -115,9 +111,7 @@ def process_granule(
 
     # Check if all Outputs Exist for a Granule
     if not all(
-        os.path.isfile(
-            f"{output_dir}/{create_output_name(url, band_dict)[1]}/{create_output_name(url, band_dict)[0]}"
-        )
+        os.path.isfile(f"{output_dir}/{create_output_name(url, band_dict)}")
         for url in granule_urls
     ):
 
@@ -131,12 +125,7 @@ def process_granule(
         )
 
         # Check if File exists in Output Directory
-        output_name = create_output_name(quality_url, band_dict)[0]
-        # Add: archive same-day imagery into subdirectories named by image DOY
-        file_doy = create_output_name(quality_url, band_dict)[1]
-        output_dir = f"{output_dir}/{file_doy}"
-        if not os.path.isdir(output_dir):
-            os.makedirs(output_dir)
+        output_name = create_output_name(quality_url, band_dict)
         output_file = f"{output_dir}/{output_name}"
 
         # Open Quality Layer
diff --git a/HLS_SuPER/HLS_SuPER.py b/HLS_SuPER/HLS_SuPER.py
index 85bf0db..2bba32a 100644
--- a/HLS_SuPER/HLS_SuPER.py
+++ b/HLS_SuPER/HLS_SuPER.py
@@ -408,6 +408,33 @@ def setup_dask_environment():
     env.__enter__()
 
 
+def files_collection(output_dir):
+    """
+    Archive downloaded HLS imagery into subdirectories based on the date in each filename.
+    """
+
+    # List all files in the output directory
+    files = os.listdir(output_dir)
+    # Iterate over all files
+    for file in files:
+        # Check whether the file is a tif
+        if file.endswith(".tif"):
+            # Extract the date from the filename
+            doy = file.split(".")[3][:7]
+
+            # Build the target directory path
+            target_dir = os.path.join(output_dir, doy)
+
+            # Create the target directory if it does not exist
+            if not os.path.exists(target_dir):
+                os.makedirs(target_dir)
+
+            # Move the file to the target directory
+            source_path = os.path.join(output_dir, file)
+            target_path = os.path.join(target_dir, file)
+            shutil.move(source_path, target_path)
+
+
 def main():
     """
     Main function to run the HLS SuPER script.
@@ -585,6 +612,11 @@ def main():
         logging.info("Timeseries Dataset Created. Removing Temporary Files...")
         shutil.rmtree(cog_dir)
 
+    # Add: archive downloaded imagery by DOY
+    logging.info("Archiving downloaded imagery by DOY date.")
+    files_collection(output_dir)
+    logging.info("Archiving complete!")
+
     # End Timer
     total_time = time.time() - start_time
     logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
diff --git a/README.md b/README.md
index 12ed215..bb18d97 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,7 @@ mamba activate lpdaac_windows
 
 - Adapted from the NASA example demo: https://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
 - When running the download command for the first time, a username and password are required; they can be obtained by registering at [Earthdata](https://urs.earthdata.nasa.gov/).
 - Note that the password should preferably not contain symbols such as `@/#/$/%`, as they may cause errors during download.
+- A single user is limited to at most 100 requests per second; see https://forum.earthdata.nasa.gov/viewtopic.php?t=3734
 
 ### 3.2 Download cloud data and preprocess it in memory
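For reference, the `doy = file.split(".")[3][:7]` line in the new `files_collection` helper relies on the HLS v2.0 naming convention: the fourth dot-separated field of a granule name (preserved in the `*.subset.tif` outputs) is the sensing time in `YYYYDDDTHHMMSS` form, so its first seven characters give the year plus day-of-year used as the archive subdirectory. Below is a minimal sketch of that parsing; the output filenames are hypothetical examples that follow the convention, not files produced by this patch.

```python
from collections import defaultdict


def doy_token(filename: str) -> str:
    """Return the YYYYDDD (year + day-of-year) token from an HLS-style filename.

    HLS v2.0 names look like HLS.S30.T15XWH.2020209T194859.v2.0.RED.subset.tif,
    so the fourth dot-separated field carries the sensing time (YYYYDDDTHHMMSS).
    """
    return filename.split(".")[3][:7]


# Hypothetical subset outputs, grouped the same way files_collection()
# chooses its per-DOY subdirectories.
outputs = [
    "HLS.S30.T15XWH.2020209T194859.v2.0.RED.subset.tif",
    "HLS.L30.T15XWH.2020209T194201.v2.0.NIR.subset.tif",
    "HLS.S30.T15XWH.2020214T194911.v2.0.FMASK.subset.tif",
]
by_doy = defaultdict(list)
for name in outputs:
    by_doy[doy_token(name)].append(name)
print(dict(by_doy))  # two files under '2020209', one under '2020214'
```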
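The README bullet about the 100 requests-per-second per-user limit is informational only; nothing in this patch enforces it. If download concurrency were ever raised, a client-side throttle along the following lines could help stay under the cap. This is only a sketch under that assumption, and `download_granule` is a hypothetical stand-in rather than a function from this repository.

```python
import time

MAX_REQUESTS_PER_SECOND = 50  # stay well below the documented 100 req/s limit
_min_interval = 1.0 / MAX_REQUESTS_PER_SECOND
_last_call = 0.0


def throttled(func):
    """Space out calls to a request-issuing function on the client side."""
    def wrapper(*args, **kwargs):
        global _last_call
        wait = _last_call + _min_interval - time.monotonic()
        if wait > 0:
            time.sleep(wait)
        _last_call = time.monotonic()
        return func(*args, **kwargs)
    return wrapper


@throttled
def download_granule(url):
    # Hypothetical placeholder for the real HTTP request to Earthdata.
    print(f"GET {url}")


for i in range(3):
    download_granule(f"https://example.invalid/granule/{i}")
```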