Compare commits


22 Commits
v1.0 ... master

Author SHA1 Message Date
11118915b3 feat(GEE_Scripts): 添加Sentinel-2影像下载脚本. 2025-12-09 12:44:26 +08:00
ff1dea2324 feat(sr2rgb): 完善影像转换处理逻辑. 2025-12-09 12:18:18 +08:00
fc5a29696a feat(sr2rgb): 添加SR地表反射率影像批量镶嵌并转换8bit RGB影像代码. 2025-12-07 17:42:30 +08:00
7b3573e57d fix: 完善注释说明与部分路径变量类型声明. 2025-10-20 20:29:26 +08:00
63c85931e3 feat(DEM_SuPER): 优化ALOSDEM文件处理逻辑, 直接读取并转储压缩包内部的DEM文件. 2025-10-20 10:37:22 +08:00
62b2370fd7 feat: 调整数据检索方式, 统一使用外包Polygon检索以减少所需处理的数据量. 2025-10-20 08:48:38 +08:00
c90432a815 feat(DataV_SuPER): 增强行政区划代码解析功能,支持县级市搜索. 2025-10-14 11:04:58 +08:00
2b007df006 feat(steup): 更新环境配置文件和依赖项版本. 2025-10-13 11:12:23 +08:00
7a28957087 feat(steup): 更新earthaccess包至0.15.1版本. 2025-10-13 10:46:10 +08:00
a3e4c7a172 feat(HLS_SuPER): 使用最小外包简化检索时的ROI范围. 2025-10-13 10:00:56 +08:00
116b964904 feat(DataV_SuPER): 添加从阿里云DataV获取行政区边界数据的功能. 2025-10-12 16:38:28 +08:00
fcbfa1bf08 fix(DEM_SuPER): 优化ALOSDEM文件检索匹配模式并优化裁剪逻辑. 2025-10-12 16:28:28 +08:00
418923f7df feat(DEM_SuPER): 添加对ALOS 12.5m DEM的本地解压支持. 2025-10-11 13:20:23 +08:00
b872e9e3f1 feat(DEM_SuPER): 添加对ALOS 12.5m DEM的下载支持. 2025-10-10 23:37:38 +08:00
64f50ffc0a feat: 优化项目路径处理和代码结构以及MODIS下载处理逻辑. 2025-09-12 10:41:56 +08:00
bda6f0a1ef refactor(sr2rgb): 使用xarray重构地表反射率合成RGB图像功能. 2025-09-05 21:07:51 +08:00
3d5d159d7f fix(HLS_SuPER): 确保多边形外环为逆时针方向并简化网络查询时提交的坐标. 2025-09-05 12:15:34 +08:00
d01ba65d64 feat(utils): 添加地表反射率转RGB图像功能. 2025-09-05 11:24:13 +08:00
c19f334035 refactor: 使用动态路径替换硬编码路径. 2025-09-04 14:47:42 +08:00
8883fd8d58 fix(setup): 补全Python3.12新环境下的selenium依赖. 2025-08-11 19:53:00 +08:00
1f86d2da2b feat(setup): 升级环境至Python3.12, 并更新环境配置文件及README文档. 2025-08-10 18:09:25 +08:00
e1997c102e fix(DEM_SuPER): 修复DEM数据下载拼接逻辑bug. 2025-08-05 17:21:53 +08:00
18 changed files with 1647 additions and 728 deletions

4
.gitignore vendored
View File

@ -9,3 +9,7 @@ data/
*.tif
*.tiff
*.ipynb
*.log

View File

@ -0,0 +1,211 @@
"""
从阿里云 DataV 下载行政区边界数据并保存为原始 JSON; 由于部分字段与 GDAL 不兼容, 导致无法
直接读取, 需要先清洗再保存为 GeoJSON.
- 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector
- Step1: "城市名称"解析为行政区划代码 (adcode);
- Step2: DataV 原始数据保存为 `城市名称.json` 文件;
- Step3: 移除不兼容 GDAL/GeoPandas 的属性字段 (parent, center, centroid, acroutes);
- Step4: 将清洗后的结果写出为 `城市名称.geojson` 文件.
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-10-20
===============================================================================
"""
import json
import requests
from pathlib import Path
from typing import Optional
import re
def get_datav_json(accode: str) -> dict:
"""
从 DataV 接口获取行政区边界的原始 JSON 数据并返回字典.
"""
# 使用路径式接口, 支持如 "420100" 或 "420100_full"
url = f"https://geo.datav.aliyun.com/areas_v3/bound/{accode}.json"
response = requests.get(url, timeout=15)
response.raise_for_status()
return response.json()
def fetch_and_save_geojson(accode: str, city_name: str, out_dir: Path) -> Path:
"""
获取 DataV 原始数据, 先保存为 .json; 随后清洗属性并另存为 .geojson.
"""
raw_data = get_datav_json(accode)
# 处理 features: 移除不兼容 GeoPandas 的属性
def sanitize_properties(props: dict) -> dict:
out = {}
for k, v in props.items():
# 移除嵌套对象
if k in ("parent", "center", "centroid", "acroutes"):
continue
# 仅保留标量类型; 丢弃其他嵌套结构
if isinstance(v, (str, int, float, bool)) or v is None:
out[k] = v
return out
# 输出路径(确保目录存在)
out_dir_path = Path(out_dir)
out_dir_path.mkdir(parents=True, exist_ok=True)
# 先保存原始 JSON(未清洗)
raw_json_path = out_dir_path / f"{city_name}.json"
with raw_json_path.open("w", encoding="utf-8") as f:
json.dump(raw_data, f, ensure_ascii=False)
# 再保存清洗后的 GeoJSON
out_path = out_dir_path / f"{city_name}.geojson"
# 深拷贝后进行清洗, 避免影响原始数据
data = json.loads(json.dumps(raw_data))
features = data.get("features", [])
for feature in features:
props = feature.get("properties", {})
feature["properties"] = sanitize_properties(props)
# 写出为 .geojson, 确保 UTF-8 且保留中文字符
with out_path.open("w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False)
return out_path
def _normalize_name(name: str) -> str:
name = name.strip()
# 简单去除常见后缀提高匹配鲁棒性
for suffix in ("省", "市", "地区", "县", "自治州", "自治县", "特别行政区"):
if name.endswith(suffix):
name = name[: -len(suffix)]
return name
def _name_matches_exact(target: str, candidate: str) -> bool:
return _normalize_name(target) == _normalize_name(candidate)
def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optional[str]:
"""
通过城市名称解析 DataV 行政区划代码.
优先遍历全国(100000_full)和各省级(full)数据进行匹配.
如果在省级数据中未找到, 会进一步搜索地级市下的区县数据.
返回如 "420100" 或 "420100_full", 找不到则返回 None.
"""
# 先在全国层级中尝试匹配(通常包含省级与直辖市)
try:
cn = requests.get(
"https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json",
timeout=20,
).json()
except Exception:
cn = None
target = city_name
contains_province_candidate = None
provinces = []
if cn:
# 先尝试在全国数据中直接匹配省级名称
for feat in cn.get("features", []):
props = feat.get("properties", {})
if props.get("level") == "province":
name = props.get("name", "")
code = str(props.get("adcode", ""))
if re.fullmatch(r"\d{6}", code):
if _name_matches_exact(target, name):
return f"{code}_full" if prefer_full else code
if _normalize_name(target) in _normalize_name(name):
contains_province_candidate = code
provinces.append(code)
# 遍历各省级行政区, 精确匹配城市名
cities_to_search = [] # 收集需要进一步搜索的地级市
for pcode in provinces:
try:
prov = requests.get(
f"https://geo.datav.aliyun.com/areas_v3/bound/{pcode}_full.json",
timeout=20,
).json()
except Exception:
continue
exact_candidate = None
contains_candidate = None
for feat in prov.get("features", []):
props = feat.get("properties", {})
level = props.get("level")
name = props.get("name", "")
code = str(props.get("adcode", ""))
# 仅考虑城市或区县, 且编码为6位数字
if level in ("city", "district") and re.fullmatch(r"\d{6}", code):
if _name_matches_exact(target, name):
exact_candidate = code
break
# 作为回退: 包含匹配, 但不立即返回, 继续寻找精确匹配
if _normalize_name(target) in _normalize_name(name):
contains_candidate = code
# 收集地级市代码,用于后续搜索县级市
if level == "city" and re.fullmatch(r"\d{6}", code):
cities_to_search.append(code)
if exact_candidate:
return f"{exact_candidate}_full" if prefer_full else exact_candidate
if contains_candidate:
return f"{contains_candidate}_full" if prefer_full else contains_candidate
# 如果在省级数据中未找到,搜索地级市下的区县数据(如县级市)
for city_code in cities_to_search:
try:
city_data = requests.get(
f"https://geo.datav.aliyun.com/areas_v3/bound/{city_code}_full.json",
timeout=20,
).json()
except Exception:
continue
for feat in city_data.get("features", []):
props = feat.get("properties", {})
level = props.get("level")
name = props.get("name", "")
code = str(props.get("adcode", ""))
# 检查区县级别的行政区(包括县级市)
if level == "district" and re.fullmatch(r"\d{6}", code):
if _name_matches_exact(target, name):
return f"{code}_full" if prefer_full else code
# 包含匹配作为备选
if _normalize_name(target) in _normalize_name(name):
# 找到包含匹配的县级市,直接返回
return f"{code}_full" if prefer_full else code
# 如果城市/区县未匹配到, 回退使用省级包含匹配
if contains_province_candidate:
return f"{contains_province_candidate}_full" if prefer_full else contains_province_candidate
return None
def fetch_and_save_geojson_by_name(city_name: str, out_dir: Path, prefer_full: bool = False) -> Path:
"""
通过城市名称解析 adcode, 并直接拉取与保存 GeoJSON.
"""
code = resolve_adcode_by_name(city_name, prefer_full=prefer_full)
if not code:
raise ValueError(f"无法通过名称解析到行政区划代码: {city_name}")
return fetch_and_save_geojson(code, city_name, out_dir)
if __name__ == "__main__":
# city_name = "湖北省"
# city_name = "武汉市"
# city_name = "十堰市"
# city_name = "钟祥市"
city_name = "湖北省"
out_dir = Path("./data/vectors/")
out = fetch_and_save_geojson_by_name(city_name, out_dir, prefer_full=False)
print(f"Saved raw JSON and GeoJSON for {city_name}: {out}.")

View File

@ -4,44 +4,60 @@
This module contains functions related to preprocessing DEM data.
For example, elevation, slope, aspect
Step1: Use earthaccess search and download NASADEM Data
Step1: Use earthaccess search and download DEM Data
- NASADEM_HGT
- includes 30m DEM, based on SRTM data
- https://lpdaac.usgs.gov/products/nasadem_hgtv001/
- NASADEM_SC
- includes 30m slope, aspect, based on NASADEM_HGT
- https://lpdaac.usgs.gov/products/nasadem_scv001/
- ALOS PALSAR RTC Project
- includes 12.5, 30m DEM, based on ALOS PALSAR data
- https://www.earthdata.nasa.gov/data/projects/alos-palsar-rtc-project
Step2: Process DEM data
Step2a: Process NASADEM data
- 下载的 NASADEM 均为 *.zip 文件, 需先进行解压
- NASADEM 文件名称结构为: NASADEM_类型_网格编号/网格编号.数据类型
- 高程示例: NASADEM_HGT_n30e113/n30e113.hgt
- 坡度示例: NASADEM_SC_n30e113/n30e113.slope
- 坡向示例: NASADEM_SC_n30e113/n30e113.aspect
- 读取文件按网格进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
Step2b: Process ALOS PALSAR RTC Project data
- 下载的 ALOS PALSAR RTC Project 均为 *.zip 文件, 需先进行解压
- ALOS PALSAR RTC Project 文件名称结构为: AP_轨道号_CCC_DDDD_RT1.数据类型.tif
- 高程示例: AP_16112_FBS_F0620_RT1.dem.tif
- 读取文件按指定范围进行裁剪并镶嵌, 坡度和坡向数据需要进行缩放处理, 将网格范围的结果保存为 *.tif 文件
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-07-03
Last Updated: 2025-10-20
===============================================================================
"""
import os
import sys
import glob
from pathlib import Path
import json
import requests
import zipfile
import time
import dask.distributed
import logging
import earthaccess
import geopandas as gpd
import numpy as np
from rioxarray import open_rasterio
sys.path.append("D:/NASA_EarthData_Script")
sys.path.append(str(Path(".").parent))
from utils.common_utils import setup_dask_environment, clip_image, mosaic_images
from utils.common_utils import (
setup_dask_environment,
setup_logging,
clip_image,
mosaic_images,
)
from HLS_SuPER.HLS_Su import earthdata_search
@ -97,59 +113,105 @@ def download_granule(granule_urls: list[str], output_dir: str) -> bool:
try:
earthaccess.download(granule_urls, output_dir)
except Exception as e:
logging.error(f"Error downloading data: {e}. Skipping.")
return False
# 下载失败时, 先尝试使用 requests 库下载
for url in granule_urls:
response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(
os.path.join(output_dir, os.path.basename(url)), "wb"
) as f:
f.write(response.content)
else:
logging.error(
f"Error downloading data: {response.status_code}. Skipping."
)
return False
logging.info("All Data already downloaded.")
return True
def unzip_nasadem_files(zip_file_list: list[str], unzip_dir: str):
def process_dem_zip(
zip_file_list: list[str], output_dir: Path, mode_str: str = "NASADEM"
) -> None:
"""
解压下载的 NASADEM ZIP 文件, 并将解压后的文件统一为可读写的 .hgt 格式
处理下载的 DEM ZIP 文件,
若为 NASADEM, 则将其解压后的文件统一为可读写的 .hgt 格式;
若为 ALOSDEM, 则直接读取 dem 并转储为 DEFLATE 压缩后的 tif 文件.
Parameters
----------
zip_file_list: list
待解压的 ZIP 文件列表
output_dir: Path
输出目录
mode_str: str, optional
解压模式, 默认为 "NASADEM", 可选 "NASADEM", "ALOSDEM"
"""
try:
for zip_path in zip_file_list:
if not zipfile.is_zipfile(zip_path):
if not os.path.isfile(zip_path) or not zipfile.is_zipfile(zip_path):
continue
with zipfile.ZipFile(zip_path, "r") as zip_ref:
# 仅解压包含 .hgt, .slope, .aspect 的文件
for hgt_file in [f for f in zip_ref.namelist() if f.endswith((".hgt", ".slope", ".aspect"))]:
# 解压时重命名文件
new_name = (
hgt_file.replace(".hgt", ".elevation.hgt")
if hgt_file.endswith(".hgt")
else f"{hgt_file}.hgt"
if mode_str == "NASADEM":
# 示例ZIP文件名称: NASADEM_HGT_n30e113.zip
# 仅解压包含 .hgt, .slope, .aspect 的文件
for tif_file in [
f
for f in zip_ref.namelist()
if f.endswith((".hgt", ".slope", ".aspect"))
]:
# 解压时重命名文件
new_name = (
tif_file.replace(".hgt", ".elevation.hgt")
if tif_file.endswith(".hgt")
else f"{tif_file}.hgt"
)
# 解压文件到指定目录
unzip_file_path = os.path.join(output_dir, new_name)
if os.path.exists(unzip_file_path):
continue
with zip_ref.open(tif_file) as source_file:
with open(unzip_file_path, "wb") as unzip_file:
unzip_file.write(source_file.read())
elif mode_str == "ALOSDEM":
# 示例ZIP文件名称: AP_14354_FBS_F3000_RT1.zip
# 仅解压包含 dem.tif 的文件, 其内部路径为 AP_14354_FBS_F3000_RT1/AP_14354_FBS_F3000_RT1.dem.tif
tif_file = [f for f in zip_ref.namelist() if f.endswith("dem.tif")][0]
# 读取压缩包内的文件并转储到指定目录
dem = open_rasterio(zip_ref.open(tif_file))
# 转储为 INT32 DEFLATE 压缩后的 .tif 文件
dem.rio.to_raster(
output_dir / os.path.basename(tif_file),
compress="DEFLATE",
)
unzip_file_path = os.path.join(unzip_dir, new_name)
if os.path.exists(unzip_file_path):
continue
with zip_ref.open(hgt_file) as source_file:
with open(unzip_file_path, 'wb') as unzip_file:
unzip_file.write(source_file.read())
except Exception as e:
logging.error(f"Error unzipping NASADEM to {unzip_dir}: {e}")
logging.error(f"Error unzipping {mode_str} to {output_dir}: {e}")
return
def process_granule(
unzip_dir: str,
output_dir: str,
unzip_dir: Path,
output_dir: Path,
mode_str: str,
name: str,
roi: list,
clip=True,
tile_id: str = None,
) -> bool:
"""
读取解压并重命名处理后的指定类型 NASADEM 数据并进行预处理, 包括读取, 裁剪, 镶嵌, 并对坡度坡向进行缩放
读取解压并重命名处理后的指定类型 DEM 数据并进行预处理, 包括读取, 裁剪, 镶嵌, 并对坡度坡向进行缩放
Parameters
----------
unzip_dir: str
解压后的 NASADEM 文件根目录
output_dir: str
unzip_dir: Path
解压后的 DEM 文件根目录
output_dir: Path
输出根目录
mode_str: str
数据模式, 可选 NASADEM, ALOSDEM
name: str
数据类型, 包括 elevation, slope, aspect
数据类型, 可选 elevation, slope, aspect
roi: list
网格范围
clip: bool
@ -163,34 +225,46 @@ def process_granule(
处理状态 True or False
"""
dem_file_list = glob.glob(os.path.join(unzip_dir, f"*{name}.hgt"))
out_tif_name = f"DEM.NASADEM.{tile_id}.2000.{name}.tif"
if mode_str == "NASADEM":
dem_file_list = glob.glob(os.path.join(unzip_dir, f"*{name}.hgt"))
out_tif_name = f"DEM.{mode_str}.{tile_id}.2000.{name}.tif"
elif mode_str == "ALOSDEM":
dem_file_list = glob.glob(os.path.join(unzip_dir, "*.dem.tif"))
# 读取 process_dem_zip 转储后的 ALOS *.dem.tif 文件
out_tif_name = f"DEM.{mode_str}.{tile_id}.2011.{name}.tif"
output_file = os.path.join(output_dir, out_tif_name)
if not os.path.isfile(output_file):
try:
dem_raster_list = []
for dem_path in dem_file_list:
dem = (
open_rasterio(dem_path)
.squeeze(dim="band", drop=True)
.rename(name)
raster = (
open_rasterio(dem_path).squeeze(dim="band", drop=True).rename(name)
)
if name == "slope" or name == "aspect":
org_attrs = dem.attrs
dem = dem * 0.01
org_attrs = raster.attrs
raster = raster * 0.01
# 恢复源数据属性信息
dem.attrs = org_attrs.copy()
dem.rio.write_crs("EPSG:4326", inplace=True)
dem.attrs["scale_factor"] = 1
dem_raster_list.append(dem)
if len(dem_raster_list) > 1:
raster.attrs = org_attrs.copy()
raster.rio.write_crs("EPSG:4326", inplace=True)
raster.attrs["scale_factor"] = 1
dem_raster_list.append(raster)
if len(dem_raster_list) >= 1:
# 先逐一裁剪再镶嵌合成, 仅在提供 ROI 且要求裁剪时执行
if roi is not None and clip:
clipped_list = []
for raster in dem_raster_list:
clipped = clip_image(raster, roi, clip_by_box=True)
clipped_list.append(clipped)
dem_raster_list = clipped_list
# 镶嵌合成
if name == "slope" or name == "aspect":
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-9999)
else:
dem_mosaiced = mosaic_images(dem_raster_list, nodata=-32768)
if roi is not None and clip:
dem_mosaiced = clip_image(dem_mosaiced, roi)
dem_mosaiced.rio.to_raster(output_file, driver="COG", compress="DEFLATE")
dem_mosaiced.rio.to_raster(
output_file, driver="COG", compress="DEFLATE"
)
except Exception as e:
logging.error(f"Error processing files in {name}: {e}")
return False
@ -200,71 +274,92 @@ def process_granule(
return True
def main(region: list, asset_name: list, tile_id: str):
bbox = tuple(list(region.total_bounds))
# 示例文件名称: NASADEM_HGT_n30e113.zip
def main(
output_root_dir: Path,
region_file: Path,
tile_id: str,
mode_str: str = "NASADEM",
):
results_urls = []
output_root_dir = ".\\data\\DEM\\NASADEM"
mode_str = mode_str.upper()
output_root_dir = output_root_dir / mode_str
# 放置下载的 ZIP 文件
download_dir = os.path.join(output_root_dir, "ZIP")
download_dir = output_root_dir / "ZIP"
# 放置解压并预处理后的文件
unzip_dir = os.path.join(download_dir, "UNZIP")
output_dir = os.path.join(output_root_dir, "TIF", tile_id)
unzip_dir = download_dir / "UNZIP"
output_dir = output_root_dir / "TIF" / tile_id
os.makedirs(download_dir, exist_ok=True)
os.makedirs(unzip_dir, exist_ok=True)
results_urls_file = f"{output_root_dir}\\NASADEM_{tile_id}_results_urls.json"
if not os.path.isfile(results_urls_file):
results_urls = earthdata_search(asset_name, roi=bbox)
with open(results_urls_file, "w") as f:
json.dump(results_urls, f)
else:
results_urls = json.load(open(results_urls_file))
# 构造待解压的文件列表
zip_file_list = [os.path.join(download_dir, os.path.basename(result[0])) for result in results_urls]
os.makedirs(output_dir, exist_ok=True)
results_urls_file = output_root_dir / f"{mode_str}_{tile_id}_results_urls.json"
# 配置日志
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=[
logging.StreamHandler(sys.stdout),
logging.FileHandler(f"{output_root_dir}\\NASADEM_SuPER.log"),
],
)
logging.info(f"Found {len(results_urls)} NASADEM granules.")
logs_file = output_root_dir / f"{mode_str}_{tile_id}_SuPER.log"
setup_logging(logs_file)
# 检索数据并存储, 默认覆盖上一次检索记录
if mode_str == "NASADEM":
asset_name = ["NASADEM_HGT", "NASADEM_SC"]
results_urls = earthdata_search(asset_name, region_file=region_file)
elif mode_str == "ALOSDEM":
asset_name = ["ALOS_PSR_RTC_HIGH"]
results_urls = earthdata_search(asset_name, region_file=region_file)
# ALOSDEM 数据仅保存含 "FBS" 字符串的 URL
results_urls = [url for url in results_urls if "FBS" in url[0]]
else:
results_urls = []
with open(results_urls_file, "w") as f:
json.dump(results_urls, f)
logging.info(f"Found {len(results_urls)} {mode_str} granules after filtering.")
if len(results_urls) == 0:
return
# 构造待解压的文件列表
zip_file_list = [
download_dir / os.path.basename(result[0])
for result in results_urls
]
client = dask.distributed.Client(timeout=60, memory_limit="8GB")
client.run(setup_dask_environment)
all_start_time = time.time()
client.scatter(results_urls)
logging.info(f"Start processing NASADEM ...")
logging.info(f"Start processing {mode_str} ...")
download_tasks = [
dask.delayed(download_granule)(granule_url, download_dir)
for granule_url in results_urls
]
unzip_tasks = dask.delayed(unzip_nasadem_files)(zip_file_list, unzip_dir)
process_tasks = [
dask.delayed(process_granule)(
unzip_dir, output_dir, name, region, True, tile_id
)
for name in ["elevation", "slope", "aspect"]
]
# 根据模式传递正确的解压标识
unzip_tasks = dask.delayed(process_dem_zip)(zip_file_list, unzip_dir, mode_str)
if mode_str == "NASADEM":
process_tasks = [
dask.delayed(process_granule)(
unzip_dir, output_dir, mode_str, name, region, True, tile_id
)
for name in ["elevation", "slope", "aspect"]
]
dask.compute(*download_tasks)
dask.compute(unzip_tasks)
dask.compute(*process_tasks)
if mode_str == "NASADEM":
dask.compute(*process_tasks)
client.close()
all_total_time = time.time() - all_start_time
logging.info(
f"All NASADEM Downloading complete and proccessed. Total time: {all_total_time} seconds"
f"All {mode_str} Downloading complete and processed. Total time: {all_total_time} seconds"
)
if __name__ == "__main__":
earthaccess.login(persist=True)
# region = gpd.read_file("./data/vectors/wuling_guanqu_polygon.geojson")
tile_id = "49REL"
region = gpd.read_file(f"./data/vectors/{tile_id}.geojson")
asset_name = ["NASADEM_HGT", "NASADEM_SC"]
main(region, asset_name, tile_id)
earthaccess.login(strategy="netrc", persist=True)
# tile_id = "49REL"
# tile_id = "Wuhan"
tile_id = "Hubei"
region_file = Path(f"./data/vectors/{tile_id}.geojson")
# mode_str = "NASADEM"
mode_str = "ALOSDEM"
output_root_dir = Path(".\\data\\DEM")
main(output_root_dir, region_file, tile_id, mode_str)
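For reference, the NASADEM branch of `main()` can be driven the same way as the ALOSDEM run in the `__main__` block above. A minimal sketch (editor's illustration, to be run in the same module as `main()`; paths mirror the ones used above):

```python
# Editor's sketch: invoking the NASADEM branch, analogous to the __main__ block above.
from pathlib import Path

import earthaccess

earthaccess.login(strategy="netrc", persist=True)
main(
    output_root_dir=Path("./data/DEM"),
    region_file=Path("./data/vectors/49REL.geojson"),
    tile_id="49REL",
    mode_str="NASADEM",  # NASADEM_HGT + NASADEM_SC -> elevation / slope / aspect COGs
)
```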

View File

@ -28,7 +28,7 @@ import logging
import earthaccess
from xarray import open_dataset
sys.path.append("D:/NASA_EarthData_Script")
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_utils import setup_dask_environment
from HLS_SuPER.HLS_Su import earthdata_search

View File

@ -6,7 +6,7 @@ For example, MCD43A3, MCD43A4, MOD11A1.
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-07-15
Last Updated: 2025-10-16
===============================================================================
"""
@ -15,14 +15,20 @@ import sys
import json
import time
import logging
from pathlib import Path
import earthaccess
import rioxarray as rxr
import dask.distributed
import geopandas as gpd
sys.path.append("D:/NASA_EarthData_Script")
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_utils import clip_image, reproject_image, setup_dask_environment
from utils.common_utils import (
clip_image,
reproject_image,
setup_dask_environment,
setup_logging,
)
from HLS_SuPER.HLS_Su import earthdata_search
@ -117,7 +123,15 @@ def open_modis(file_path, prod_name):
raise ValueError(f"Unknown MODIS product: {prod_name}.")
def process_modis(download_file, prod_name, roi, clip=True, scale=True, target_crs=None, output_file=None):
def process_modis(
download_file,
prod_name,
roi,
clip=True,
scale=True,
target_crs=None,
output_file=None,
):
"""
MODIS 数据进行预处理, 包括裁剪, 重投影和缩放.
"""
@ -127,13 +141,14 @@ def process_modis(download_file, prod_name, roi, clip=True, scale=True, target_c
if roi is not None and clip:
modis = clip_image(modis, roi)
if target_crs is not None:
if target_crs is not None and modis is not None:
modis = reproject_image(modis, target_crs)
# 重投影后再裁剪一次
if roi is not None and clip:
modis = clip_image(modis, roi)
# 重投影后再裁剪一次
if roi is not None and clip:
modis = clip_image(modis, roi)
if modis.isnull().all():
logging.error(f"Processing {download_file}. Roi area all pixels are nodata.")
if scale:
# 缩放计算后会丢源属性和坐标系, 需要先备份源数据属性信息
org_attrs = modis.attrs
@ -165,14 +180,9 @@ def process_granule(
clip,
scale,
output_dir,
target_crs="EPSG:4326",
tile_id=None,
target_crs="EPSG:4326",
):
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
download_hdf_name = os.path.basename(granule_urls[0])
# 获取名称与日期
@ -191,7 +201,7 @@ def process_granule(
out_tif_name = f"MODIS.{prod_name}.{tile_id}.{date}.NBRDF.tif"
else:
out_tif_name = download_hdf_name.replace(".hdf", ".tif")
# 除 MCD43A4 需用于光谱指数计算外, MOD11A1 日间温度与 MCD43A4 反照率无需再按日期归档
# 除 MCD43A4 需用于光谱指数计算外, MOD11A1 日间温度与 MCD43A3 反照率无需再按日期归档
if prod_name == "MOD11A1" or prod_name == "MCD43A3":
output_path = os.path.join(output_dir, "TIF")
else:
@ -199,39 +209,44 @@ def process_granule(
os.makedirs(output_path, exist_ok=True)
output_file = os.path.join(output_path, out_tif_name)
if not os.path.isfile(output_file):
# Step1: 下载 HDF 文件
if not os.path.isfile(download_file):
try:
earthaccess.download(granule_urls, download_path)
except Exception as e:
logging.error(f"Error downloading {download_file}: {e}")
return
else:
logging.warning(f"{download_file} already exists. Skipping.")
# Step2: 处理 HDF 文件
# Step1: 下载 HDF 文件
if not os.path.isfile(download_file):
try:
process_modis(download_file, prod_name, roi, clip, scale, target_crs, output_file)
earthaccess.download(granule_urls, download_path)
except Exception as e:
logging.error(f"Error downloading {download_file}: {e}")
return
else:
logging.warning(f"{download_file} already exists. Skipping.")
# Step2: 处理 HDF 文件
if not os.path.isfile(output_file):
try:
process_modis(
download_file, prod_name, roi, clip, scale, target_crs, output_file
)
logging.info(f"Processed {output_file} successfully.")
except Exception as e:
os.remove(download_file)
logging.info(f"Removed corrupted file {download_file}. Retrying download.")
process_granule(granule_urls, roi, clip, scale, output_dir, target_crs, tile_id)
logging.info(f"Processed {output_file} successfully.")
process_granule(
granule_urls, roi, clip, scale, output_dir, target_crs, tile_id
)
else:
logging.warning(f"{output_file} already exists. Skipping.")
def main(
region: list,
region_file: Path,
asset_name: str,
modis_tile_id: str,
years: list,
dates: tuple[str, str],
tile_id: str,
target_crs: str,
output_root_dir: str,
):
bbox = tuple(list(region.total_bounds))
region = gpd.read_file(region_file)
results_urls = []
output_dir = os.path.join(output_root_dir, asset_name)
os.makedirs(output_dir, exist_ok=True)
@ -246,7 +261,7 @@ def main(
)
year_temporal = (f"{year}-{dates[0]}T00:00:00", f"{year}-{dates[1]}T23:59:59")
year_results = earthdata_search(
[asset_name], year_temporal, bbox, modis_tile_id
[asset_name], year_temporal, region_file, modis_tile_id
)
results_urls.extend(year_results)
with open(year_results_file, "w") as f:
@ -255,20 +270,9 @@ def main(
with open(results_urls_file, "w") as f:
json.dump(results_urls, f)
# 配置日志, 首次配置生效, 后续嵌套配置无效
logging.basicConfig(
level=logging.INFO, # 级别为INFO及以上的日志会被记录
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=[
logging.StreamHandler(sys.stdout), # 输出到控制台
logging.FileHandler(
f"{output_dir}\\{asset_name}_{tile_id}_SuPER.log"
), # 输出到日志文件
],
)
client = dask.distributed.Client(timeout=60, memory_limit="8GB")
client.run(setup_dask_environment)
client.run(setup_logging)
all_start_time = time.time()
for year in years:
year_results_dir = os.path.join(output_dir, year)
@ -276,6 +280,11 @@ def main(
year_results_dir, f"{asset_name}_{modis_tile_id}_{year}_results_urls.json"
)
year_results = json.load(open(year_results_file))
# 配置主进程日志
logs_file = os.path.join(
year_results_dir, f"{asset_name}_{tile_id}_{year}_SuPER.log"
)
setup_logging(logs_file)
client.scatter(year_results)
start_time = time.time()
@ -287,14 +296,21 @@ def main(
clip=True,
scale=True,
output_dir=year_results_dir,
target_crs="EPSG:32649",
tile_id=tile_id,
target_crs=target_crs,
)
for granule_url in year_results
]
dask.compute(*tasks)
total_time = time.time() - start_time
# Dask任务结束后读取 dask_worker.log 日志文件内容, 并注入到 logs_file 中
with open(logs_file, "a") as f:
with open("dask_worker.log", "r") as f2:
f.write(f2.read())
# 随后清空 dask_worker.log 文件
with open("dask_worker.log", "w") as f:
f.write("")
logging.info(
f"{year} MODIS {asset_name} Downloading complete and proccessed. Total time: {total_time} seconds"
)
@ -303,19 +319,32 @@ def main(
logging.info(
f"All MODIS {asset_name} Downloading complete and proccessed. Total time: {all_total_time} seconds"
)
# 最后删除dask_worker.log文件
os.remove("dask_worker.log")
return
if __name__ == "__main__":
earthaccess.login(persist=True)
# region = gpd.read_file("./data/vectors/wuling_guanqu_polygon.geojson")
tile_id = "49REL"
region = gpd.read_file(f"./data/vectors/{tile_id}.geojson")
# asset_name = "MOD11A1"
region_file = f"./data/vectors/{tile_id}.geojson"
target_crs = "EPSG:32649"
asset_name = "MOD11A1"
# asset_name = "MCD43A3"
asset_name = "MCD43A4"
# asset_name = "MCD43A4"
modis_tile_id = "h27v06"
# 示例文件名称: MCD43A4.A2024001.h27v05.061.2024010140610.hdf
years = ["2024", "2023", "2022"]
years = ["2024"]
dates = ("03-01", "10-31")
output_root_dir = ".\\data\\MODIS\\"
main(region, asset_name, modis_tile_id, years, dates, tile_id, output_root_dir)
main(
region_file,
asset_name,
modis_tile_id,
years,
dates,
tile_id,
target_crs,
output_root_dir,
)
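In this file the main process calls `setup_logging(logs_file)` while the Dask workers receive `client.run(setup_logging)` with no argument, and the worker output is later read back from `dask_worker.log`. The repository's `utils.common_utils.setup_logging` is not shown in this diff; a plausible sketch consistent with that usage (an assumption, reusing the log format seen elsewhere in these scripts) would be:

```python
# Editor's sketch: hypothetical helper, NOT the repository's utils.common_utils.setup_logging.
import logging
import sys


def setup_logging(log_file: str = "dask_worker.log") -> None:
    """Send log records to stdout and a file; Dask workers default to dask_worker.log."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s:%(asctime)s ||| %(message)s",
        handlers=[logging.StreamHandler(sys.stdout), logging.FileHandler(log_file)],
        force=True,  # re-configure even if the root logger already has handlers
    )
```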

View File

@ -19,7 +19,7 @@ RTC-S1 数据产品由美国宇航局喷气推进实验室 (JPL) OPERA 项目组
5. 地形校正 (Terrain Flattening)
6. UTM投影重采样
- 产品特性:
- 时间覆盖: 2021-01-01 (持续更新)
- 时间覆盖: 2014-01-01 (历史数据已全覆盖, 持续更新最新数据)
- 空间分辨率: 30 m
- 数据格式: GeoTIFF/HDF5
- 进一步预处理:
@ -29,13 +29,14 @@ RTC-S1 数据产品由美国宇航局喷气推进实验室 (JPL) OPERA 项目组
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-03-10
Last Updated: 2025-10-20
===============================================================================
"""
import os
import sys
import glob
from pathlib import Path
import json
import time
from datetime import datetime
@ -48,7 +49,7 @@ import numpy as np
import xarray as xr
from rioxarray import open_rasterio
sys.path.append("D:/NASA_EarthData_Script")
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_utils import setup_dask_environment, clip_image, mosaic_images
from HLS_SuPER.HLS_Su import earthdata_search
@ -141,7 +142,7 @@ def convert_to_db(image: xr.DataArray) -> xr.DataArray:
def process_sar_granule(
url_basenames: list[str],
output_dir: str,
output_dir: Path,
date: str,
tile_id: str,
roi: list,
@ -156,7 +157,7 @@ def process_sar_granule(
----------
url_basenames : list[str]
用于文件匹配的 RTC-S1 数据下载链接的文件名列表
output_dir : str
output_dir : Path
输出根目录
date : str
日期, 格式为 YYYY%j
@ -243,7 +244,7 @@ def process_sar_granule(
def process_sar_static_granule(
url_basenames: list[str], output_dir: str, tile_id: str, roi: list, clip=True
url_basenames: list[str], output_dir: Path, tile_id: str, roi: list, clip=True
) -> bool:
"""
OPERA RTC-S1-STATIC 静态数据预处理
@ -254,7 +255,7 @@ def process_sar_static_granule(
----------
url_basenames : list[str]
用于文件匹配的 RTC-S1-STATIC 数据下载链接的文件名列表
output_dir: str
output_dir: Path
输出根目录
tile_id: str
tile id
@ -327,12 +328,12 @@ def process_sar_static_granule(
def main(
asset_name: list[str],
region: list,
region_file: Path,
years: list[str | int],
output_dir: str,
output_dir: Path,
tile_id: str,
):
bbox = tuple(list(region.total_bounds))
region = read_file(region_file)
# 示例文件名称: OPERA_L2_RTC-S1_T040-083952-IW1_20240605T102012Z_20240612T053743Z_S1A_30_v1.0_VH.tif
static_name = ["OPERA_L2_RTC-S1-STATIC_V1"]
sar_output_dir = os.path.join(output_dir, "RTC_S1")
@ -353,7 +354,7 @@ def main(
year_results_dir, f"RTC_S1_{tile_id}_{year}_results_urls.json"
)
year_temporal = (f"{year}-06-01T00:00:00", f"{year}-08-31T23:59:59")
year_results = earthdata_search(asset_name, year_temporal, roi=bbox)
year_results = earthdata_search(asset_name, year_temporal, region_file=region_file)
results_urls.extend(year_results)
with open(year_results_file, "w") as f:
json.dump(year_results, f)
@ -361,7 +362,7 @@ def main(
with open(results_urls_file, "w") as f:
json.dump(results_urls, f)
static_granule_urls = earthdata_search(static_name, roi=bbox)
static_granule_urls = earthdata_search(static_name, region_file=region_file)
static_url_basenames = [
os.path.basename(url) for sublist in static_granule_urls for url in sublist
]
@ -454,10 +455,11 @@ if __name__ == "__main__":
earthaccess.login(persist=True)
asset_name = ["OPERA_L2_RTC-S1_V1"]
tile_id = "49REL"
region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
# region = read_file(f".\\data\\vectors\\{tile_id}.geojson")
region_file = Path(f".\\data\\vectors\\{tile_id}.geojson")
# region = read_file("./data/vectors/wuling_guanqu_polygon.geojson")
# years = ["2024", "2023", "2022"]
years = ["2024"]
output_dir = ".\\data\\SAR"
os.makedirs(output_dir, exist_ok=True)
main(asset_name, region, years, output_dir, tile_id)
output_dir = Path(".\\data\\SAR")
output_dir.mkdir(parents=True, exist_ok=True)
main(asset_name, region_file, years, output_dir, tile_id)
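The hunk above only shows the updated signature of `convert_to_db`. For context, a conventional linear-power-to-decibel conversion compatible with that signature looks roughly like the sketch below (an assumption; the function's actual body is not part of this diff):

```python
# Editor's sketch of a dB conversion matching the convert_to_db signature shown above.
import numpy as np
import xarray as xr


def convert_to_db(image: xr.DataArray) -> xr.DataArray:
    """Convert RTC-S1 backscatter from linear power to decibels: dB = 10 * log10(power)."""
    db = 10.0 * np.log10(image.where(image > 0))  # non-positive values become NaN
    db.attrs = image.attrs.copy()
    return db
```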

View File

@ -28,7 +28,7 @@ import h5py
from osgeo import gdal
import xarray as xr
sys.path.append("D:/NASA_EarthData_Script")
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.common_params import EASE2_GRID_PARAMS, EPSG
from utils.common_utils import (

View File

@ -55,7 +55,7 @@ import logging
import time
from datetime import datetime, timedelta
sys.path.append("D:\NASA_EarthData_Script")
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class getInsituData:

View File

@ -0,0 +1,129 @@
/**
* Sentinel-2 哨兵二号数据下载: 以下载 2025 年湖北省无云 RGB 三波段影像为例
*
* GEE 最新代码: https://code.earthengine.google.com/15b61c853c281deb3ef8fc416224c405
*
* @author CVEO Team
* @date 2025-12-08
*
* 1. 加载 Sentinel-2 数据与 Cloud Score+ 云掩膜
* 2. 合成年度无云影像
* 3. 分块导出填补缺失值后的 COG (云优化) GeoTIFF 影像
*/
// 加载研究区域和影像
var region_name = "湖北省";
var region_name_en = "Hubei";
var region = allCites.filter(ee.Filter.eq("省", region_name));
var year = 2025;
var start_date = year + "-01-01";
var end_date = year + "-12-31";
var cloud_threshold = 5; // 5% 云量
// Use 'cs' or 'cs_cdf', depending on your use case; see docs for guidance.
var QA_BAND = "cs_cdf";
// The threshold for masking; values between 0.50 and 0.65 generally work well.
// Higher values will remove thin clouds, haze & cirrus shadows.
var CLEAR_THRESHOLD = 0.8;
// Sentinel-2的B8A波段(20m)可能更适合代表近红外波段
// var s2Bands = ["B2", "B3", "B4", "B8", "B11", "B12"];
var s2Bands = ["B2", "B3", "B4", "B8A", "B11", "B12"];
var commonBands = ["Blue", "Green", "Red", "NIR", "SWIR1", "SWIR2"];
var region_geo = region.geometry();
var bounds = region_geo.bounds();
var common_filter = ee.Filter.and(
ee.Filter.bounds(bounds),
ee.Filter.date(start_date, end_date)
);
/**
* 基于 Cloud Score+ 云掩膜
* @param {ee.Image} image Sentinel-2 image
* @return {ee.Image} cloud masked Sentinel-2 image
*/
function maskS2cloudsByCS(image) {
var cloudMask = image.select(QA_BAND).gte(CLEAR_THRESHOLD);
image = image.updateMask(cloudMask);
return image
.copyProperties(image)
.copyProperties(image, ["system:time_start", "system:index", "system:id"]);
}
// 加载Sentinel-2 L2A数据与Cloud Score+云掩膜
// Cloud Score+ image collection.
// Note Cloud Score+ is produced from Sentinel-2 level 1C data and can be applied to either L1C or L2A collections.
var S2csPlus = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED");
var S2dataset = ee
.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
.filter(common_filter)
.filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", cloud_threshold))
.select(s2Bands, commonBands)
.linkCollection(S2csPlus, [QA_BAND])
.map(maskS2cloudsByCS);
var s2_img = ee.Image(S2dataset.median());
var s2_img_rgb = s2_img.select(["Red", "Green", "Blue"]);
var s2_img_frgb = s2_img.select(["NIR", "Red", "Green"]);
var true_rgb_vis = {
bands: ["Red", "Green", "Blue"],
min: 0.0,
max: 3000,
};
var false_rgb_vis = {
min: 0.0,
max: 4000,
gamma: 1.4,
bands: ["NIR", "Red", "Green"],
};
var styling = {
color: "red",
fillColor: "00000000",
};
Map.centerObject(region, 7);
Map.addLayer(s2_img_frgb, false_rgb_vis, year + " Sentinel-2 False RGB", false);
Map.addLayer(s2_img_rgb, true_rgb_vis, year + " Sentinel-2 True RGB");
Map.addLayer(region.style(styling), {}, region_name);
// 导出裁剪处理后的Sentinel-2 RGB影像
var fileName = region_name_en + "_Sentinel-2_" + year;
// 区域过大, 需要先将 s2_img 划分为四块网格分开下载
var coords = ee.List(bounds.coordinates().get(0));
var minX = ee.Number(ee.List(coords.get(0)).get(0));
var minY = ee.Number(ee.List(coords.get(0)).get(1));
var maxX = ee.Number(ee.List(coords.get(2)).get(0));
var maxY = ee.Number(ee.List(coords.get(2)).get(1));
var midX = minX.add(maxX).divide(2);
var midY = minY.add(maxY).divide(2);
var grid1 = ee.Geometry.Rectangle([minX, minY, midX, midY]);
var grid2 = ee.Geometry.Rectangle([midX, minY, maxX, midY]);
var grid3 = ee.Geometry.Rectangle([minX, midY, midX, maxY]);
var grid4 = ee.Geometry.Rectangle([midX, midY, maxX, maxY]);
var grids = [grid1, grid2, grid3, grid4];
for (var i = 0; i < grids.length; i++) {
var grid_region = grids[i];
// 明确设置数据类型为Float32, 否则默认类型为Float64, 会占用更多内存
// 并对缺失值进行填充, 否则默认为nan不便于后续本地处理
var clipped_img = s2_img_rgb.clip(grid_region).toFloat().unmask(-9999.0);
var grid_fileName = fileName + "_grid_" + (i + 1);
Export.image.toDrive({
image: clipped_img,
description: grid_fileName,
folder: "Sentinel-2",
region: grid_region,
scale: 10,
maxPixels: 1e13, // GEE 默认上限为 1e8 像素, 此处提高上限; 导出文件过大时会自动分块为多个 GeoTIFF
fileFormat: "GeoTIFF",
// 导出COG云优化的GeoTIFF影像, 并明确设置缺失值为-9999.0
formatOptions: {
cloudOptimized: true,
noData: -9999.0,
},
});
}
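Since the export above writes four grid tiles with `-9999.0` as the fill value, they can be mosaicked back together locally before further processing. A minimal sketch with rioxarray (editor's illustration; it assumes the tiles were downloaded from Drive into `./data/Sentinel-2`, share the same CRS/resolution, and follow the `grid_N` naming used above):

```python
# Editor's sketch: merge the exported grid tiles and restore -9999 as NoData.
from pathlib import Path

import rioxarray as rxr
from rioxarray.merge import merge_arrays

tiles_dir = Path("./data/Sentinel-2")  # assumed local download folder
tiles = sorted(tiles_dir.glob("Hubei_Sentinel-2_2025_grid_*.tif"))

arrays = [rxr.open_rasterio(p) for p in tiles]
mosaic = merge_arrays(arrays, nodata=-9999.0)
mosaic.rio.write_nodata(-9999.0, inplace=True)
mosaic.rio.to_raster(
    tiles_dir / "Hubei_Sentinel-2_2025_mosaic.tif",
    driver="COG",
    compress="DEFLATE",
)
```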

View File

@ -7,19 +7,85 @@ This module contains functions related to searching and preprocessing HLS data.
Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
Contact: lpdaac@usgs.gov
Editor: Hong Xie
Last Updated: 2025-02-20
Last Updated: 2025-10-16
===============================================================================
"""
# Import necessary packages
import os
import sys
import logging
from pathlib import Path
import numpy as np
import earthaccess
import geopandas as gpd
from shapely.geometry import box
from shapely.geometry.polygon import orient
def ensure_ccw(geom):
"""
Ensure the exterior ring of the polygon is counterclockwise.
"""
if geom.exterior.is_ccw:
return geom # Already counterclockwise
else:
return orient(geom, sign=1.0) # Make it counterclockwise
def format_roi(roi: Path):
"""
Determines if submitted ROI is a file or bbox coordinates.
If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
If bbox coordinates, creates a geodataframe with a single Polygon geometry.
Returns a geopandas dataframe for clipping and a list of vertices for searching.
"""
if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")):
try:
# Open ROI if file
roi = gpd.read_file(roi)
# Check if ROI is in Geographic CRS, if not, convert to it
if not roi.crs.is_geographic:
roi = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
# Merge all Polygon geometries and create external boundary
logging.info(
"Multiple polygons detected. Creating single geometry of external coordinates."
)
single_geometry = roi.union_all().convex_hull
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
roi["geometry"] = roi["geometry"].apply(ensure_ccw)
# List Vertices in correct order for search
# (Add) 使用外包矩形坐标简化提交检索时使用的坐标, 仅支持逆时针
vertices_list = list(roi.geometry[0].exterior.coords)
except (FileNotFoundError, ValueError):
sys.exit(
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
)
else:
# If bbox coordinates are submitted
bbox = tuple(map(float, roi.strip("'\"").split(",")))
print(bbox)
# Convert bbox to a geodataframe for clipping
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
roi["geometry"] = roi["geometry"].apply(ensure_ccw)
vertices_list = list(roi.geometry[0].convex_hull.coords)
return (roi, vertices_list)
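A small illustration of what `ensure_ccw` guards against: CMR polygon searches expect the exterior ring to run counterclockwise, so a clockwise ring is re-oriented before its vertices are submitted (editor's sketch, not part of the file; the coordinates reuse the bbox from the README examples):

```python
# Editor's sketch: re-orienting a clockwise ring before building the search vertex list.
from shapely.geometry import box
from shapely.geometry.polygon import orient

poly_cw = box(112.9834, 30.5286, 114.32373, 31.64448, ccw=False)  # clockwise exterior ring
print(poly_cw.exterior.is_ccw)   # False

poly_ccw = orient(poly_cw, sign=1.0)  # same ring, counterclockwise
print(poly_ccw.exterior.is_ccw)  # True
vertices_list = list(poly_ccw.exterior.coords)  # order accepted by the search API
```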
def earthdata_search(
asset_name: list,
dates: tuple = None,
roi: list = None,
region_file: Path = None,
tile_id: str = None,
hours: tuple = None,
log=False,
@ -30,28 +96,32 @@ def earthdata_search(
For example:
- MODIS: MCD43A3, MCD43A4, MOD11A1, MOD11A2, ...
- SMAP: SPL3SMP_E, SPL4SMGP, ...
- DEM: NASADEM_HGT, NASADEM_SC, ...
- GPM: GPM_3IMERGDL, ...
- DEM: NASADEM_HGT, NASADEM_SC, ALOS_PSR_RTC_HIGH, ALOS_PSR_RTC_LOW, ...
"""
# Search for data
if dates and not roi:
# 全球范围的数据集不需要roi参数
if not region_file:
# 全球范围的数据集不需要roi参数, 如 SMAP, GPM
results = earthaccess.search_data(
short_name=list(asset_name),
temporal=dates,
)
elif roi and not dates:
# 适用非时间序列数据, 如 DEM
results = earthaccess.search_data(
short_name=list(asset_name),
bounding_box=roi,
)
else:
results = earthaccess.search_data(
short_name=list(asset_name),
bounding_box=roi,
temporal=dates,
)
# 基于 roi 的外包多边形进行数据检索
roi, vertices_list = format_roi(region_file)
if not dates:
# 适用非时间序列数据, 如 DEM
results = earthaccess.search_data(
short_name=list(asset_name),
polygon=vertices_list,
)
else:
results = earthaccess.search_data(
short_name=list(asset_name),
polygon=vertices_list,
temporal=dates,
)
# 根据瓦片ID过滤影像
if tile_id:

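For reference, the two call patterns used by the downstream scripts in this changeset look roughly like this (editor's sketch; the arguments mirror the DEM and MODIS drivers shown above, and `earthaccess.login()` is assumed to have run already):

```python
# Editor's sketch of typical earthdata_search() calls.
from pathlib import Path

from HLS_SuPER.HLS_Su import earthdata_search

region_file = Path("./data/vectors/49REL.geojson")

# Non-temporal dataset (DEM): only the ROI polygon is submitted.
dem_results = earthdata_search(["NASADEM_HGT", "NASADEM_SC"], region_file=region_file)

# Temporal dataset (MODIS): polygon + time range, optionally filtered by tile id.
modis_results = earthdata_search(
    ["MCD43A4"],
    dates=("2024-03-01T00:00:00", "2024-10-31T23:59:59"),
    region_file=region_file,
    tile_id="h27v06",
)
```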
View File

@ -5,7 +5,7 @@ HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
Contact: lpdaac@usgs.gov
Editor: Hong Xie
Last Updated: 2025-01-15
Last Updated: 2025-10-16
===============================================================================
"""
@ -14,25 +14,21 @@ Last Updated: 2025-01-15
# TODO Improve behavior around deletion of cogs when a netcdf is requested
# TODO Add ZARR as output option
import argparse
import sys
from HLS_PER import process_granule, create_timeseries_dataset
from HLS_Su import hls_search, format_roi
from utils.common_utils import setup_dask_environment
import os
import sys
import argparse
import shutil
import logging
import time
import json
import earthaccess
from shapely.geometry import box
import geopandas as gpd
from datetime import datetime as dt
import earthaccess
import dask.distributed
sys.path.append("D:/NASA_EarthData_Script")
from utils.common_utils import setup_dask_environment
from HLS_Su import hls_search
from HLS_PER import process_granule, create_timeseries_dataset
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def parse_arguments():
@ -170,59 +166,6 @@ def parse_arguments():
return parser.parse_args()
def format_roi(roi):
"""
Determines if submitted ROI is a file or bbox coordinates.
If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
If bbox coordinates, creates a geodataframe with a single Polygon geometry.
Returns a geopandas dataframe for clipping and a list of vertices for searching.
"""
if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")):
try:
# Open ROI if file
roi = gpd.read_file(roi)
# (Add) 添加对多种几何图形类型的支持, 将MultiPolygon合并为Polygon
if len(roi) > 1 or roi.geometry[0].geom_type == "MultiPolygon":
# Merge all Polygon geometries and create external boundary
logging.info(
"Multiple polygons detected. Creating single geometry of external coordinates."
)
single_geometry = roi.unary_union.convex_hull
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
# Check if ROI is in Geographic CRS, if not, convert to it
if roi.crs.is_geographic:
# List Vertices in correct order for search
# (Add) 使用外包矩形坐标作为检索使用的坐标
minx, miny, maxx, maxy = roi.total_bounds
bounding_box = box(minx, miny, maxx, maxy)
vertices_list = list(bounding_box.exterior.coords)
else:
roi_geographic = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
vertices_list = list(roi_geographic.geometry[0].exterior.coords)
except (FileNotFoundError, ValueError):
sys.exit(
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
)
else:
# If bbox coordinates are submitted
bbox = tuple(map(float, roi.strip("'\"").split(",")))
print(bbox)
# Convert bbox to a geodataframe for clipping
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
vertices_list = list(roi.geometry[0].exterior.coords)
return (roi, vertices_list)
def format_dates(start, end):
# Strip Quotes
start = start.strip("'").strip('"')
@ -248,6 +191,8 @@ def format_tile_id(tile_id):
"""
(Add) 格式化tile_id参数
"""
if tile_id is None:
return None
tile_id = tile_id.strip("'").strip('"')
return str(tile_id)
@ -481,6 +426,7 @@ def main():
# Defaults to the current directory
output_dir = os.getcwd() + os.sep
os.makedirs(output_dir, exist_ok=True)
logging.info(f"Output directory set to: {output_dir}")
# Format/Validate Dates
@ -624,7 +570,8 @@ def main():
# Create Timeseries Dataset if NC4
if args.of == "NC4":
logging.info("Creating timeseries dataset...")
create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir)
create_timeseries_dataset(
cog_dir, output_type=args.of, output_dir=output_dir)
# Close Dask Client
client.close()
@ -641,7 +588,7 @@ def main():
# End Timer
total_time = time.time() - start_time
logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
logging.info(f"Processing complete. Total time: {round(total_time, 2)}s, ")
if __name__ == "__main__":

View File

@ -1,33 +1,36 @@
# NASA EARTHDATA 数据自动化爬取与预处理
# 公开RS/GIS数据自动化下载与预处理工具
## 仓库说明
1) EARTHDATA 说明
- NASA 计划于 2026 年底将公开数据全部集成进 EARTHDATA 中
- HLS 数据集NASA 多部门将Landsat8/9与Sentinel-2A/B统一协调至30m划分为
- L30Landsat8/9包含其独有的2个热红外波段并已处理为表面亮温℃
- S30Sentinel-2A/B包含其独有的4个红边波段
- 对于核心六波段Blue/Green/Red/NIR/SWIR1/SWIR2可直接合并使用合并后湖北地区单格网观测频率约为3-4d
2) 使用说明
- 本仓库基于官方原始脚本修复了实际使用中存在的bug并添加了更多可控制参数
- 爬取时不要使用魔法工具
- 实测单格网全年影像爬取+预处理约1h
- 单用户每秒限制100次请求
1. EARTHDATA 说明
- NASA 计划于2026年底将公开数据全部集成进 EARTHDATA 中
- HLS 数据集：NASA 多部门将 Landsat8/9 与 Sentinel-2A/B 统一协调至 30m，划分为：
- L30（Landsat8/9）：包含其独有的 2 个热红外波段，并已处理为表面亮温（℃）
- S30（Sentinel-2A/B）：包含其独有的 4 个红边波段
- 对于核心六波段（Blue/Green/Red/NIR/SWIR1/SWIR2）可直接合并使用，合并后湖北地区单格网观测频率约为 3-4d
2. 使用说明
- 针对 HLS 数据获取：基于 NASA EARTHDATA 官方原始脚本，修复了实际使用中的 bug，并添加了更多控制参数
- 使用时不要使用魔法工具
- 实测单格网全年HLS影像爬取+预处理约1h
## 1 安装基础环境
### 1.1 miniforge
### 1.1 miniforge 介绍
- miniforge是结合conda与mamba的最小化包比Anaconda和Miniconda更快更轻量并且配置命令与原conda基本一致支持直接使用mamba命令。
- 简而言之环境配置效率上miniforge > Mambaforge (202407已废弃) > Miniconda + Mamba > Miniconda > Anaconda
- miniforge 是结合 conda mamba 的最小化包,比 Anaconda Miniconda 更快更轻量,并且配置命令与原 conda 基本一致,支持直接使用 mamba 命令。
- 简而言之环境配置效率上miniforge > Mambaforge (202407 已废弃) > Miniconda + Mamba > Miniconda > Anaconda
- 官方仓库地址https://github.com/conda-forge/miniforge
- 官方下载地址https://conda-forge.org/download/
### 1.2 配置环境变量
- 为了在控制台中直接使用conda命令需要将安装的相关目录配置到Path环境变量中。
- 为了在控制台中直接使用 conda 命令,需要将安装的相关目录配置到 Path 环境变量中。
```
D:\program\miniforge3
@ -37,27 +40,27 @@ D:\program\miniforge3\Library\bin
### 1.3 配置权限
- 详细配置与miniconda相同图文教程地址https://gis-xh.github.io/my-note/python/01conda/Win11-Miniconda-install/
- Windows环境下需要设置虚拟环境文件夹的访问权限为所有用户可访问否则会出现无法读取虚拟环境文件的问题
- 详细配置与 miniconda 相同图文教程地址https://gis-xh.github.io/my-note/python/01conda/Win11-Miniconda-install/
- Windows 环境下,需要设置虚拟环境文件夹的访问权限为所有用户可访问,否则会出现无法读取虚拟环境文件的问题
- 具体地:
- 设置`D:\program\miniforge3\envs`目录为所有用户可访问,具体操作为:右键点击文件夹 -> 属性 -> 安全 -> 编辑 -> 添加 -> 添加所有用户 -> 全选 -> 应用 -> 确定
### 1.4 配置镜像源
- 生成下载源文件的配置文件 (若已经安装过Anaconda/Miniconda则无需执行此步骤)
- 生成下载源文件的配置文件 (若已经安装过 Anaconda/Miniconda则无需执行此步骤)
```sh
conda config --set show_channel_urls yes
```
- 在`C:\Users\实际用户名\`目录找到`.condarc`文件,使用记事本打开,输入如下内容并保存
- 在 `C:\Users\实际用户名\` 目录找到 `.condarc` 文件,使用记事本打开,输入如下内容并保存
```
envs_dirs:
- D:\program\miniforge3\envs
- 其他路径地址(可选,创建虚拟环境时将会按照顺序查找)
channels:
- defaults
- conda-forge
show_channel_urls: true
channel_alias: https://mirrors.tuna.tsinghua.edu.cn/anaconda
default_channels:
@ -75,6 +78,12 @@ custom_channels:
simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
```
- 清理镜像缓存
```sh
conda clean -i
```
### 1.5 初始化 conda
- 打开控制台,初始化 PowerShell 与 CMD
@ -89,18 +98,30 @@ conda init cmd.exe
## 2 运行环境配置
### 2.1 使用mamba创建并激活虚拟环境
### 2.1 使用 mamba 创建并激活虚拟环境
- 克隆虚拟环境 (Windows环境下推荐)
- 克隆虚拟环境 (完整复刻运行环境所有依赖)
```sh
mamba env create -f setup/lpdaac_windows.yml
mamba env create -f setup/lpdaac.yml
```
- 克隆虚拟环境 (复刻主要依赖环境-部分依赖可能会更新)
```sh
mamba env create -f setup/environment.yml
```
- 激活虚拟环境
```sh
mamba activate lpdaac_windows
mamba activate lpdaac
```
- 导出当前虚拟环境
```sh
mamba env export > setup/lpdaac.yml
```
## 3 设计思路
@ -146,11 +167,10 @@ data/
### 3.2 NASA Earthdata 账户准备
- 参考自NASA官网示例Demohttps://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
- 参考自 NASA 官网示例 Demohttps://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
- 首次运行爬取命令时,需要输入用户名和密码,用户名和密码可以在 [Earthdata](https://urs.earthdata.nasa.gov/) 注册获取。
- 需要注意的是,密码中最好不要出现 `@/#/$/%` 等符号,爬取时可能会出错。
- 单个用户每秒限制最多100次请求参考自https://forum.earthdata.nasa.gov/viewtopic.php?t=3734
- 单个用户每秒限制最多 100 次请求参考自https://forum.earthdata.nasa.gov/viewtopic.php?t=3734
## 4 使用示例
@ -160,7 +180,7 @@ data/
- `-roi`:感兴趣区,需要按照 **左下右上** 的逆时针顺序设置点坐标,同时还支持 `shp``geojson/json` 格式文件
- `-clip`:是否对影像进行裁剪,默认 `False`
- `-tile`HLS影像瓦片ID例如 `T49RGQ`
- `-tile`：HLS 影像瓦片 ID，例如 `T49RGQ`
- `-dir`:输出目录,必须是已存在的目录
- `-start`:开始时间,格式为 `YYYY-MM-DD`
- `-end`:结束时间,格式为 `YYYY-MM-DD`
@ -172,19 +192,19 @@ data/
#### 4.1.2 爬取云端数据并在内存中进行预处理示例
- 爬取 L30 与 S30 的核心光谱波段仅按照感兴趣区瓦片ID起止时间以及产品名称筛选影像不进行云量筛选影像对影像进行去云掩膜处理
- 爬取 L30 与 S30 的核心光谱波段：仅按照感兴趣区、瓦片 ID、起止时间以及产品名称筛选影像，不按云量筛选影像，对影像进行去云掩膜处理
```sh
python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\ALL -start 2024-01-01 -end 2024-01-31 -prod both -bands BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,Fmask -scale True
```
- 爬取 L30 的所有波段按照感兴趣区瓦片ID起止时间以及产品名称筛选影像过滤云量小于70% 的影像,对影像进行去云掩膜处理
- 爬取 L30 的所有波段：按照感兴趣区、瓦片 ID、起止时间以及产品名称筛选影像，过滤云量小于 70% 的影像，对影像进行去云掩膜处理
```sh
python .\\HLS_SuPER\\HLS_SuPER.py -roi '112.9834,30.5286,114.32373,31.64448' -tile T49RGQ -dir .\\data\\HLS\\L30\\subset -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands COASTAL-AEROSOL,BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,CIRRUS,TIR1,TIR2,Fmask -cc 70 -scale True
```
- 仅爬取 L30 的热红外波段仅按照感兴趣区瓦片ID起止时间以及产品名称筛选影像不进行云量筛选影像对影像进行去云掩膜处理
- 仅爬取 L30 的热红外波段：仅按照感兴趣区、瓦片 ID、起止时间以及产品名称筛选影像，不按云量筛选影像，对影像进行去云掩膜处理
```sh
python .\\HLS_SuPER\\HLS_SuPER.py -roi .\\data\\vectors\\49REL.geojson -tile T49REL -dir .\\data\\HLS\\2024 -start 2024-06-01 -end 2024-09-01 -prod HLSL30 -bands TIR1,TIR2
@ -198,4 +218,4 @@ python .\\HLS_SuPER\\HLS_SuPER.py -roi .\\data\\vectors\\49REL.geojson -tile T49
### 4.2 其他数据
v1.0: 直接运行`DATA_SuPER/`目录下所需数据对应的*.py文件即可.
v1.0: 直接运行 `DATA_SuPER/` 目录下所需数据对应的 `*.py` 文件即可.

31
setup/environment.yml Normal file
View File

@ -0,0 +1,31 @@
name: lpdaac
channels:
- conda-forge
dependencies:
- dask
- earthaccess
- fiona
- gdal
- geopandas
- geoviews
- h5netcdf
- h5py
- harmony-py
- hvplot
- jupyter
- jupyter_bokeh
- jupyterlab
- libgdal-hdf4
- odc-stac
- pyresample
- pystac-client
- python=3.12
- rasterio
- ray-default
- rioxarray
- scikit-image
- seaborn
- spectral
- selenium
- firefox
- geckodriver

439
setup/lpdaac.yml Normal file
View File

@ -0,0 +1,439 @@
name: lpdaac
channels:
- conda-forge
dependencies:
- _libavif_api=1.3.0=h57928b3_2
- _openmp_mutex=4.5=2_gnu
- _python_abi3_support=1.0=hd8ed1ab_2
- affine=2.4.0=pyhd8ed1ab_1
- aiobotocore=2.24.2=pyhcf101f3_0
- aiohappyeyeballs=2.6.1=pyhd8ed1ab_0
- aiohttp=3.13.0=py312h30f5c63_0
- aiohttp-cors=0.8.1=pyhd8ed1ab_0
- aioitertools=0.12.0=pyhd8ed1ab_1
- aiosignal=1.4.0=pyhd8ed1ab_0
- annotated-types=0.7.0=pyhd8ed1ab_1
- anyio=4.11.0=pyhcf101f3_0
- aom=3.9.1=he0c23c2_0
- argon2-cffi=25.1.0=pyhd8ed1ab_0
- argon2-cffi-bindings=25.1.0=py312he06e257_1
- arrow=1.3.0=pyhd8ed1ab_1
- asciitree=0.3.3=py_2
- asttokens=3.0.0=pyhd8ed1ab_1
- async-lru=2.0.5=pyh29332c3_0
- attrs=25.4.0=pyh71513ae_0
- aws-c-auth=0.9.0=h467f71e_16
- aws-c-cal=0.9.2=hef2a5b8_1
- aws-c-common=0.12.4=hfd05255_0
- aws-c-compression=0.3.1=ha8a2810_6
- aws-c-event-stream=0.5.5=h16d2062_1
- aws-c-http=0.10.2=h909f643_3
- aws-c-io=0.21.0=h20b9e97_1
- aws-c-mqtt=0.13.1=h8a47558_4
- aws-c-s3=0.8.3=hcc9d52c_1
- aws-c-sdkutils=0.2.4=ha8a2810_1
- aws-checksums=0.2.7=ha8a2810_2
- aws-crt-cpp=0.32.10=h16ee0b7_3
- aws-sdk-cpp=1.11.510=h41ea3a3_14
- babel=2.17.0=pyhd8ed1ab_0
- beautifulsoup4=4.14.2=pyha770c72_0
- bleach=6.2.0=pyh29332c3_4
- bleach-with-css=6.2.0=h82add2a_4
- blosc=1.21.6=hfd34d9b_1
- bokeh=3.8.0=pyhd8ed1ab_0
- botocore=1.40.18=pyhd8ed1ab_0
- bounded-pool-executor=0.0.3=pyhd8ed1ab_0
- branca=0.8.2=pyhd8ed1ab_0
- brotli=1.1.0=hfd05255_4
- brotli-bin=1.1.0=hfd05255_4
- brotli-python=1.1.0=py312hbb81ca0_4
- bzip2=1.0.8=h0ad9c76_8
- c-ares=1.34.5=h2466b09_0
- c-blosc2=2.21.3=h3cf07e4_0
- ca-certificates=2025.10.5=h4c7d964_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=6.2.0=pyhd8ed1ab_0
- cartopy=0.25.0=py312hc128f0a_1
- certifi=2025.10.5=pyhd8ed1ab_0
- cffi=2.0.0=py312he06e257_0
- cftime=1.6.4=py312h196c9fc_2
- charls=2.4.2=h1537add_0
- charset-normalizer=3.4.3=pyhd8ed1ab_0
- click=8.1.8=pyh7428d3b_0
- click-plugins=1.1.1.2=pyhd8ed1ab_0
- cligj=0.7.2=pyhd8ed1ab_2
- cloudpickle=3.1.1=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_1
- colorcet=3.1.0=pyhd8ed1ab_1
- colorful=0.5.6=pyhd8ed1ab_0
- comm=0.2.3=pyhe01879c_0
- configobj=5.0.9=pyhd8ed1ab_1
- contourpy=1.3.3=py312hf90b1b7_2
- cpython=3.12.11=py312hd8ed1ab_0
- cryptography=45.0.7=py312h84d000f_1
- curlify=2.2.1=pyh44b312d_0
- cycler=0.12.1=pyhd8ed1ab_1
- cytoolz=1.0.1=py312h4389bb4_0
- dask=2025.9.1=pyhcf101f3_0
- dask-core=2025.9.1=pyhcf101f3_0
- datashader=0.18.2=pyhd8ed1ab_0
- dav1d=1.2.1=hcfcfb64_0
- debugpy=1.8.17=py312ha1a9051_0
- decorator=5.2.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- deprecated=1.2.18=pyhd8ed1ab_0
- distlib=0.4.0=pyhd8ed1ab_0
- distributed=2025.9.1=pyhcf101f3_0
- donfig=0.8.1.post1=pyhd8ed1ab_1
- earthaccess=0.15.1=pyhcf101f3_0
- exceptiongroup=1.3.0=pyhd8ed1ab_0
- executing=2.2.1=pyhd8ed1ab_0
- fasteners=0.19=pyhd8ed1ab_1
- filelock=3.20.0=pyhd8ed1ab_0
- fiona=1.10.1=py312h6e88f47_3
- firefox=143.0.4=h5112557_0
- folium=0.20.0=pyhd8ed1ab_0
- fonttools=4.60.1=py312h05f76fc_0
- fqdn=1.5.1=pyhd8ed1ab_1
- freetype=2.14.1=h57928b3_0
- freexl=2.0.0=hf297d47_2
- frozenlist=1.7.0=py312hfdf67e6_0
- fsspec=2025.9.0=pyhd8ed1ab_0
- gdal=3.10.3=py312h07de9ea_21
- geckodriver=0.36.0=h127b8e1_0
- geopandas=1.1.1=pyhd8ed1ab_1
- geopandas-base=1.1.1=pyha770c72_1
- geos=3.14.0=hdade9fe_0
- geotiff=1.7.4=h73469f5_4
- geoviews=1.14.1=hd8ed1ab_0
- geoviews-core=1.14.1=pyha770c72_0
- giflib=5.2.2=h64bf75a_0
- google-api-core=2.25.2=pyhd8ed1ab_0
- google-auth=2.41.1=pyhd8ed1ab_0
- googleapis-common-protos=1.70.0=pyhd8ed1ab_0
- grpcio=1.71.0=py312h18946f6_1
- h11=0.16.0=pyhd8ed1ab_0
- h2=4.3.0=pyhcf101f3_0
- h5netcdf=1.6.4=pyhd8ed1ab_0
- h5py=3.14.0=nompi_py312h03cd2ba_101
- harmony-py=0.4.14=pyhd8ed1ab_0
- hdf4=4.2.15=h5557f11_7
- hdf5=1.14.6=nompi_he30205f_103
- holoviews=1.21.0=pyhd8ed1ab_0
- hpack=4.1.0=pyhd8ed1ab_0
- httpcore=1.0.9=pyh29332c3_0
- httpx=0.28.1=pyhd8ed1ab_0
- hvplot=0.12.1=pyhd8ed1ab_0
- hyperframe=6.1.0=pyhd8ed1ab_0
- icu=75.1=he0c23c2_0
- idna=3.10=pyhd8ed1ab_1
- imagecodecs=2025.8.2=py312h424859f_4
- imageio=2.37.0=pyhfb79c49_0
- importlib-metadata=8.7.0=pyhe01879c_1
- importlib-resources=6.5.2=pyhd8ed1ab_0
- importlib_resources=6.5.2=pyhd8ed1ab_0
- ipykernel=6.30.1=pyh3521513_0
- ipython=9.6.0=pyh6be1c34_0
- ipython_pygments_lexers=1.1.1=pyhd8ed1ab_0
- ipywidgets=8.1.7=pyhd8ed1ab_0
- isoduration=20.11.0=pyhd8ed1ab_1
- jedi=0.19.2=pyhd8ed1ab_1
- jinja2=3.1.6=pyhd8ed1ab_0
- jmespath=1.0.1=pyhd8ed1ab_1
- joblib=1.5.2=pyhd8ed1ab_0
- json5=0.12.1=pyhd8ed1ab_0
- jsonpointer=3.0.0=py312h2e8e312_2
- jsonschema=4.25.1=pyhe01879c_0
- jsonschema-specifications=2025.9.1=pyhcf101f3_0
- jsonschema-with-format-nongpl=4.25.1=he01879c_0
- jupyter=1.1.1=pyhd8ed1ab_1
- jupyter-lsp=2.3.0=pyhcf101f3_0
- jupyter_bokeh=4.0.5=pyhd8ed1ab_1
- jupyter_client=8.6.3=pyhd8ed1ab_1
- jupyter_console=6.6.3=pyhd8ed1ab_1
- jupyter_core=5.8.1=pyh5737063_0
- jupyter_events=0.12.0=pyh29332c3_0
- jupyter_server=2.17.0=pyhcf101f3_0
- jupyter_server_terminals=0.5.3=pyhd8ed1ab_1
- jupyterlab=4.4.9=pyhd8ed1ab_0
- jupyterlab_pygments=0.3.0=pyhd8ed1ab_2
- jupyterlab_server=2.27.3=pyhd8ed1ab_1
- jupyterlab_widgets=3.0.15=pyhd8ed1ab_0
- jxrlib=1.1=hcfcfb64_3
- kiwisolver=1.4.9=py312h78d62e6_1
- krb5=1.21.3=hdf4eb48_0
- lark=1.3.0=pyhd8ed1ab_0
- lazy-loader=0.4=pyhd8ed1ab_2
- lcms2=2.17=hbcf6048_0
- lerc=4.0.0=h6470a55_1
- libabseil=20250127.1=cxx17_h4eb7d71_0
- libaec=1.1.4=h20038f6_0
- libarchive=3.8.1=gpl_h26aea39_101
- libarrow=20.0.0=h7ea4809_8_cuda
- libarrow-acero=20.0.0=h7d8d6a5_8_cuda
- libarrow-dataset=20.0.0=h7d8d6a5_8_cuda
- libarrow-substrait=20.0.0=hb76e781_8_cuda
- libavif16=1.3.0=he916da2_2
- libblas=3.9.0=35_h5709861_mkl
- libbrotlicommon=1.1.0=hfd05255_4
- libbrotlidec=1.1.0=hfd05255_4
- libbrotlienc=1.1.0=hfd05255_4
- libcblas=3.9.0=35_h2a3cdd5_mkl
- libcrc32c=1.1.2=h0e60522_0
- libcurl=8.14.1=h88aaa65_0
- libdeflate=1.24=h76ddb4d_0
- libevent=2.1.12=h3671451_1
- libexpat=2.7.1=hac47afa_0
- libffi=3.4.6=h537db12_1
- libfreetype=2.14.1=h57928b3_0
- libfreetype6=2.14.1=hdbac1cb_0
- libgcc=15.2.0=h1383e82_7
- libgdal-core=3.10.3=haf333d4_21
- libgdal-hdf4=3.10.3=ha47b6c4_21
- libgomp=15.2.0=h1383e82_7
- libgoogle-cloud=2.36.0=hf249c01_1
- libgoogle-cloud-storage=2.36.0=he5eb982_1
- libgrpc=1.71.0=h8c3449c_1
- libhwloc=2.12.1=default_h64bd3f2_1002
- libhwy=1.3.0=ha71e874_1
- libiconv=1.18=hc1393d2_2
- libjpeg-turbo=3.1.0=h2466b09_0
- libjxl=0.11.1=hb7713f0_4
- libkml=1.3.0=h538826c_1021
- liblapack=3.9.0=35_hf9ab0e9_mkl
- liblzma=5.8.1=h2466b09_2
- libnetcdf=4.9.3=nompi_h7d90bef_103
- libparquet=20.0.0=ha850022_8_cuda
- libpng=1.6.50=h7351971_1
- libprotobuf=5.29.3=hd33f5f0_2
- libre2-11=2025.06.26=habfad5f_0
- librttopo=1.1.0=h5ff11c1_19
- libsodium=1.0.20=hc70643c_0
- libspatialite=5.1.0=gpl_h3bf7137_118
- libsqlite=3.50.4=hf5d6505_0
- libssh2=1.11.1=h9aa295b_0
- libthrift=0.21.0=hbe90ef8_0
- libtiff=4.7.1=h550210a_0
- libutf8proc=2.10.0=hff4702e_0
- libwebp-base=1.6.0=h4d5522a_0
- libwinpthread=12.0.0.r4.gg4f2fc60ca=h57928b3_10
- libxcb=1.17.0=h0e4246c_0
- libxml2=2.15.0=ha29bfb0_1
- libxml2-16=2.15.0=h06f855e_1
- libxml2-devel=2.15.0=ha29bfb0_1
- libzip=1.11.2=h3135430_0
- libzlib=1.3.1=h2466b09_2
- libzopfli=1.0.3=h0e60522_0
- linkify-it-py=2.0.3=pyhd8ed1ab_1
- llvm-openmp=21.1.2=hfa2b4ca_3
- llvmlite=0.45.1=py312hdb9728c_0
- locket=1.0.0=pyhd8ed1ab_0
- lz4=4.4.4=py312ha1aa51a_1
- lz4-c=1.10.0=h2466b09_1
- lzo=2.10=h6a83c73_1002
- mapclassify=2.10.0=pyhd8ed1ab_1
- markdown=3.9=pyhd8ed1ab_0
- markdown-it-py=4.0.0=pyhd8ed1ab_0
- markupsafe=3.0.3=py312h05f76fc_0
- matplotlib-base=3.10.6=py312h0ebf65c_1
- matplotlib-inline=0.1.7=pyhd8ed1ab_1
- mdit-py-plugins=0.5.0=pyhd8ed1ab_0
- mdurl=0.1.2=pyhd8ed1ab_1
- minizip=4.0.10=h9fa1bad_0
- mistune=3.1.4=pyhcf101f3_0
- mkl=2024.2.2=h57928b3_16
- msgpack-python=1.1.2=py312hf90b1b7_0
- multidict=6.6.3=py312h05f76fc_0
- multimethod=2.0=pyhd8ed1ab_0
- multipledispatch=0.6.0=pyhd8ed1ab_1
- munkres=1.1.4=pyhd8ed1ab_1
- narwhals=2.7.0=pyhcf101f3_0
- nbclient=0.10.2=pyhd8ed1ab_0
- nbconvert-core=7.16.6=pyh29332c3_0
- nbformat=5.10.4=pyhd8ed1ab_1
- nest-asyncio=1.6.0=pyhd8ed1ab_1
- netcdf4=1.7.2=nompi_py312h79d12a2_104
- networkx=3.5=pyhe01879c_0
- notebook=7.4.7=pyhd8ed1ab_0
- notebook-shim=0.2.4=pyhd8ed1ab_1
- numba=0.62.1=py312h9a042f1_0
- numcodecs=0.15.1=py312h72972c8_0
- numpy=2.3.3=py312ha72d056_0
- odc-geo=0.5.0rc1=pyhd8ed1ab_0
- odc-loader=0.5.1=pyhd8ed1ab_0
- odc-stac=0.4.0=pyhd8ed1ab_0
- opencensus=0.11.3=pyhd8ed1ab_1
- opencensus-context=0.1.3=py312h2e8e312_4
- openjpeg=2.5.4=h24db6dd_0
- openssl=3.5.4=h725018a_0
- opentelemetry-api=1.35.0=pyhd8ed1ab_0
- opentelemetry-exporter-prometheus=0.56b0=pyhe01879c_1
- opentelemetry-proto=1.37.0=pyhd8ed1ab_0
- opentelemetry-sdk=1.35.0=pyhd8ed1ab_0
- opentelemetry-semantic-conventions=0.56b0=pyh3cfb1c2_0
- orc=2.1.2=h35764e3_0
- outcome=1.3.0.post0=pyhd8ed1ab_1
- overrides=7.7.0=pyhd8ed1ab_1
- packaging=25.0=pyh29332c3_1
- pandas=2.3.3=py312hc128f0a_1
- pandocfilters=1.5.0=pyhd8ed1ab_0
- panel=1.8.2=pyhd8ed1ab_0
- param=2.2.1=pyhd8ed1ab_0
- parso=0.8.5=pyhcf101f3_0
- partd=1.4.2=pyhd8ed1ab_0
- patsy=1.0.1=pyhd8ed1ab_1
- pcre2=10.46=h3402e2f_0
- pickleshare=0.7.5=pyhd8ed1ab_1004
- pillow=11.3.0=py312h5ee8bfe_3
- pip=25.2=pyh8b19718_0
- platformdirs=4.5.0=pyhcf101f3_0
- pockets=0.9.1=pyhd8ed1ab_1
- pqdm=0.2.0=pyhd8ed1ab_1
- progressbar2=4.2.0=pyhd8ed1ab_0
- proj=9.7.0=h9080b7b_0
- prometheus_client=0.23.1=pyhd8ed1ab_0
- prompt-toolkit=3.0.52=pyha770c72_0
- prompt_toolkit=3.0.52=hd8ed1ab_0
- propcache=0.3.1=py312h31fea79_0
- proto-plus=1.26.1=pyhd8ed1ab_0
- protobuf=5.29.3=py312h275cf98_0
- psutil=7.1.0=py312he06e257_0
- pthread-stubs=0.4=h0e40799_1002
- pure_eval=0.2.3=pyhd8ed1ab_1
- py-spy=0.4.1=h77a83cd_0
- pyarrow=20.0.0=py312h2e8e312_0
- pyarrow-core=20.0.0=py312h607bf26_0_cuda
- pyasn1=0.6.1=pyhd8ed1ab_2
- pyasn1-modules=0.4.2=pyhd8ed1ab_0
- pycparser=2.22=pyh29332c3_1
- pyct=0.6.0=pyhd8ed1ab_0
- pydantic=2.12.0=pyh3cfb1c2_0
- pydantic-core=2.41.1=py312hdabe01f_0
- pygments=2.19.2=pyhd8ed1ab_0
- pykdtree=1.4.3=py312h196c9fc_1
- pyogrio=0.11.0=py312h6e88f47_0
- pyopenssl=25.1.0=pyhd8ed1ab_0
- pyparsing=3.2.5=pyhcf101f3_0
- pyproj=3.7.2=py312habbd053_2
- pyresample=1.34.2=py312h275cf98_0
- pyshp=3.0.2=pyhd8ed1ab_0
- pysocks=1.7.1=pyh09c184e_7
- pystac=1.14.1=pyhd8ed1ab_0
- pystac-client=0.9.0=pyhd8ed1ab_0
- python=3.12.11=h3f84c4b_0_cpython
- python-cmr=0.13.0=pyhff2d567_1
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-dotenv=0.20.0=pyhd8ed1ab_0
- python-fastjsonschema=2.21.2=pyhe01879c_0
- python-gil=3.12.11=hd8ed1ab_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2025.2=pyhd8ed1ab_0
- python-utils=3.9.1=pyhff2d567_1
- python_abi=3.12=8_cp312
- pytz=2025.2=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_1
- pyviz_comms=3.0.6=pyhd8ed1ab_0
- pywavelets=1.9.0=py312h196c9fc_1
- pywin32=311=py312h829343e_1
- pywinpty=2.0.15=py312h275cf98_1
- pyyaml=6.0.3=py312h05f76fc_0
- pyzmq=27.1.0=py312hbb5da91_0
- qhull=2020.2=hc790b64_5
- rasterio=1.4.3=py312h11f88aa_3
- rav1e=0.7.1=ha073cba_3
- ray-core=2.49.2=py312h6652516_2
- ray-default=2.49.2=py312h8c80c70_2
- re2=2025.06.26=h3dd2b4f_0
- referencing=0.36.2=pyh29332c3_0
- requests=2.32.5=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_1
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rfc3987-syntax=1.1.0=pyhe01879c_1
- rioxarray=0.19.0=pyhd8ed1ab_0
- rpds-py=0.27.1=py312hdabe01f_1
- rsa=4.9.1=pyhd8ed1ab_0
- s3fs=2025.9.0=pyhd8ed1ab_0
- scikit-image=0.25.2=py312hc128f0a_2
- scikit-learn=1.7.2=py312h91ac024_0
- scipy=1.16.2=py312h33376e8_0
- seaborn=0.13.2=hd8ed1ab_3
- seaborn-base=0.13.2=pyhd8ed1ab_3
- selenium=4.36.0=pyhcf101f3_0
- selenium-manager=4.36.0=h18a1a76_0
- send2trash=1.8.3=pyh5737063_1
- setuptools=80.9.0=pyhff2d567_0
- shapely=2.1.2=py312ha0f8e3e_0
- six=1.17.0=pyhe01879c_1
- smart_open=7.3.1=pyhcf101f3_0
- snappy=1.2.2=h7fa0ca8_0
- sniffio=1.3.1=pyhd8ed1ab_1
- snuggs=1.4.7=pyhd8ed1ab_2
- sortedcontainers=2.4.0=pyhd8ed1ab_1
- soupsieve=2.8=pyhd8ed1ab_0
- spectral=0.24=pyhd8ed1ab_0
- sphinxcontrib-napoleon=0.7=pyhd8ed1ab_1
- sqlite=3.50.4=hdb435a2_0
- stack_data=0.6.3=pyhd8ed1ab_1
- statsmodels=0.14.5=py312h196c9fc_1
- svt-av1=3.1.2=hac47afa_0
- tbb=2021.13.0=h18a62a1_3
- tblib=3.1.0=pyhd8ed1ab_0
- tenacity=9.1.2=pyhd8ed1ab_0
- terminado=0.18.1=pyh5737063_0
- threadpoolctl=3.6.0=pyhecae5ae_0
- tifffile=2025.10.4=pyhd8ed1ab_0
- tinycss2=1.4.0=pyhd8ed1ab_0
- tinynetrc=1.3.1=pyhd8ed1ab_0
- tk=8.6.13=h2c6b04d_2
- tomli=2.3.0=pyhcf101f3_0
- toolz=1.0.0=pyhd8ed1ab_1
- tornado=6.5.2=py312he06e257_1
- tqdm=4.67.1=pyhd8ed1ab_1
- traitlets=5.14.3=pyhd8ed1ab_1
- trio=0.31.0=py312h680a3fa_0
- trio-websocket=0.12.2=pyh29332c3_0
- types-python-dateutil=2.9.0.20251008=pyhd8ed1ab_0
- typing-extensions=4.15.0=h396c80c_0
- typing-inspection=0.4.2=pyhd8ed1ab_0
- typing_extensions=4.15.0=pyhcf101f3_0
- typing_utils=0.1.0=pyhd8ed1ab_1
- tzdata=2025b=h78e105d_0
- uc-micro-py=1.0.3=pyhd8ed1ab_1
- ucrt=10.0.26100.0=h57928b3_0
- unicodedata2=16.0.0=py312he06e257_1
- uri-template=1.3.0=pyhd8ed1ab_1
- uriparser=0.9.8=h5a68840_0
- urllib3=2.5.0=pyhd8ed1ab_0
- vc=14.3=h41ae7f8_31
- vc14_runtime=14.44.35208=h818238b_31
- vcomp14=14.44.35208=h818238b_31
- virtualenv=20.35.3=pyhd8ed1ab_0
- vs2015_runtime=14.44.35208=h38c0c73_31
- wcwidth=0.2.14=pyhd8ed1ab_0
- webcolors=24.11.1=pyhd8ed1ab_0
- webencodings=0.5.1=pyhd8ed1ab_3
- websocket-client=1.9.0=pyhd8ed1ab_0
- wheel=0.45.1=pyhd8ed1ab_1
- widgetsnbextension=4.0.14=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyh7428d3b_8
- winpty=0.4.3=4
- wrapt=1.17.3=py312he06e257_1
- wsproto=1.2.0=pyhd8ed1ab_1
- xarray=2025.10.1=pyhd8ed1ab_0
- xerces-c=3.2.5=he0c23c2_2
- xorg-libxau=1.0.12=h0e40799_0
- xorg-libxdmcp=1.1.5=h0e40799_0
- xyzservices=2025.4.0=pyhd8ed1ab_0
- yaml=0.2.5=h6a83c73_3
- yarl=1.20.1=py312h31fea79_0
- zarr=2.18.7=pyhd8ed1ab_0
- zeromq=4.3.5=h5bddc39_9
- zfp=1.0.1=h2f0f97f_3
- zict=3.0.0=pyhd8ed1ab_1
- zipp=3.23.0=pyhd8ed1ab_0
- zlib=1.3.1=h2466b09_2
- zlib-ng=2.2.5=h1608b31_0
- zstandard=0.25.0=py312he5662c2_0
- zstd=1.5.7=hbeecb71_2
prefix: D:\program\miniforge3\envs\lpdaac


@ -1,426 +0,0 @@
name: lpdaac_windows
channels:
- defaults
- conda-forge
dependencies:
- affine=2.4.0=pyhd8ed1ab_0
- aiobotocore=2.7.0=pyhd8ed1ab_1
- aiofiles=22.1.0=pyhd8ed1ab_0
- aiohttp=3.8.6=py310h8d17308_1
- aiohttp-cors=0.7.0=py_0
- aioitertools=0.11.0=pyhd8ed1ab_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- aiosqlite=0.19.0=pyhd8ed1ab_0
- ansicon=1.89.0=py310h5588dad_7
- anyio=4.0.0=pyhd8ed1ab_0
- argon2-cffi=23.1.0=pyhd8ed1ab_0
- argon2-cffi-bindings=21.2.0=py310h8d17308_4
- arrow=1.3.0=pyhd8ed1ab_0
- asciitree=0.3.3=py_2
- asttokens=2.4.1=pyhd8ed1ab_0
- async-timeout=4.0.3=pyhd8ed1ab_0
- aws-c-auth=0.7.3=h0127223_1
- aws-c-cal=0.6.1=hfb91821_1
- aws-c-common=0.9.0=hcfcfb64_0
- aws-c-compression=0.2.17=h04c9df6_2
- aws-c-event-stream=0.3.1=h495bb32_4
- aws-c-http=0.7.11=hf013885_4
- aws-c-io=0.13.32=he824701_1
- aws-c-mqtt=0.9.3=h64f41f2_1
- aws-c-s3=0.3.14=hb8b96c7_1
- aws-c-sdkutils=0.1.12=h04c9df6_1
- aws-checksums=0.1.17=h04c9df6_1
- aws-crt-cpp=0.21.0=hf1ed06d_5
- aws-sdk-cpp=1.10.57=heb7cc7f_19
- babel=2.13.1=pyhd8ed1ab_0
- backports=1.0=pyhd8ed1ab_3
- backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0
- beautifulsoup4=4.12.2=pyha770c72_0
- bleach=6.1.0=pyhd8ed1ab_0
- blessed=1.19.1=pyh95a074a_2
- blosc=1.21.5=hdccc3a2_0
- bokeh=3.3.0=pyhd8ed1ab_0
- boto3=1.28.64=pyhd8ed1ab_0
- botocore=1.31.64=pyhd8ed1ab_0
- bounded-pool-executor=0.0.3=pyhd8ed1ab_0
- branca=0.7.0=pyhd8ed1ab_1
- brotli=1.0.9=hcfcfb64_9
- brotli-bin=1.0.9=hcfcfb64_9
- brotli-python=1.0.9=py310h00ffb61_9
- bzip2=1.0.8=hcfcfb64_5
- c-ares=1.21.0=hcfcfb64_0
- ca-certificates=2025.1.31=h56e8100_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=5.3.2=pyhd8ed1ab_0
- cairo=1.18.0=h1fef639_0
- cartopy=0.22.0=py310hecd3228_1
- certifi=2025.1.31=pyhd8ed1ab_0
- cffi=1.16.0=py310h8d17308_0
- cfitsio=4.3.0=h9b0cee5_0
- cftime=1.6.3=py310h3e78b6c_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.7=win_pyh7428d3b_0
- click-plugins=1.1.1=py_0
- cligj=0.7.2=pyhd8ed1ab_1
- cloudpickle=3.0.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- colorcet=3.0.1=pyhd8ed1ab_0
- colorful=0.5.4=pyhd8ed1ab_0
- comm=0.1.4=pyhd8ed1ab_0
- configobj=5.0.8=pyhd8ed1ab_0
- contourpy=1.2.0=py310h232114e_0
- cryptography=41.0.5=py310h6e82f81_0
- cycler=0.12.1=pyhd8ed1ab_0
- cytoolz=0.12.2=py310h8d17308_1
- dask=2023.10.1=pyhd8ed1ab_0
- dask-core=2023.10.1=pyhd8ed1ab_0
- datashader=0.16.0=pyhd8ed1ab_0
- debugpy=1.8.0=py310h00ffb61_1
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- distlib=0.3.7=pyhd8ed1ab_0
- distributed=2023.10.1=pyhd8ed1ab_0
- entrypoints=0.4=pyhd8ed1ab_0
- exceptiongroup=1.1.3=pyhd8ed1ab_0
- executing=2.0.1=pyhd8ed1ab_0
- expat=2.5.0=h63175ca_1
- fasteners=0.17.3=pyhd8ed1ab_0
- filelock=3.13.1=pyhd8ed1ab_0
- fiona=1.9.5=py310h65cc672_0
- folium=0.15.0=pyhd8ed1ab_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.14.2=hbde0cde_0
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- fonttools=4.44.0=py310h8d17308_0
- fqdn=1.5.1=pyhd8ed1ab_0
- freetype=2.12.1=hdaf720e_2
- freexl=2.0.0=h8276f4a_0
- frozenlist=1.4.0=py310h8d17308_1
- fsspec=2023.10.0=pyhca7485f_0
- gdal=3.7.3=py310haa9213b_2
- geopandas=0.14.0=pyhd8ed1ab_1
- geopandas-base=0.14.0=pyha770c72_1
- geos=3.12.0=h1537add_0
- geotiff=1.7.1=hcf4a93f_14
- geoviews=1.11.0=pyhd8ed1ab_0
- geoviews-core=1.11.0=pyha770c72_0
- gettext=0.21.1=h5728263_0
- gitdb=4.0.11=pyhd8ed1ab_0
- gitpython=3.1.40=pyhd8ed1ab_0
- google-api-core=2.13.0=pyhd8ed1ab_0
- google-auth=2.23.4=pyhca7485f_0
- googleapis-common-protos=1.61.0=pyhd8ed1ab_0
- gpustat=1.1.1=pyhd8ed1ab_0
- grpcio=1.54.3=py310h8020be6_0
- h5netcdf=1.3.0=pyhd8ed1ab_0
- h5py=3.10.0=nompi_py310h20f5850_100
- hdf4=4.2.15=h5557f11_7
- hdf5=1.14.2=nompi_h73e8ff5_100
- holoviews=1.18.1=pyhd8ed1ab_0
- hvplot=0.9.0=pyhd8ed1ab_0
- icu=73.2=h63175ca_0
- idna=3.4=pyhd8ed1ab_0
- imagecodecs-lite=2019.12.3=py310h3e78b6c_7
- imageio=2.31.5=pyh8c1a49c_0
- importlib-metadata=6.8.0=pyha770c72_0
- importlib_metadata=6.8.0=hd8ed1ab_0
- intel-openmp=2023.2.0=h57928b3_50497
- ipykernel=6.26.0=pyha63f2e9_0
- ipython=8.17.2=pyh5737063_0
- ipython_genutils=0.2.0=py_1
- ipywidgets=8.1.1=pyhd8ed1ab_0
- isoduration=20.11.0=pyhd8ed1ab_0
- jedi=0.19.1=pyhd8ed1ab_0
- jinja2=3.1.2=pyhd8ed1ab_1
- jinxed=1.2.0=pyh95a074a_0
- jmespath=1.0.1=pyhd8ed1ab_0
- joblib=1.3.2=pyhd8ed1ab_0
- json5=0.9.14=pyhd8ed1ab_0
- jsonpointer=2.4=py310h5588dad_3
- jsonschema=4.19.2=pyhd8ed1ab_0
- jsonschema-specifications=2023.7.1=pyhd8ed1ab_0
- jsonschema-with-format-nongpl=4.19.2=pyhd8ed1ab_0
- jupyter=1.0.0=pyhd8ed1ab_10
- jupyter-resource-usage=1.0.1=pyhd8ed1ab_0
- jupyter-server-mathjax=0.2.6=pyh5bfe37b_1
- jupyter_bokeh=3.0.7=pyhd8ed1ab_0
- jupyter_client=7.4.9=pyhd8ed1ab_0
- jupyter_console=6.6.3=pyhd8ed1ab_0
- jupyter_core=5.5.0=py310h5588dad_0
- jupyter_events=0.9.0=pyhd8ed1ab_0
- jupyter_server=2.10.0=pyhd8ed1ab_0
- jupyter_server_fileid=0.9.0=pyhd8ed1ab_0
- jupyter_server_terminals=0.4.4=pyhd8ed1ab_1
- jupyter_server_ydoc=0.8.0=pyhd8ed1ab_0
- jupyter_ydoc=0.2.4=pyhd8ed1ab_0
- jupyterlab=3.6.6=pyhd8ed1ab_0
- jupyterlab-geojson=3.4.0=pyhd8ed1ab_0
- jupyterlab-git=0.44.0=pyhd8ed1ab_0
- jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
- jupyterlab_server=2.25.1=pyhd8ed1ab_0
- jupyterlab_widgets=3.0.9=pyhd8ed1ab_0
- kealib=1.5.2=ha10e780_1
- kiwisolver=1.4.5=py310h232114e_1
- krb5=1.21.2=heb0366b_0
- lazy_loader=0.3=pyhd8ed1ab_0
- lcms2=2.15=h67d730c_3
- lerc=4.0.0=h63175ca_0
- libabseil=20230125.3=cxx17_h63175ca_0
- libaec=1.1.2=h63175ca_1
- libarchive=3.7.2=h6f8411a_0
- libarrow=12.0.1=he3e0f11_8_cpu
- libblas=3.9.0=19_win64_mkl
- libboost-headers=1.82.0=h57928b3_6
- libbrotlicommon=1.0.9=hcfcfb64_9
- libbrotlidec=1.0.9=hcfcfb64_9
- libbrotlienc=1.0.9=hcfcfb64_9
- libcblas=3.9.0=19_win64_mkl
- libcrc32c=1.1.2=h0e60522_0
- libcurl=8.4.0=hd5e4a3a_0
- libdeflate=1.19=hcfcfb64_0
- libevent=2.1.12=h3671451_1
- libexpat=2.5.0=h63175ca_1
- libffi=3.4.2=h8ffe710_5
- libgdal=3.7.3=h3217549_2
- libglib=2.78.1=he8f3873_0
- libgoogle-cloud=2.12.0=h00b2bdc_1
- libgrpc=1.54.3=ha177ca7_0
- libhwloc=2.9.3=default_haede6df_1009
- libiconv=1.17=h8ffe710_0
- libjpeg-turbo=3.0.0=hcfcfb64_1
- libkml=1.3.0=haf3e7a6_1018
- liblapack=3.9.0=19_win64_mkl
- libnetcdf=4.9.2=nompi_h8284064_112
- libpng=1.6.39=h19919ed_0
- libpq=16.3=hab9416b_0
- libprotobuf=3.21.12=h12be248_2
- librttopo=1.1.0=h92c5fdb_14
- libsodium=1.0.18=h8d14728_1
- libspatialindex=1.9.3=h39d44d4_4
- libspatialite=5.1.0=hbf340bc_1
- libsqlite=3.44.0=hcfcfb64_0
- libssh2=1.11.0=h7dfc565_0
- libthrift=0.18.1=h06f6336_2
- libtiff=4.6.0=h6e2ebb7_2
- libutf8proc=2.8.0=h82a8f57_0
- libwebp-base=1.3.2=hcfcfb64_0
- libxcb=1.15=hcd874cb_0
- libxml2=2.12.7=h283a6d9_1
- libzip=1.10.1=h1d365fa_3
- libzlib=1.2.13=hcfcfb64_5
- linkify-it-py=2.0.0=pyhd8ed1ab_0
- llvmlite=0.41.1=py310hb84602e_0
- locket=1.0.0=pyhd8ed1ab_0
- lz4=4.3.2=py310hbbb2075_1
- lz4-c=1.9.4=hcfcfb64_0
- lzo=2.10=he774522_1000
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- mapclassify=2.6.1=pyhd8ed1ab_0
- markdown=3.5.1=pyhd8ed1ab_0
- markdown-it-py=3.0.0=pyhd8ed1ab_0
- markupsafe=2.1.3=py310h8d17308_1
- matplotlib-base=3.8.1=py310hc9baf74_0
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
- mdit-py-plugins=0.4.0=pyhd8ed1ab_0
- mdurl=0.1.0=pyhd8ed1ab_0
- minizip=4.0.2=h5bed578_0
- mistune=3.0.2=pyhd8ed1ab_0
- mkl=2023.2.0=h6a75c08_50496
- msgpack-python=1.0.6=py310h232114e_0
- msys2-conda-epoch=20160418=1
- multidict=6.0.4=py310h8d17308_1
- multimethod=1.9.1=pyhd8ed1ab_0
- multipledispatch=0.6.0=py_0
- munch=4.0.0=pyhd8ed1ab_0
- munkres=1.1.4=pyh9f0ad1d_0
- nbclassic=1.0.0=pyhb4ecaf3_1
- nbclient=0.8.0=pyhd8ed1ab_0
- nbconvert=7.11.0=pyhd8ed1ab_0
- nbconvert-core=7.11.0=pyhd8ed1ab_0
- nbconvert-pandoc=7.11.0=pyhd8ed1ab_0
- nbdime=3.2.1=pyhd8ed1ab_0
- nbformat=5.9.2=pyhd8ed1ab_0
- nest-asyncio=1.5.8=pyhd8ed1ab_0
- netcdf4=1.6.5=nompi_py310h6477780_100
- networkx=3.2.1=pyhd8ed1ab_0
- notebook=6.5.6=pyha770c72_0
- notebook-shim=0.2.3=pyhd8ed1ab_0
- numba=0.58.1=py310h9ccaf4f_0
- numcodecs=0.12.1=py310h00ffb61_0
- numpy=1.26.0=py310hf667824_0
- nvidia-ml-py=12.535.133=pyhd8ed1ab_0
- opencensus=0.11.3=pyhd8ed1ab_0
- opencensus-context=0.1.3=py310h5588dad_2
- openjpeg=2.5.0=h3d672ee_3
- openssl=3.4.1=ha4e3fda_0
- orc=1.9.0=hada7b9e_1
- overrides=7.4.0=pyhd8ed1ab_0
- packaging=23.2=pyhd8ed1ab_0
- pandas=2.1.2=py310hecd3228_0
- pandoc=3.1.3=h57928b3_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- panel=1.3.1=pyhd8ed1ab_0
- param=2.0.1=pyhca7485f_0
- parso=0.8.3=pyhd8ed1ab_0
- partd=1.4.1=pyhd8ed1ab_0
- pcre2=10.40=h17e33f8_0
- pexpect=4.8.0=pyh1a96a4e_2
- pickleshare=0.7.5=py_1003
- pillow=10.1.0=py310h1e6a543_0
- pip=23.3.1=pyhd8ed1ab_0
- pixman=0.42.2=h63175ca_0
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
- platformdirs=3.11.0=pyhd8ed1ab_0
- poppler=23.10.0=hc2f3c52_0
- poppler-data=0.4.12=hd8ed1ab_0
- postgresql=16.3=h7f155c9_0
- pqdm=0.2.0=pyhd8ed1ab_0
- proj=9.3.0=he13c7e8_2
- prometheus_client=0.18.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.39=pyha770c72_0
- prompt_toolkit=3.0.39=hd8ed1ab_0
- protobuf=4.21.12=py310h00ffb61_0
- psutil=5.9.5=py310h8d17308_1
- pthread-stubs=0.4=hcd874cb_1001
- pthreads-win32=2.9.1=hfa6e2cd_3
- ptyprocess=0.7.0=pyhd3deb0d_0
- pure_eval=0.2.2=pyhd8ed1ab_0
- py-spy=0.3.14=h975169c_0
- pyarrow=12.0.1=py310hd1a9178_8_cpu
- pyasn1=0.5.0=pyhd8ed1ab_0
- pyasn1-modules=0.3.0=pyhd8ed1ab_0
- pycparser=2.21=pyhd8ed1ab_0
- pyct=0.4.6=py_0
- pyct-core=0.4.6=py_0
- pydantic=1.10.13=py310h8d17308_1
- pygments=2.16.1=pyhd8ed1ab_0
- pykdtree=1.3.9=py310h3e78b6c_1
- pyopenssl=23.3.0=pyhd8ed1ab_0
- pyparsing=3.1.1=pyhd8ed1ab_0
- pyproj=3.6.1=py310hebb2149_4
- pyresample=1.27.1=py310hecd3228_2
- pyshp=2.3.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyh0701188_6
- pystac=1.9.0=pyhd8ed1ab_0
- pystac-client=0.7.5=pyhd8ed1ab_0
- python=3.10.13=h4de0772_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-fastjsonschema=2.18.1=pyhd8ed1ab_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2023.3=pyhd8ed1ab_0
- python_abi=3.10=4_cp310
- pytz=2023.3.post1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- pyviz_comms=2.3.2=pyhd8ed1ab_0
- pywavelets=1.4.1=py310h3e78b6c_1
- pywin32=306=py310h00ffb61_2
- pywinpty=2.0.12=py310h00ffb61_0
- pyyaml=6.0.1=py310h8d17308_1
- pyzmq=24.0.1=py310hcd737a0_1
- qtconsole-base=5.5.0=pyha770c72_0
- qtpy=2.4.1=pyhd8ed1ab_0
- rasterio=1.3.9=py310h4d3659c_0
- ray-core=2.7.1=py310h139b6d1_0
- ray-default=2.7.1=py310h5588dad_0
- re2=2023.03.02=hd4eee63_0
- referencing=0.30.2=pyhd8ed1ab_0
- requests=2.31.0=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rioxarray=0.15.0=pyhd8ed1ab_0
- rpds-py=0.12.0=py310h87d50f1_0
- rsa=4.9=pyhd8ed1ab_0
- rtree=1.1.0=py310h1cbd46b_0
- s3fs=2023.10.0=pyhd8ed1ab_0
- s3transfer=0.7.0=pyhd8ed1ab_0
- scikit-image=0.20.0=py310h1c4a608_1
- scikit-learn=1.3.2=py310hfd2573f_1
- scipy=1.11.3=py310hf667824_1
- send2trash=1.8.2=pyh08f2357_0
- setproctitle=1.3.3=py310h8d17308_0
- setuptools=68.2.2=pyhd8ed1ab_0
- shapely=2.0.2=py310h839b4a8_0
- six=1.16.0=pyh6c4a22f_0
- smart_open=6.4.0=pyhd8ed1ab_0
- smmap=5.0.0=pyhd8ed1ab_0
- snappy=1.1.10=hfb803bf_0
- sniffio=1.3.0=pyhd8ed1ab_0
- snuggs=1.4.7=py_0
- sortedcontainers=2.4.0=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- spectral=0.23.1=pyh1a96a4e_0
- sqlite=3.44.0=hcfcfb64_0
- stack_data=0.6.2=pyhd8ed1ab_0
- tbb=2021.10.0=h91493d7_2
- tblib=2.0.0=pyhd8ed1ab_0
- terminado=0.17.0=pyh08f2357_0
- threadpoolctl=3.2.0=pyha21a80b_0
- tifffile=2020.6.3=py_0
- tiledb=2.16.3=h1ffc264_3
- tinycss2=1.2.1=pyhd8ed1ab_0
- tinynetrc=1.3.1=pyhd8ed1ab_0
- tk=8.6.13=h5226925_1
- tomli=2.0.1=pyhd8ed1ab_0
- toolz=0.12.0=pyhd8ed1ab_0
- tornado=6.3.3=py310h8d17308_1
- tqdm=4.66.1=pyhd8ed1ab_0
- traitlets=5.13.0=pyhd8ed1ab_0
- types-python-dateutil=2.8.19.14=pyhd8ed1ab_0
- typing_utils=0.1.0=pyhd8ed1ab_0
- tzdata=2023c=h71feb2d_0
- uc-micro-py=1.0.1=pyhd8ed1ab_0
- ucrt=10.0.22621.0=h57928b3_0
- unicodedata2=15.1.0=py310h8d17308_0
- uri-template=1.3.0=pyhd8ed1ab_0
- uriparser=0.9.7=h1537add_1
- urllib3=1.26.18=pyhd8ed1ab_0
- vc=14.3=h64f974e_17
- vc14_runtime=14.36.32532=hdcecf7f_17
- virtualenv=20.21.0=pyhd8ed1ab_0
- vs2015_runtime=14.36.32532=h05e6639_17
- wcwidth=0.2.9=pyhd8ed1ab_0
- webcolors=1.13=pyhd8ed1ab_0
- webencodings=0.5.1=pyhd8ed1ab_2
- wheel=0.41.3=pyhd8ed1ab_0
- widgetsnbextension=4.0.9=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyhd8ed1ab_6
- winpty=0.4.3=4
- wrapt=1.16.0=py310h8d17308_0
- xarray=2023.10.1=pyhd8ed1ab_0
- xerces-c=3.2.4=h63175ca_3
- xorg-libxau=1.0.11=hcd874cb_0
- xorg-libxdmcp=1.1.3=hcd874cb_0
- xyzservices=2023.10.1=pyhd8ed1ab_0
- xz=5.2.6=h8d14728_0
- y-py=0.5.5=py310h87d50f1_2
- yaml=0.2.5=h8ffe710_2
- yarl=1.9.2=py310h8d17308_1
- ypy-websocket=0.8.2=pyhd8ed1ab_0
- zarr=2.16.1=pyhd8ed1ab_0
- zeromq=4.3.4=h0e60522_1
- zict=3.0.0=pyhd8ed1ab_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hcfcfb64_5
- zstd=1.5.5=h12be248_0
- pip:
- attrs==25.3.0
- earthaccess==0.14.0
- h11==0.14.0
- importlib-resources==6.5.2
- outcome==1.3.0.post0
- python-cmr==0.13.0
- selenium==4.30.0
- trio==0.29.0
- trio-websocket==0.12.2
- typing-extensions==4.12.2
- websocket-client==1.8.0
- wsproto==1.2.0


@ -5,7 +5,7 @@
-------------------------------------------------------------------------------
Authors: Hong Xie
Last Updated: 2025-07-07
Last Updated: 2025-09-11
===============================================================================
"""
@ -256,10 +256,11 @@ def setup_logging(log_file: str = "dask_worker.log"):
Parameters
----------
log_file : str, optional
日志文件路径, by default "dask_worker.log"
"""
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
@ -269,6 +270,7 @@ def setup_logging(log_file: str = "dask_worker.log"):
],
)
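# Usage sketch (not part of the diff above; the log path is hypothetical):
# call setup_logging once at start-up, then use the standard logging API.
import logging
setup_logging("logs/hls_super.log")
logging.info("logging configured with the format shown above")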
def load_band_as_arr(org_tif_path, band_num=1):
"""
读取波段数据
@ -410,15 +412,26 @@ def create_quality_mask(quality_data, bit_nums: list = [0, 1, 2, 3, 4, 5]):
def clip_image(
image: xr.DataArray | xr.Dataset, roi: gpd.GeoDataFrame, clip_by_box=True
):
image: xr.DataArray | xr.Dataset, roi: gpd.GeoDataFrame = None, clip_by_box=True
) -> xr.DataArray | xr.Dataset | None:
"""
Clip Image data to ROI.
args:
image (xarray.DataArray | xarray.Dataset): 通过 rioxarray.open_rasterio 加载的图像数据.
roi (gpd.GeoDataFrame): 感兴趣区数据.
clip_by_box (bool): 是否使用 bbox 进行裁剪, 默认为 True.
Parameters
----------
image : xarray.DataArray | xarray.Dataset
通过 rioxarray.open_rasterio 加载的图像数据.
roi : gpd.GeoDataFrame, optional
感兴趣区数据.
clip_by_box : bool, optional
是否使用 bbox 进行裁剪, 默认为 True.
Returns
-------
xarray.DataArray | xarray.Dataset | None
裁剪后的图像数据. 若裁剪后数据全为无效值, 则返回 None.
"""
if roi is None:
@ -443,15 +456,25 @@ def clip_image(
return image_cliped
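# Usage sketch (not part of the diff; file names are hypothetical), showing how
# clip_image is meant to be called with a rioxarray raster and a GeoDataFrame ROI.
import geopandas as gpd
from rioxarray import open_rasterio
roi = gpd.read_file("data/roi.geojson")
image = open_rasterio("data/HLS_scene.tif", masked=True)
subset = clip_image(image, roi=roi, clip_by_box=True)
if subset is not None:
    subset.rio.to_raster("data/HLS_scene.subset.tif")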
def clip_roi_image(file_path: str, grid: gpd.GeoDataFrame) -> xr.DataArray | None:
def clip_roi_image(
file_path: str, grid: gpd.GeoDataFrame = None
) -> xr.DataArray | None:
"""
按研究区范围裁剪影像
args:
file_path (str): 待裁剪影像路径
grid (gpd.GeoDataFrame): 格网范围
return:
raster_cliped (xr.DataArray): 裁剪后的影像
Parameters
----------
file_path : str
待裁剪影像路径
grid : gpd.GeoDataFrame, optional
格网范围, 默认为 None.
Returns
-------
raster_cliped : xr.DataArray
裁剪后的影像
"""
raster = open_rasterio(file_path)
try:
@ -487,15 +510,27 @@ def reproject_image(
target_crs: str = None,
target_shape: tuple = None,
target_image: xr.DataArray = None,
):
) -> xr.DataArray:
"""
Reproject Image data to target CRS or target data.
args:
image (xarray.DataArray): 通过 rioxarray.open_rasterio 加载的图像数据.
target_crs (str): Target CRS, eg. EPSG:4326.
target_shape (tuple): Target shape, eg. (1000, 1000).
target_image (xarray.DataArray): Target image, eg. rioxarray.open_rasterio 加载的图像数据.
Parameters
----------
image : xarray.DataArray
通过 rioxarray.open_rasterio 加载的图像数据.
target_crs : str, optional
Target CRS, eg. EPSG:4326.
target_shape : tuple, optional
Target shape, eg. (1000, 1000).
target_image : xarray.DataArray, optional
Target image, eg. rioxarray.open_rasterio 加载的图像数据.
Returns
-------
xarray.DataArray
重投影后的图像数据.
"""
if target_image is not None:
# 使用 target_image 进行重投影匹配
@ -506,9 +541,9 @@ def reproject_image(
target_image.shape[1] == image.shape[1]
and target_image.shape[2] == image.shape[2]
):
# 若判断为降尺度/等尺度, 则直接使用 cubic_spline 重采样投影到目标影像
# 若判断为降尺度/等尺度, 则直接使用 cubic 双三次插值重采样投影到目标影像
image_reprojed = image.rio.reproject_match(
target_image, resampling=Resampling.cubic_spline
target_image, resampling=Resampling.cubic
)
else:
# print("target_image shape is not match with image shape", image.shape, "to", target_image.shape)
@ -520,7 +555,7 @@ def reproject_image(
# 使用 target_crs 进行重投影
reproject_kwargs = {
"dst_crs": target_crs,
"resampling": Resampling.cubic_spline,
"resampling": Resampling.cubic,
}
if target_shape is not None:
reproject_kwargs["shape"] = target_shape
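# Sketch (not part of the diff; paths are hypothetical) of the reproject_match branch
# described above, now using cubic resampling instead of cubic_spline.
from rasterio.enums import Resampling
from rioxarray import open_rasterio
src = open_rasterio("data/MODIS_band.tif", masked=True)
target = open_rasterio("data/HLS_band.tif", masked=True)
aligned = src.rio.reproject_match(target, resampling=Resampling.cubic)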
@ -563,7 +598,7 @@ def mosaic_images(
tif_list,
nodata=nodata,
method=method,
resampling=Resampling.cubic_spline,
resampling=Resampling.cubic,
)
# 将结果重新构建为 xarray 数据集
# 单张SAR影像直接读取 transform: 233400.0 30.0 0.0 3463020.0 0.0 -30.0
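# Sketch (not part of the diff; tile paths are hypothetical). mosaic_images appears to
# wrap a rasterio-style merge and then rebuild the result as an xarray dataset; the
# underlying call presumably looks like this.
import rasterio
from rasterio.enums import Resampling
from rasterio.merge import merge
sources = [rasterio.open(p) for p in ("tile_a.tif", "tile_b.tif")]
mosaic, transform = merge(sources, nodata=-9999, method="first", resampling=Resampling.cubic)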

104 utils/sr2rgb.py Normal file

@ -0,0 +1,104 @@
"""
Composite single-band COG surface reflectance images (Red, Green, Blue) into an RGB image
1. Linear contrast stretch to 0-255;
2. Merge the bands;
3. Save as a uint8 RGB image;
"""
import os
import xarray as xr
from rioxarray import open_rasterio
import numpy as np
def sr2rgb(red_path: str, green_path: str, blue_path: str, output_path: str) -> None:
"""
Composite the red, green and blue single-band surface reflectance images into one RGB image
Parameters:
red_path (str): path to the red band file
green_path (str): path to the green band file
blue_path (str): path to the blue band file
output_path (str): path of the output RGB image
"""
# 检查文件是否存在
for path in [red_path, green_path, blue_path]:
if not os.path.exists(path):
raise FileNotFoundError(f"文件不存在: {path}")
# 读取三个波段数据
red_band = open_rasterio(red_path, masked=True).squeeze(dim="band", drop=True)
green_band = open_rasterio(green_path, masked=True).squeeze(dim="band", drop=True)
blue_band = open_rasterio(blue_path, masked=True).squeeze(dim="band", drop=True)
# 暂存元数据
y_coords = red_band.y
x_coords = red_band.x
crs = red_band.rio.crs
transform = red_band.rio.transform()
def stretch_band(band):
"""
Linear stretch to the 0-255 range
"""
# 处理NaN值与负值
band_no_nan = np.where(np.isnan(band), 0, band)
band_no_nan = np.where(band_no_nan < 0, 0, band_no_nan)
band_min = np.min(band_no_nan)
band_max = np.max(band_no_nan)
# 避免除零错误
if band_max == band_min:
stretched = np.zeros_like(band_no_nan, dtype=np.uint8)
else:
stretched = ((band_no_nan - band_min) / (band_max - band_min) * 255).astype(
np.uint8
)
return stretched
red_stretched = stretch_band(red_band.values)
green_stretched = stretch_band(green_band.values)
blue_stretched = stretch_band(blue_band.values)
# 合并三个波段为RGB图像
rgb_array = xr.DataArray(
np.dstack((red_stretched, green_stretched, blue_stretched)),
dims=("y", "x", "band"),
coords={"band": [1, 2, 3], "y": y_coords, "x": x_coords},
)
# 转置维度顺序以符合rioxarray要求
rgb_array = rgb_array.transpose("band", "y", "x")
# 写入元数据
rgb_array.rio.write_crs(crs, inplace=True)
rgb_array.rio.write_transform(transform, inplace=True)
rgb_array.rio.write_nodata(0, inplace=True)
# 保存为TIFF文件
rgb_array.rio.to_raster(output_path, dtype="uint8")
print(f"RGB图像已保存到: {output_path}")
return
if __name__ == "__main__":
# tif_dir = "D:\\NASA_EarthData_Script\\data\\HLS\\2024\\2024012"
# red_path = os.path.join(
# tif_dir, "HLS.S30.T49RGP.2024012T031101.v2.0.RED.subset.tif"
# )
# green_path = os.path.join(
# tif_dir, "HLS.S30.T49RGP.2024012T031101.v2.0.GREEN.subset.tif"
# )
# blue_path = os.path.join(
# tif_dir, "HLS.S30.T49RGP.2024012T031101.v2.0.BLUE.subset.tif"
# )
# output_path = os.path.join(tif_dir, "HLS.S30.T49RGP.2024012T031101.v2.0.RGB.tif")
tif_dir = "D:\\NASA_EarthData_Script\\data\\HLS\\2025\\2025011"
red_path = os.path.join(
tif_dir, "HLS.S30.T49RGP.2025011T031009.v2.0.RED.subset.tif"
)
green_path = os.path.join(
tif_dir, "HLS.S30.T49RGP.2025011T031009.v2.0.GREEN.subset.tif"
)
blue_path = os.path.join(
tif_dir, "HLS.S30.T49RGP.2025011T031009.v2.0.BLUE.subset.tif"
)
output_path = os.path.join(tif_dir, "HLS.S30.T49RGP.2025011T031009.v2.0.RGB.tif")
sr2rgb(red_path, green_path, blue_path, output_path)
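# Not part of the repository: a sketch of a percentile-based stretch (assumed 2%-98% cut),
# which is less sensitive to bright outliers than the min/max stretch used in stretch_band above.
import numpy as np
def stretch_band_percentile(band: np.ndarray, lo: float = 2.0, hi: float = 98.0) -> np.ndarray:
    # Replace NaN and negative reflectance with 0 before computing the cut points
    band = np.where(np.isnan(band) | (band < 0), 0, band)
    vmin, vmax = np.percentile(band, [lo, hi])
    if vmax == vmin:
        return np.zeros_like(band, dtype=np.uint8)
    return (np.clip((band - vmin) / (vmax - vmin), 0, 1) * 255).astype(np.uint8)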

229 utils/sr2rgb_light.py Normal file

@ -0,0 +1,229 @@
"""
Quickly mosaic images in batch and convert them to 8-bit RGB with GDAL BuildVRT and Translate (Light version)
1. Merge all tif files under the input directory into a single VRT file;
2. Use GDAL ComputeStatistics for fast statistics and derive the valid data range from the given percentile cut;
3. Convert the VRT to an 8-bit RGB COG file and save it to the target directory.
"""
import os
import sys
import logging
from pathlib import Path
from osgeo import gdal
import numpy as np
# 添加父目录到 sys.path 以导入 utils
BASE_DIR = Path(__file__).parent.parent
sys.path.append(str(BASE_DIR))
from utils.common_utils import setup_logging
gdal.UseExceptions()
# 设置 GDAL 选项以优化性能
gdal.SetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS")
gdal.SetConfigOption("GDAL_CACHEMAX", "1024")
def compute_band_stats(band: gdal.Band, approx_ok: bool = True, no_data: float = np.nan, percentile: float = 2.0):
"""
Compute band statistics with GDAL ComputeStatistics
Mimics the cumulative count cut used in QGIS layer symbology
Parameters
----------
band : gdal.Band
GDAL band object
approx_ok : bool, optional
Whether approximate (faster) statistics are allowed, by default True
no_data : float, optional
NoData value, by default np.nan
percentile : float, optional
Percentile cut (2 means a 2%-98% range), by default 2.0
Returns
-------
tuple
(min_val, max_val)
"""
try:
# 使用 GDAL 的 ComputeStatistics 计算统计信息
stats = band.ComputeStatistics(approx_ok)
if stats is not None and len(stats) >= 2:
min_val, max_val = stats[0], stats[1]
# 如果需要更精确的百分比统计, 使用直方图
if percentile > 0:
# 创建直方图
hist_min = min_val
hist_max = max_val
bucket_count = 256 # 直方图桶数
# 计算直方图
hist = band.GetHistogram(
hist_min, hist_max, bucket_count,
include_out_of_range=False, approx_ok=True
)
if hist and len(hist) > 0:
# 计算累积直方图
hist_array = np.array(hist, dtype=np.float32)
cum_hist = np.cumsum(hist_array)
total = cum_hist[-1]
# 找到百分比位置
# 考虑到无效值为 -9999, 所以直接从 0 开始, 尽量还原原始数据范围
if no_data <= 0:
lower_thresh = 0.0
else:
lower_thresh = total * (percentile / 100.0)
upper_thresh = total * (1 - percentile / 100.0)
# 找到对应的值
lower_idx = np.searchsorted(cum_hist, lower_thresh)
upper_idx = np.searchsorted(cum_hist, upper_thresh)
if lower_idx < len(hist_array) and upper_idx < len(hist_array):
# 将索引映射回实际值
bin_width = (hist_max - hist_min) / bucket_count
min_val = hist_min + lower_idx * bin_width
max_val = hist_min + upper_idx * bin_width
return min_val, max_val
except Exception as e:
logging.warning(f"计算统计信息失败: {str(e)}")
# 如果失败, 返回默认值
return 0.0, 1.0
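# Usage sketch (not part of the repository file; the dataset path is hypothetical):
# derive a display range for the first band of an existing raster or VRT.
sample_ds = gdal.Open("merged.vrt")
vmin, vmax = compute_band_stats(sample_ds.GetRasterBand(1), no_data=-9999.0, percentile=2.0)
logging.info(f"band 1 display range: [{vmin:.4f}, {vmax:.4f}]")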
def vrt_to_8bit_simple(input_dir: str | Path, output_path: str | Path,
no_data: float = np.nan, percentile: float = 1.0):
"""
Merge the tif files in the given directory with gdal.Translate and convert them to 8-bit
Uses GDAL's built-in statistics for a fast conversion; NaN values are written as 0
Parameters
----------
input_dir : str | Path
Directory containing the input TIF files
output_path : str | Path
Output file path
no_data : float, optional
NoData value of the input images, by default np.nan
percentile : float, optional
Percentile cut, by default 1.0 (effectively 0%-99% when no_data <= 0)
"""
input_dir = Path(input_dir)
output_path = Path(output_path)
# 1. 获取所有tif文件
tif_files = [
str(f)
for f in input_dir.iterdir()
if f.is_file() and f.suffix.lower() == ".tif"
]
if not tif_files:
logging.warning(f"{input_dir} 中没有找到 tif 文件.")
return
logging.info(f"1) 找到 {len(tif_files)} 个 tif 文件")
vrt_path = input_dir / "merged.vrt"
vrt_ds = None
try:
# 2. 创建VRT
logging.info("2) 构建 VRT 镶嵌...")
vrt_options = gdal.BuildVRTOptions(
srcNodata=np.nan, # 输入影像中的无效值, 设为 NaN 防止拼接处存在缝隙
VRTNodata=no_data, # 输出 VRT 中的无效值
)
vrt_ds = gdal.BuildVRT(str(vrt_path), tif_files, options=vrt_options)
if vrt_ds is None:
logging.error("构建 VRT 失败.")
return
# 获取波段数与影像尺寸
num_bands = vrt_ds.RasterCount
width = vrt_ds.RasterXSize
height = vrt_ds.RasterYSize
logging.info(f"波段数: {num_bands}, 影像尺寸: {width}x{height}")
# 3. 使用 GDAL 快速计算统计信息
logging.info("3) 使用 GDAL 计算影像统计信息...")
scale_params = []
for band_idx in range(1, num_bands + 1):
band = vrt_ds.GetRasterBand(band_idx)
# 使用 GDAL 内置函数计算统计
min_val, max_val = compute_band_stats(
band, no_data=no_data, percentile=percentile)
logging.info(
f"波段 {band_idx}: 有效值范围 [{min_val:.4f}, {max_val:.4f}]")
# 将有效值映射到 1-255, 保留 0 作为 NoData 专用值
# 避免暗部像素 (如水体、阴影) 会被映射为 0, 从而被误判为透明缺失
scale_params.append([min_val, max_val, 1, 255])
# 4. 使用gdal_translate转换为8bit, 无效值统一设为0
if output_path.exists():
logging.warning(f"结果文件已存在: {output_path}")
return
logging.info("4) 使用 gdal_translate 转换为 8bit COG (NaN->0)...")
translate_options = gdal.TranslateOptions(
format="COG", # 输出为 COG 格式, 自动构建金字塔
scaleParams=scale_params,
outputType=gdal.GDT_Byte,
noData=0, # 输出 NoData 设为 0
creationOptions=[
"COMPRESS=DEFLATE",
"ZLEVEL=4", # DEFLATE 压缩级别, 支持 1-9, 默认为 6
"PREDICTOR=2", # 差值预测, 利于影像压缩
"NUM_THREADS=ALL_CPUS",
"BIGTIFF=IF_SAFER",
# "TILED=YES", # COG 格式自带分块, 不支持手动设置
# "PHOTOMETRIC=RGB", # COG 格式不支持 photometric 参数
],
callback=gdal.TermProgress_nocb # 进度回调, 不显示进度条
)
gdal.Translate(str(output_path), vrt_ds, options=translate_options)
logging.info(f"已保存: {output_path}")
# 释放VRT数据集, 确保文件句柄释放
vrt_ds = None
except Exception as e:
logging.error(f"处理过程中出现异常: {str(e)}")
import traceback
logging.error(traceback.format_exc())
finally:
# 清理资源
vrt_ds = None
def main(input_dir, output_path):
input_dir = Path(input_dir)
output_path = Path(output_path)
output_root = output_path.parent
os.makedirs(output_root, exist_ok=True)
log_file = output_root / "sr2rgb_light.log"
setup_logging(str(log_file))
logging.info("开始批量处理 (Light Mode)...")
logging.info(f"输入目录: {input_dir}")
logging.info(f"输出文件: {output_path}")
vrt_to_8bit_simple(input_dir, output_path, no_data=-9999.0, percentile=1.0)
if __name__ == "__main__":
# 输入目录: 包含分块tif影像的根目录
input_root = Path(r"D:\CVEOdata\RS_Data\2025_S2_COG")
# 输出目录: 存放最终RGB镶嵌结果的目录
output_root = Path(r"D:\CVEOdata\RS_Data\2025_S2_RGB_COG")
rgb_file = output_root / "Hubei_Sentinel-2_2025_RGB_COG.tif"
main(input_root, rgb_file)