From ff1dea23248d5d75f1a7ffff616f11191afbf6f5 Mon Sep 17 00:00:00 2001 From: xhong Date: Tue, 9 Dec 2025 12:18:18 +0800 Subject: [PATCH] =?UTF-8?q?feat(sr2rgb):=20=E5=AE=8C=E5=96=84=E5=BD=B1?= =?UTF-8?q?=E5=83=8F=E8=BD=AC=E6=8D=A2=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup/lpdaac.yml | 42 ++++----- utils/sr2rgb_light.py | 214 ++++++++++++++++++++++++++---------------- 2 files changed, 154 insertions(+), 102 deletions(-) diff --git a/setup/lpdaac.yml b/setup/lpdaac.yml index 8edd066..54db95f 100644 --- a/setup/lpdaac.yml +++ b/setup/lpdaac.yml @@ -104,8 +104,8 @@ dependencies: - geckodriver=0.36.0=h127b8e1_0 - geopandas=1.1.1=pyhd8ed1ab_1 - geopandas-base=1.1.1=pyha770c72_1 - - geos=3.13.1=h9ea8674_0 - - geotiff=1.7.4=h86c3423_2 + - geos=3.14.0=hdade9fe_0 + - geotiff=1.7.4=h73469f5_4 - geoviews=1.14.1=hd8ed1ab_0 - geoviews-core=1.14.1=pyha770c72_0 - giflib=5.2.2=h64bf75a_0 @@ -117,7 +117,7 @@ dependencies: - h2=4.3.0=pyhcf101f3_0 - h5netcdf=1.6.4=pyhd8ed1ab_0 - h5py=3.14.0=nompi_py312h03cd2ba_101 - - harmony-py=1.2.0=pyhd8ed1ab_0 + - harmony-py=0.4.14=pyhd8ed1ab_0 - hdf4=4.2.15=h5557f11_7 - hdf5=1.14.6=nompi_he30205f_103 - holoviews=1.21.0=pyhd8ed1ab_0 @@ -126,6 +126,7 @@ dependencies: - httpx=0.28.1=pyhd8ed1ab_0 - hvplot=0.12.1=pyhd8ed1ab_0 - hyperframe=6.1.0=pyhd8ed1ab_0 + - icu=75.1=he0c23c2_0 - idna=3.10=pyhd8ed1ab_1 - imagecodecs=2025.8.2=py312h424859f_4 - imageio=2.37.0=pyhfb79c49_0 @@ -168,7 +169,7 @@ dependencies: - lerc=4.0.0=h6470a55_1 - libabseil=20250127.1=cxx17_h4eb7d71_0 - libaec=1.1.4=h20038f6_0 - - libarchive=3.8.1=gpl_h1ca5a36_100 + - libarchive=3.8.1=gpl_h26aea39_101 - libarrow=20.0.0=h7ea4809_8_cuda - libarrow-acero=20.0.0=h7d8d6a5_8_cuda - libarrow-dataset=20.0.0=h7d8d6a5_8_cuda @@ -188,13 +189,13 @@ dependencies: - libfreetype=2.14.1=h57928b3_0 - libfreetype6=2.14.1=hdbac1cb_0 - libgcc=15.2.0=h1383e82_7 - - libgdal-core=3.10.3=h228a343_13 - - libgdal-hdf4=3.10.3=ha47b6c4_13 + - libgdal-core=3.10.3=haf333d4_21 + - libgdal-hdf4=3.10.3=ha47b6c4_21 - libgomp=15.2.0=h1383e82_7 - libgoogle-cloud=2.36.0=hf249c01_1 - libgoogle-cloud-storage=2.36.0=he5eb982_1 - libgrpc=1.71.0=h8c3449c_1 - - libhwloc=2.12.1=default_h88281d1_1000 + - libhwloc=2.12.1=default_h64bd3f2_1002 - libhwy=1.3.0=ha71e874_1 - libiconv=1.18=hc1393d2_2 - libjpeg-turbo=3.1.0=h2466b09_0 @@ -202,14 +203,14 @@ dependencies: - libkml=1.3.0=h538826c_1021 - liblapack=3.9.0=35_hf9ab0e9_mkl - liblzma=5.8.1=h2466b09_2 - - libnetcdf=4.9.3=nompi_ha45073a_102 + - libnetcdf=4.9.3=nompi_h7d90bef_103 - libparquet=20.0.0=ha850022_8_cuda - libpng=1.6.50=h7351971_1 - libprotobuf=5.29.3=hd33f5f0_2 - libre2-11=2025.06.26=habfad5f_0 - - librttopo=1.1.0=hbfc9ebc_18 + - librttopo=1.1.0=h5ff11c1_19 - libsodium=1.0.20=hc70643c_0 - - libspatialite=5.1.0=h378fb81_14 + - libspatialite=5.1.0=gpl_h3bf7137_118 - libsqlite=3.50.4=hf5d6505_0 - libssh2=1.11.1=h9aa295b_0 - libthrift=0.21.0=hbe90ef8_0 @@ -218,7 +219,9 @@ dependencies: - libwebp-base=1.6.0=h4d5522a_0 - libwinpthread=12.0.0.r4.gg4f2fc60ca=h57928b3_10 - libxcb=1.17.0=h0e4246c_0 - - libxml2=2.13.8=h741aa76_1 + - libxml2=2.15.0=ha29bfb0_1 + - libxml2-16=2.15.0=h06f855e_1 + - libxml2-devel=2.15.0=ha29bfb0_1 - libzip=1.11.2=h3135430_0 - libzlib=1.3.1=h2466b09_2 - libzopfli=1.0.3=h0e60522_0 @@ -280,7 +283,7 @@ dependencies: - parso=0.8.5=pyhcf101f3_0 - partd=1.4.2=pyhd8ed1ab_0 - patsy=1.0.1=pyhd8ed1ab_1 - - pcre2=10.45=h99c9b8b_0 + - pcre2=10.46=h3402e2f_0 - pickleshare=0.7.5=pyhd8ed1ab_1004 - pillow=11.3.0=py312h5ee8bfe_3 - pip=25.2=pyh8b19718_0 @@ -288,7 +291,7 @@ dependencies: - pockets=0.9.1=pyhd8ed1ab_1 - pqdm=0.2.0=pyhd8ed1ab_1 - progressbar2=4.2.0=pyhd8ed1ab_0 - - proj=9.6.2=h7990399_2 + - proj=9.7.0=h9080b7b_0 - prometheus_client=0.23.1=pyhd8ed1ab_0 - prompt-toolkit=3.0.52=pyha770c72_0 - prompt_toolkit=3.0.52=hd8ed1ab_0 @@ -312,7 +315,7 @@ dependencies: - pyogrio=0.11.0=py312h6e88f47_0 - pyopenssl=25.1.0=pyhd8ed1ab_0 - pyparsing=3.2.5=pyhcf101f3_0 - - pyproj=3.7.2=py312h235ce7f_1 + - pyproj=3.7.2=py312habbd053_2 - pyresample=1.34.2=py312h275cf98_0 - pyshp=3.0.2=pyhd8ed1ab_0 - pysocks=1.7.1=pyh09c184e_7 @@ -320,7 +323,7 @@ dependencies: - pystac-client=0.9.0=pyhd8ed1ab_0 - python=3.12.11=h3f84c4b_0_cpython - python-cmr=0.13.0=pyhff2d567_1 - - python-dateutil=2.9.0.post0=pyhe01879c_2 + - python-dateutil=2.8.2=pyhd8ed1ab_0 - python-dotenv=0.20.0=pyhd8ed1ab_0 - python-fastjsonschema=2.21.2=pyhe01879c_0 - python-gil=3.12.11=hd8ed1ab_0 @@ -337,7 +340,7 @@ dependencies: - pyyaml=6.0.3=py312h05f76fc_0 - pyzmq=27.1.0=py312hbb5da91_0 - qhull=2020.2=hc790b64_5 - - rasterio=1.4.3=py312h9aeec68_2 + - rasterio=1.4.3=py312h11f88aa_3 - rav1e=0.7.1=ha073cba_3 - ray-core=2.49.2=py312h6652516_2 - ray-default=2.49.2=py312h8c80c70_2 @@ -360,7 +363,7 @@ dependencies: - selenium-manager=4.36.0=h18a1a76_0 - send2trash=1.8.3=pyh5737063_1 - setuptools=80.9.0=pyhff2d567_0 - - shapely=2.0.7=py312h3f81574_1 + - shapely=2.1.2=py312ha0f8e3e_0 - six=1.17.0=pyhe01879c_1 - smart_open=7.3.1=pyhcf101f3_0 - snappy=1.2.2=h7fa0ca8_0 @@ -433,7 +436,4 @@ dependencies: - zlib-ng=2.2.5=h1608b31_0 - zstandard=0.25.0=py312he5662c2_0 - zstd=1.5.7=hbeecb71_2 - - pip: - - beautifulsoup4==4.12.3 - -prefix: "D:\\program\\miniforge3\\envs\\lpdaac" +prefix: D:\program\miniforge3\envs\lpdaac diff --git a/utils/sr2rgb_light.py b/utils/sr2rgb_light.py index 592c438..a4d9d53 100644 --- a/utils/sr2rgb_light.py +++ b/utils/sr2rgb_light.py @@ -1,5 +1,9 @@ """ 使用 GDAL BuildVRT 与 Translate 快速将影像批量镶嵌并转换为 8bit RGB (Light version) + +1. 将输入目录下的所有 tif 文件合并为一个 VRT 文件; +2. 采用 GDAL ComputeStatistics 进行快速统计计算, 并根据指定的百分比截断值计算有效数据范围; +3. 将 VRT 文件转换为 8bit RGB 格式的 COG 文件, 并保存到指定目录. """ import os @@ -21,10 +25,84 @@ gdal.SetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS") gdal.SetConfigOption("GDAL_CACHEMAX", "1024") -def vrt_to_8bit_simple(input_dir: str | Path, output_path: str | Path, num: int = 2): +def compute_band_stats(band: gdal.Band, approx_ok: bool = True, no_data: float = np.nan, percentile: float = 2.0): + """ + 使用 GDAL ComputeStatistics 计算波段的统计信息 + + 实现类似 QGIS 图层符号化中的累计计数削减效果 + + Parameters + ---------- + band : gdal.Band + GDAL 波段对象 + approx_ok : bool, optional + 是否允许近似统计, 速度快, by default True + no_data : float, optional + 无效值, 默认 np.nan, by default np.nan + percentile : float, optional + 百分比截断值 (2 表示 2%-98% 范围), by default 2.0 + + Returns + ------- + tuple + (min_val, max_val) + """ + try: + # 使用 GDAL 的 ComputeStatistics 计算统计信息 + stats = band.ComputeStatistics(approx_ok) + if stats is not None and len(stats) >= 2: + min_val, max_val = stats[0], stats[1] + + # 如果需要更精确的百分比统计, 使用直方图 + if percentile > 0: + # 创建直方图 + hist_min = min_val + hist_max = max_val + bucket_count = 256 # 直方图桶数 + + # 计算直方图 + hist = band.GetHistogram( + hist_min, hist_max, bucket_count, + include_out_of_range=False, approx_ok=True + ) + + if hist and len(hist) > 0: + # 计算累积直方图 + hist_array = np.array(hist, dtype=np.float32) + cum_hist = np.cumsum(hist_array) + total = cum_hist[-1] + + # 找到百分比位置 + # 考虑到无效值为 -9999, 所以直接从 0 开始, 尽量还原原始数据范围 + if no_data <= 0: + lower_thresh = 0.0 + else: + lower_thresh = total * (percentile / 100.0) + upper_thresh = total * (1 - percentile / 100.0) + + # 找到对应的值 + lower_idx = np.searchsorted(cum_hist, lower_thresh) + upper_idx = np.searchsorted(cum_hist, upper_thresh) + + if lower_idx < len(hist_array) and upper_idx < len(hist_array): + # 将索引映射回实际值 + bin_width = (hist_max - hist_min) / bucket_count + min_val = hist_min + lower_idx * bin_width + max_val = hist_min + upper_idx * bin_width + + return min_val, max_val + except Exception as e: + logging.warning(f"计算统计信息失败: {str(e)}") + + # 如果失败, 返回默认值 + return 0.0, 1.0 + + +def vrt_to_8bit_simple(input_dir: str | Path, output_path: str | Path, + no_data: float = np.nan, percentile: float = 1.0): """ 使用 gdal.Translate 将指定目录下的 tif 文件合并并转换为 8bit - NaN 值将被赋值为 0 + 使用 GDAL 内置统计功能快速计算并转换, NaN 值将被赋值为 0 Parameters ---------- @@ -32,111 +110,89 @@ def vrt_to_8bit_simple(input_dir: str | Path, output_path: str | Path, num: int 输入 TIF 文件所在目录 output_path : str | Path 输出文件路径 - num : int, optional - 拉伸的百分比范围, 默认2%到98% + no_data : float, optional + 输入影像中的无效值, 默认 np.nan + percentile : float, optional + 百分比截断值, 默认 0% 到 99% """ input_dir = Path(input_dir) output_path = Path(output_path) - + # 1. 获取所有tif文件 tif_files = [ str(f) for f in input_dir.iterdir() if f.is_file() and f.suffix.lower() == ".tif" ] - + if not tif_files: logging.warning(f"{input_dir} 中没有找到 tif 文件.") return - + logging.info(f"1) 找到 {len(tif_files)} 个 tif 文件") - - vrt_path = input_dir / "temp.vrt" + + vrt_path = input_dir / "merged.vrt" vrt_ds = None try: # 2. 创建VRT logging.info("2) 构建 VRT 镶嵌...") - vrt_ds = gdal.BuildVRT(str(vrt_path), tif_files) - + vrt_options = gdal.BuildVRTOptions( + srcNodata=np.nan, # 输入影像中的无效值, 设为 NaN 防止拼接处存在缝隙 + VRTNodata=no_data, # 输出 VRT 中的无效值 + ) + vrt_ds = gdal.BuildVRT(str(vrt_path), tif_files, options=vrt_options) + if vrt_ds is None: logging.error("构建 VRT 失败.") return - + # 获取波段数与影像尺寸 num_bands = vrt_ds.RasterCount width = vrt_ds.RasterXSize height = vrt_ds.RasterYSize logging.info(f"波段数: {num_bands}, 影像尺寸: {width}x{height}") - - # 3. 计算每个波段的统计信息并处理 NaN 值 - logging.info("3) 计算影像统计信息...") + + # 3. 使用 GDAL 快速计算统计信息 + logging.info("3) 使用 GDAL 计算影像统计信息...") scale_params = [] for band_idx in range(1, num_bands + 1): band = vrt_ds.GetRasterBand(band_idx) - - # 读取数据块以计算统计 (避免加载全部数据) - block_size = 1024 - stats_min = [] - stats_max = [] - - for y in range(0, vrt_ds.RasterYSize, block_size): - height = min(block_size, vrt_ds.RasterYSize - y) - for x in range(0, vrt_ds.RasterXSize, block_size): - width = min(block_size, vrt_ds.RasterXSize - x) - - data = band.ReadAsArray(x, y, width, height) - if data is not None: - # 计算非NaN的最小最大值 - valid_data = data[~np.isnan(data)] - # 读取2%-98%的有效数据范围 - if len(valid_data) > 0: - stats_min.append(np.nanpercentile(valid_data, num)) - stats_max.append(np.nanpercentile(valid_data, 100 - num)) - - if stats_min and stats_max: - min_val = min(stats_min) - max_val = max(stats_max) - logging.info(f"波段 {band_idx}: 有效值范围 [{min_val:.4f}, {max_val:.4f}]") - scale_params.append([min_val, max_val, 0, 255]) - else: - logging.warning(f"波段 {band_idx}: 未找到有效数据, 使用默认范围 [0, 1]") - scale_params.append([0, 1, 0, 255]) - - # 4. 使用gdal_translate转换为8bit, NaN值设为0 - logging.info("4) 使用 gdal_translate 转换为 8bit (NaN->0)...") + # 使用 GDAL 内置函数计算统计 + min_val, max_val = compute_band_stats( + band, no_data=no_data, percentile=percentile) + logging.info( + f"波段 {band_idx}: 有效值范围 [{min_val:.4f}, {max_val:.4f}]") + # 将有效值映射到 1-255, 保留 0 作为 NoData 专用值 + # 避免暗部像素 (如水体、阴影) 会被映射为 0, 从而被误判为透明缺失 + scale_params.append([min_val, max_val, 1, 255]) + + # 4. 使用gdal_translate转换为8bit, 无效值统一设为0 + if output_path.exists(): + logging.warning(f"结果文件已存在: {output_path}") + return + logging.info("4) 使用 gdal_translate 转换为 8bit COG (NaN->0)...") translate_options = gdal.TranslateOptions( + format="COG", # 输出为 COG 格式, 自动构建金字塔 scaleParams=scale_params, outputType=gdal.GDT_Byte, - noData=0, # 将NaN转换为0 + noData=0, # 输出 NoData 设为 0 creationOptions=[ - "COMPRESS=DEFLATE", - "TILED=YES", + "COMPRESS=DEFLATE", + "ZLEVEL=4", # DEFLATE 压缩级别, 支持 1-9, 默认为 6 + "PREDICTOR=2", # 差值预测, 利于影像压缩 + "NUM_THREADS=ALL_CPUS", "BIGTIFF=IF_SAFER", - "PHOTOMETRIC=RGB", - ] + # "TILED=YES", # COG 格式自带分块, 不支持手动设置 + # "PHOTOMETRIC=RGB", # COG 格式不支持 photometric 参数 + ], + callback=gdal.TermProgress_nocb # 进度回调, 不显示进度条 ) - gdal.Translate(str(output_path), vrt_ds, options=translate_options) logging.info(f"已保存: {output_path}") - - # 5. 构建金字塔 (基于输出文件) - logging.info("5) 构建金字塔...") - # 释放VRT数据集,确保文件句柄释放 + + # 释放VRT数据集, 确保文件句柄释放 vrt_ds = None - - # 打开生成的输出文件进行更新 - out_ds = gdal.Open(str(output_path), gdal.GA_Update) - if out_ds: - try: - # 构建金字塔: 2, 4, 8, 16... - # 这里根据影像大小自适应或者固定层级 - out_ds.BuildOverviews("AVERAGE", [2, 4, 8, 16]) - logging.info("金字塔构建完成") - finally: - out_ds = None - else: - logging.error(f"无法打开文件构建金字塔: {output_path}") except Exception as e: logging.error(f"处理过程中出现异常: {str(e)}") @@ -150,28 +206,24 @@ def vrt_to_8bit_simple(input_dir: str | Path, output_path: str | Path, num: int def main(input_dir, output_path): input_dir = Path(input_dir) output_path = Path(output_path) - + output_root = output_path.parent os.makedirs(output_root, exist_ok=True) - + log_file = output_root / "sr2rgb_light.log" setup_logging(str(log_file)) - + logging.info("开始批量处理 (Light Mode)...") logging.info(f"输入目录: {input_dir}") logging.info(f"输出文件: {output_path}") - - if output_path.exists(): - logging.warning(f"结果文件已存在: {output_path}") - return - vrt_to_8bit_simple(input_dir, output_path) + vrt_to_8bit_simple(input_dir, output_path, no_data=-9999.0, percentile=1.0) if __name__ == "__main__": # 输入目录: 包含分块tif影像的根目录 - input_root = Path(r"D:\CVEOdata\RS_Data\2025_S2") + input_root = Path(r"D:\CVEOdata\RS_Data\2025_S2_COG") # 输出目录: 存放最终RGB镶嵌结果的目录 - output_root = Path(r"D:\CVEOdata\RS_Data\2025_S2_RGB") - rgb_file = output_root / "Hubei_Sentinel-2_2025_RGB.tif" - main(input_root, rgb_file) \ No newline at end of file + output_root = Path(r"D:\CVEOdata\RS_Data\2025_S2_RGB_COG") + rgb_file = output_root / "Hubei_Sentinel-2_2025_RGB_COG.tif" + main(input_root, rgb_file)