feat: Project initialization.

谢泓 2025-01-04 15:39:20 +08:00
commit 7ecd0378f1
7 changed files with 1807 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,11 @@
.dodsrc
__pycache__/
data/
*.pdf
*.tif
*.tiff

HLS_SuPER/HLS_PER.py Normal file

@@ -0,0 +1,317 @@
# -*- coding: utf-8 -*-
"""
===============================================================================
HLS Processing and Exporting Reformatted Data (HLS_PER)
This module contains functions to conduct subsetting and quality filtering of
search results.
-------------------------------------------------------------------------------
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
Last Updated: 2024-09-18
===============================================================================
"""
import os
import sys
import logging
import numpy as np
from datetime import datetime as dt
import xarray as xr
import rioxarray as rxr
import dask.distributed
def create_output_name(url, band_dict):
"""
Uses HLS default naming scheme to generate an output name with common band names.
This allows for easier stacking of bands from both collections.
"""
# Get Necessary Strings
prod = url.split("/")[4].split(".")[0]
asset = url.split("/")[-1].split(".")[-2]
    # Added: get the acquisition DOY so imagery can be archived by date
time = url.split("/")[-1].split(".")[3]
file_doy = time[:8]
    # Hard-coded one-off for the Fmask name in case it is not in the band_dict but is needed for masking
if asset == "Fmask":
output_name = f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.FMASK.subset.tif"
else:
for key, value in band_dict[prod].items():
if value == asset:
output_name = (
f"{'.'.join(url.split('/')[-1].split('.')[:-2])}.{key}.subset.tif"
)
return [output_name, file_doy]
def open_hls(url, roi=None, scale=True, chunk_size=dict(band=1, x=512, y=512)):
"""
Generic Function to open an HLS COG and clip to ROI. For consistent scaling, this must be done manually.
Some HLS Landsat scenes have the metadata in the wrong location.
"""
# Open using rioxarray
da = rxr.open_rasterio(url, chunks=chunk_size, mask_and_scale=False).squeeze(
"band", drop=True
)
# Reproject ROI and Clip if ROI is provided
if roi is not None:
roi = roi.to_crs(da.spatial_ref.crs_wkt)
da = da.rio.clip(roi.geometry.values, roi.crs, all_touched=True)
# Apply Scale Factor if desired for non-quality layer
if scale and "Fmask" not in url:
# Mask Fill Values
da = xr.where(da == -9999, np.nan, da)
# Scale Data
da = da * 0.0001
# Remove Scale Factor After Scaling - Prevents Double Scaling
da.attrs["scale_factor"] = 1.0
# Add Scale Factor to Attributes Manually - This will overwrite/add if the data is missing.
if not scale and "Fmask" not in url:
da.attrs["scale_factor"] = 0.0001
return da
def create_quality_mask(quality_data, bit_nums: list = [0, 1, 2, 3, 4, 5]):
"""
Uses the Fmask layer and bit numbers to create a binary mask of good pixels.
By default, bits 0-5 are used.
"""
mask_array = np.zeros((quality_data.shape[0], quality_data.shape[1]))
# Remove/Mask Fill Values and Convert to Integer
quality_data = np.nan_to_num(quality_data, 0).astype(np.int8)
for bit in bit_nums:
# Create a Single Binary Mask Layer
        mask_temp = (np.array(quality_data) & (1 << bit)) > 0
mask_array = np.logical_or(mask_array, mask_temp)
return mask_array
def process_granule(
granule_urls,
roi,
quality_filter,
scale,
output_dir,
band_dict,
bit_nums=[0, 1, 2, 3, 4, 5],
chunk_size=dict(band=1, x=512, y=512),
):
"""
Processes a list of HLS asset urls for a single granule.
"""
# Setup Logging
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
# Check if all Outputs Exist for a Granule
if not all(
os.path.isfile(
f"{output_dir}/{create_output_name(url, band_dict)[1]}/{create_output_name(url, band_dict)[0]}"
)
for url in granule_urls
):
# First Handle Quality Layer
if quality_filter:
# Generate Quality Layer URL
split_asset = granule_urls[0].split("/")[-1].split(".")
split_asset[-2] = "Fmask"
quality_url = (
f"{'/'.join(granule_urls[0].split('/')[:-1])}/{'.'.join(split_asset)}"
)
# Check if File exists in Output Directory
output_name = create_output_name(quality_url, band_dict)[0]
            # Added: archive same-day imagery in a subdirectory named by the acquisition DOY
file_doy = create_output_name(quality_url, band_dict)[1]
output_dir = f"{output_dir}/{file_doy}"
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
output_file = f"{output_dir}/{output_name}"
# Open Quality Layer
qa_da = open_hls(quality_url, roi, scale, chunk_size)
# Check if quality asset is already processed
if not os.path.isfile(output_file):
# Write Output
qa_da.rio.to_raster(raster_path=output_file, driver="COG")
else:
logging.info(
f"Existing file {output_name} found in {output_dir}. Skipping."
)
# Remove Quality Layer from Granule Asset List if Present
granule_urls = [asset for asset in granule_urls if asset != quality_url]
# Create Quality Mask
qa_mask = create_quality_mask(qa_da, bit_nums=bit_nums)
# Process Remaining Assets
for url in granule_urls:
            # Check if File exists in Output Directory (outputs are archived by DOY)
            output_name, file_doy = create_output_name(url, band_dict)
            # Keep non-quality-filtered outputs in the same DOY subdirectory layout as the existence check above
            granule_dir = output_dir if quality_filter else f"{output_dir}/{file_doy}"
            os.makedirs(granule_dir, exist_ok=True)
            output_file = f"{granule_dir}/{output_name}"
# Check if scene is already processed
if not os.path.isfile(output_file):
# Open Asset
da = open_hls(url, roi, scale, chunk_size)
# Apply Quality Mask if Desired
if quality_filter:
da = da.where(~qa_mask)
# Write Output
da.rio.to_raster(raster_path=output_file, driver="COG")
else:
logging.info(
f"Existing file {output_name} found in {output_dir}. Skipping."
)
else:
logging.info(
f"All assets related to {granule_urls[0].split('/')[-1]} are already processed, skipping."
)
def build_hls_xarray_timeseries(
hls_cog_list, mask_and_scale=True, chunk_size=dict(band=1, x=512, y=512)
):
"""
Builds a single band timeseries using xarray for a list of HLS COGs. Dependent on file naming convention.
    Works on SuPER-script-named files. Files need common band names that correspond across HLSS and HLSL bands,
    e.g. HLSL30 Band 5 (NIR1) and HLSS30 Band 8A (NIR1).
"""
# Define Band(s)
bands = [filename.split(".")[6] for filename in hls_cog_list]
# Make sure all files in list are the same band
if not all(band == bands[0] for band in bands):
raise ValueError("All listed files must be of the same band.")
band_name = bands[0]
# Create Time Variable
try:
time_list = [
dt.strptime(filename.split(".")[3], "%Y%jT%H%M%S")
for filename in hls_cog_list
]
    except ValueError as err:
        raise ValueError("A COG does not have a valid date string in the filename.") from err
time = xr.Variable("time", time_list)
timeseries_da = xr.concat(
[
rxr.open_rasterio(
filename, mask_and_scale=mask_and_scale, chunks=chunk_size
).squeeze("band", drop=True)
for filename in hls_cog_list
],
dim=time,
)
timeseries_da.name = band_name
return timeseries_da
def create_timeseries_dataset(hls_file_dir, output_type, output_dir=None):
"""
Creates an xarray dataset timeseries from a directory of HLS COGs.
Writes to a netcdf output. Currently only works for HLS SuPER outputs.
"""
# Setup Logging
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)
# List Files in Directory
all_files = [file for file in os.listdir(hls_file_dir) if file.endswith(".tif")]
# Create Dictionary of Files by Band
file_dict = {}
for file in all_files:
tile = file.split(".")[2]
band = file.split(".")[6]
full_path = os.path.join(hls_file_dir, file)
if tile not in file_dict:
file_dict[tile] = {}
if band not in file_dict[tile]:
file_dict[tile][band] = []
file_dict[tile][band].append(full_path)
# logging.info(f"{file_dict}")
# Check that all bands within each tile have the same number of observations
for tile, bands in file_dict.items():
q_obs = {band: len(files) for band, files in bands.items()}
if not all(q == list(q_obs.values())[0] for q in q_obs.values()):
logging.info(
f"Not all bands in {tile} have the same number of observations."
)
logging.info(f"{q_obs}")
# Loop through each tile and build timeseries output
for tile, bands in file_dict.items():
dataset = xr.Dataset()
timeseries_dict = {
band: dask.delayed(build_hls_xarray_timeseries)(files)
for band, files in bands.items()
}
timeseries_dict = dask.compute(timeseries_dict)[0]
dataset = xr.Dataset(timeseries_dict)
# Set up CF-Compliant Coordinate Attributes
dataset.attrs["Conventions"] = "CF-1.6"
dataset.attrs["title"] = "HLS SuPER Timeseries Dataset"
dataset.attrs["institution"] = "LP DAAC"
dataset.x.attrs["axis"] = "X"
dataset.x.attrs["standard_name"] = "projection_x_coordinate"
dataset.x.attrs["long_name"] = "x-coordinate in projected coordinate system"
dataset.x.attrs["units"] = "m"
dataset.y.attrs["axis"] = "Y"
dataset.y.attrs["standard_name"] = "projection_y_coordinate"
dataset.y.attrs["long_name"] = "y-coordinate in projected coordinate system"
dataset.y.attrs["units"] = "m"
        dataset.time.attrs["axis"] = "T"
dataset.time.attrs["standard_name"] = "time"
dataset.time.attrs["long_name"] = "time"
# Get first and last date
first_date = (
dataset.time.data[0].astype("M8[ms]").astype(dt).strftime("%Y-%m-%d")
)
final_date = (
dataset.time.data[-1].astype("M8[ms]").astype(dt).strftime("%Y-%m-%d")
)
# Write Outputs
# if output_type == "NC4":
output_path = os.path.join(
output_dir, f"HLS.{tile}.{first_date}.{final_date}.subset.nc"
)
dataset.to_netcdf(output_path)
# elif output_type == "ZARR":
# output_path = os.path.join(output_dir, "hls_timeseries_dataset.zarr")
# dataset.to_zarr(output_path)
logging.info(f"Output saved to {output_path}")

HLS_SuPER/HLS_Su.py Normal file

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
"""
===============================================================================
This module contains functions related to searching and preprocessing HLS data.
-------------------------------------------------------------------------------
Authors: Mahsa Jami, Cole Krehbiel, and Erik Bolch
Contact: lpdaac@usgs.gov
Last Updated: 2024-09-18
===============================================================================
"""
# Import necessary packages
import numpy as np
import earthaccess
# Main function to search and filter HLS data
def hls_search(roi: list, band_dict: dict, dates=None, cloud_cover=None, log=False):
"""
    This function uses earthaccess to search for HLS data using an ROI and temporal parameters, filters by cloud cover, and delivers a list of result URLs for the selected bands.
"""
# Search for data
results = earthaccess.search_data(
short_name=list(band_dict.keys()), # Band dict contains shortnames as keys
polygon=roi,
temporal=dates,
)
# Filter by cloud cover
if cloud_cover:
results = hls_cc_filter(results, cloud_cover)
# Get results urls
results_urls = [granule.data_links() for granule in results]
# Flatten url list
# results_urls = [item for sublist in results_urls for item in sublist]
# Filter url list based on selected bands
selected_results_urls = [
get_selected_bands_urls(granule_urls, band_dict)
for granule_urls in results_urls
]
return selected_results_urls
# Filter earthaccess results based on cloud cover threshold
def hls_cc_filter(results, cc_threshold):
"""
This function filters a list of earthaccess results based on a cloud cover threshold.
"""
cc = []
for result in results:
# Retrieve Cloud Cover from json, convert to float and place in numpy array
cc.append(
float(
next(
(
aa
for aa in result["umm"]["AdditionalAttributes"]
if aa.get("Name") == "CLOUD_COVERAGE"
),
None,
)["Values"][0]
)
)
cc = np.array(cc)
# Find indices based on cloud cover threshold
cc_indices = np.where(cc <= cc_threshold)
# Filter results based on indices
return [results[i] for i in cc_indices[0]]
# Filter results urls based on selected bands
def get_selected_bands_urls(url_list, band_dict):
"""
This function filters a list of results urls based on HLS collection and selected bands.
"""
selected_bands_urls = []
# Loop through urls
for url in url_list:
# Filter bands based on band dictionary
for collection, nested_dict in band_dict.items():
if collection in url:
for band in nested_dict.values():
if band in url:
selected_bands_urls.append(url)
return selected_bands_urls

HLS_SuPER/HLS_SuPER.py Normal file

@@ -0,0 +1,594 @@
# -*- coding: utf-8 -*-
"""
===============================================================================
HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
Authors: Cole Krehbiel, Mahsa Jami, and Erik Bolch
Contact: lpdaac@usgs.gov
Last Updated: 2024-09-18
===============================================================================
"""
# Possible Future Improvements:
# TODO Improve CF-1.6 NetCDF Compliance
# TODO Improve behavior around deletion of cogs when a netcdf is requested
# TODO Add ZARR as output option
import argparse
import sys
import os
import shutil
import logging
import time
import json
import earthaccess
from shapely.geometry import box
import geopandas as gpd
from datetime import datetime as dt
import dask.distributed
from HLS_Su import hls_search
from HLS_PER import process_granule, create_timeseries_dataset
def parse_arguments():
"""
Function to parse command line input arguments.
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Performs Spatial/Temporal/Band Subsetting, Processing, and Customized Exporting for HLS V2.0 files",
)
# roi: Region of interest as shapefile, geojson, or comma separated LL Lon, LL Lat, UR Lon, UR Lat
parser.add_argument(
"-roi",
type=str,
required=True,
help="(Required) Region of Interest (ROI) for spatial subset. \
Valid inputs are: (1) a geojson or shapefile (absolute path to file required if not in same directory as this script), or \
(2) bounding box coordinates: 'LowerLeft_lon,LowerLeft_lat,UpperRight_lon,UpperRight_lat'\
NOTE: Negative coordinates MUST be written in single quotation marks '-120,43,-118,48'\
        NOTE 2: If providing an absolute path with spaces in directory names, please wrap the path in double quotation marks.",
)
# dir: Directory to save the files to
parser.add_argument(
"-dir",
required=False,
help="Directory to export output HLS files to.",
default=os.getcwd(),
)
# start: Start Date
parser.add_argument(
"-start",
required=False,
help="Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20).",
default="2014-04-03",
)
# end: End Date
parser.add_argument(
"-end",
required=False,
help="Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2022-10-24).",
default=dt.today().strftime("%Y-%m-%d"),
)
# prod: product(s) desired to be downloaded
parser.add_argument(
"-prod",
choices=["HLSS30", "HLSL30", "both"],
required=False,
help="Desired product(s) to be subset and processed.",
default="both",
)
# layers: layers desired to be processed within the products selected
parser.add_argument(
"-bands",
required=False,
help="Desired layers to be processed. Valid inputs are ALL, COASTAL-AEROSOL, BLUE, GREEN, RED, RED-EDGE1, RED-EDGE2, RED-EDGE3, NIR1, SWIR1, SWIR2, CIRRUS, TIR1, TIR2, WATER-VAPOR, FMASK, VZA, VAA, SZA, SAA. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names.",
default="ALL",
)
# cc: maximum cloud cover (%) allowed to be returned (by scene)
parser.add_argument(
"-cc",
required=False,
help="Maximum (scene-level) cloud cover (percent) allowed for returned observations (e.g. 35). Valid range: 0 to 100 (integers only)",
default="100",
)
# qf: quality filter flag: filter out poor quality data yes/no
parser.add_argument(
"-qf",
choices=["True", "False"],
required=False,
help="Flag to quality filter before exporting output files (see README for quality filtering performed).",
default="True",
)
# sf: scale factor flag: Scale data or leave unscaled yes/no
parser.add_argument(
"-scale",
choices=["True", "False"],
required=False,
help="Flag to apply scale factor to layers before exporting output files. This is generally unecessary as most applications will scale automatically.",
default="False",
)
# of: output file format
parser.add_argument(
"-of",
choices=["COG", "NC4", "ZARR"],
required=False,
help="Define the desired output file format",
default="COG",
)
# chunksize: chunk size for processing with dask
parser.add_argument(
"-cs",
type=str,
help="Chunksize for processing scenes with dask in format 'band,x,y'. This is used to provide chunk_size argument to rioxarray.open_rasterio to improve processing speed.\
For example: '1,512,512' (native hls chunk size) provides better performance for ROIs that fall within a single scene, while '1,3600,3600' (full HLS scene) provides better performance for \
larger ROIs that span multiple scenes. The default is '1,512,512', but this can lead to a very large task list for large ROIs.",
default="1,512,512",
)
# logfile: Optional logfile path
parser.add_argument(
"-logfile",
required=False,
help="Optional path to output logfile. If not provided, logging will only be to the console.",
)
return parser.parse_args()
def format_roi(roi):
"""
Determines if submitted ROI is a file or bbox coordinates.
If a file, opens a GeoJSON or shapefile and creates a list of polygon vertices in the correct order. If the file has multiple polygons it will use a unary union convex hull of the external bounds.
If bbox coordinates, creates a geodataframe with a single Polygon geometry.
Returns a geopandas dataframe for clipping and a list of vertices for searching.
"""
if os.path.isfile(roi): # and roi.endswith(("geojson", "shp")):
print(roi)
try:
# Open ROI if file
roi = gpd.read_file(roi)
if len(roi) > 1:
# Merge all Polygon geometries and create external boundary
logging.info(
"Multiple polygons detected. Creating single geometry of external coordinates."
)
single_geometry = roi.unary_union.convex_hull
roi = gpd.GeoDataFrame(geometry=[single_geometry], crs=roi.crs)
logging.info(roi)
# Check if ROI is in Geographic CRS, if not, convert to it
if roi.crs.is_geographic:
# List Vertices in correct order for search
vertices_list = list(roi.geometry[0].exterior.coords)
else:
roi_geographic = roi.to_crs("EPSG:4326")
logging.info(
"Note: ROI submitted is being converted to Geographic CRS (EPSG:4326)"
)
vertices_list = list(roi_geographic.geometry[0].exterior.coords)
except (FileNotFoundError, ValueError):
sys.exit(
f"The GeoJSON/shapefile is either not valid or could not be found.\nPlease double check the name and provide the absolute path to the file or make sure that it is located in {os.getcwd()}"
)
else:
# If bbox coordinates are submitted
bbox = tuple(map(float, roi.strip("'\"").split(",")))
print(bbox)
# Convert bbox to a geodataframe for clipping
roi = gpd.GeoDataFrame(geometry=[box(*bbox)], crs="EPSG:4326")
vertices_list = list(roi.geometry[0].exterior.coords)
return (roi, vertices_list)
def format_dates(start, end):
# Strip Quotes
start = start.strip("'").strip('"')
end = end.strip("'").strip('"')
# Convert to datetime
try:
start = dt.strptime(start, "%Y-%m-%d")
end = dt.strptime(end, "%Y-%m-%d")
except ValueError:
sys.exit(
"A date format is not valid. The valid format is ISO 8601: YYYY-MM-DD (e.g. 2020-10-20)"
)
if start > end:
sys.exit(
f"The Start Date requested: {start} is after the End Date Requested: {end}."
)
else:
dates = (start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d"))
return dates
def format_cloud_cover(cc):
try:
cc = int(cc.strip("'").strip('"'))
except ValueError:
sys.exit(
f"{cc} is not a valid input for filtering by cloud cover (e.g. 35). Valid range: 0 to 100 (integers only)"
)
# Validate that cc is in the valid range (0-100)
if cc < 0 or cc > 100:
sys.exit(
f"{cc} is not a valid input option for filtering by cloud cover (e.g. 35). Valid range: 0 to 100 (integers only)"
)
return cc
def str_to_bool(value):
"""
Converts a string to a boolean.
Accepts 'True', 'true', '1' as True.
Accepts 'False', 'false', '0' as False.
"""
if isinstance(value, str):
if value.lower() in ("true", "1"):
return True
elif value.lower() in ("false", "0"):
return False
raise ValueError(f"Cannot convert {value} to boolean.")
def create_band_dict(prod, bands):
"""
Creates a dictionary of bands and common band names for each collection requested.
"""
shortname = {"HLSS30": "HLSS30.v2.0", "HLSL30": "HLSL30.v2.0"}
# Create a dictionary with product name and shortname
if prod == "both":
prods = shortname
else:
prods = {prod: shortname[prod]}
# Strip spacing, quotes, make all upper case and create a list
bands = bands.strip(" ").strip("'").strip('"').upper()
band_list = bands.split(",")
# Create a LUT dict including the HLS product bands mapped to names
lut = {
"HLSS30": {
"COASTAL-AEROSOL": "B01",
"BLUE": "B02",
"GREEN": "B03",
"RED": "B04",
"RED-EDGE1": "B05",
"RED-EDGE2": "B06",
"RED-EDGE3": "B07",
"NIR-Broad": "B08",
"NIR1": "B8A",
"WATER-VAPOR": "B09",
"CIRRUS": "B10",
"SWIR1": "B11",
"SWIR2": "B12",
"FMASK": "Fmask",
"VZA": "VZA",
"VAA": "VAA",
"SZA": "SZA",
"SAA": "SAA",
},
"HLSL30": {
"COASTAL-AEROSOL": "B01",
"BLUE": "B02",
"GREEN": "B03",
"RED": "B04",
"NIR1": "B05",
"SWIR1": "B06",
"SWIR2": "B07",
"CIRRUS": "B09",
"TIR1": "B10",
"TIR2": "B11",
"FMASK": "Fmask",
"VZA": "VZA",
"VAA": "VAA",
"SZA": "SZA",
"SAA": "SAA",
},
}
# List of all available/acceptable band names
all_bands = [
"ALL",
"COASTAL-AEROSOL",
"BLUE",
"GREEN",
"RED",
"RED-EDGE1",
"RED-EDGE2",
"RED-EDGE3",
"NIR1",
"SWIR1",
"SWIR2",
"CIRRUS",
"TIR1",
"TIR2",
"WATER-VAPOR",
"FMASK",
"VZA",
"VAA",
"SZA",
"SAA",
]
# Validate that bands are named correctly
for b in band_list:
if b not in all_bands:
sys.exit(
f"Band: {b} is not a valid input option. Valid inputs are {all_bands}. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names."
)
# Set up a dictionary of band names and numbers by product
band_dict = {}
for p in prods:
band_dict[p] = {}
for b in band_list:
if b == "ALL":
band_dict[p] = lut[p]
else:
try:
band_dict[p][b] = lut[p][b]
                except KeyError:
print(f"Product {p} does not contain band {b}")
return band_dict
def format_chunksize(chunksize):
"""
Converts comma-separated chunksize string to dictionary.
"""
keys = ["band", "x", "y"]
values = list(map(int, chunksize.strip("'\"").split(",")))
if len(values) != len(keys):
raise ValueError(
"Chunksize must provide band, x and y (3) values separated by commas."
)
return dict(zip(keys, values))
def confirm_action(prompt):
"""
Prompts the user to confirm an action.
"""
while True:
response = input(prompt).lower()
if response in ["y", "yes"]:
return True
elif response in ["n", "no"]:
return False
else:
print("Invalid input. Please enter 'y' or 'n'.")
def setup_dask_environment():
"""
Passes RIO environment variables to dask workers for authentication.
"""
import os
import rasterio
cookie_file_path = os.path.expanduser("~/cookies.txt")
global env
gdal_config = {
"GDAL_HTTP_UNSAFESSL": "YES",
"GDAL_HTTP_COOKIEFILE": cookie_file_path,
"GDAL_HTTP_COOKIEJAR": cookie_file_path,
"GDAL_DISABLE_READDIR_ON_OPEN": "YES",
"CPL_VSIL_CURL_ALLOWED_EXTENSIONS": "TIF",
"GDAL_HTTP_MAX_RETRY": "10",
"GDAL_HTTP_RETRY_DELAY": "0.5",
"GDAL_HTTP_TIMEOUT": "300",
}
env = rasterio.Env(**gdal_config)
env.__enter__()
def main():
"""
Main function to run the HLS SuPER script.
"""
# Parse arguments
args = parse_arguments()
# Configure logging
log_handlers = [logging.StreamHandler(sys.stdout)]
if args.logfile:
log_handlers.append(logging.FileHandler(args.logfile))
logging.basicConfig(
level=logging.INFO,
format="%(levelname)s:%(asctime)s ||| %(message)s",
handlers=log_handlers,
)
# Handle Login Credentials with earthaccess
earthaccess.login(persist=True)
# Start Log
logging.info("HLS SuPER script started")
# Format ROI
roi, vl = format_roi(args.roi)
logging.info("Region of Interest formatted successfully")
# Set Output Directory
if args.dir is not None:
output_dir = os.path.normpath(args.dir.strip("'").strip('"')) + os.sep
else:
# Defaults to the current directory
output_dir = os.getcwd() + os.sep
logging.info(f"Output directory set to: {output_dir}")
# Format/Validate Dates
dates = format_dates(args.start, args.end)
logging.info(f"Date Parameters: {dates}")
# Create Product/Band Dictionary
band_dict = create_band_dict(args.prod, args.bands)
logging.info(f"Products/Bands Selected: {band_dict}")
# Format Cloud Cover
cc = format_cloud_cover(args.cc)
logging.info(f"Cloud Cover Filter <= {cc}")
# Quality Filtering
qf = str_to_bool(args.qf)
logging.info(f"Quality Filtering: {qf}")
# Scale Factor
scale = str_to_bool(args.scale)
logging.info(f"Apply Scale Factor: {scale}")
# Chunk Size
chunk_size = format_chunksize(args.cs)
logging.info(f"Chunk Size: {chunk_size}")
# Output File Type
if args.of not in ["COG", "NC4"]:
sys.exit(
f"Output format {args.of} is not a valid output format. Please choose from 'COG', 'NC4'."
)
logging.info(f"Output format: {args.of}")
# Search for Data and Save Results
results_urls_file = os.path.join(output_dir, "hls_super_results_urls.json")
use_existing_file = False
if os.path.isfile(results_urls_file):
logging.info(f"Results url list already exists in {output_dir}.")
# Confirm if user wants to use existing file.
if confirm_action(
f"Do you want to use the existing results file ({results_urls_file})? (y/n)"
):
use_existing_file = True
else:
if not confirm_action(
"Do you want to overwrite the existing results file? (y/n)"
):
sys.exit(
f"Processing aborted. Please move, rename, or remove existing file: {results_urls_file}."
)
if use_existing_file:
logging.info("Using existing results file.")
with open(results_urls_file, "r") as file:
results_urls = json.load(file)
else:
logging.info("Searching for data...")
results_urls = hls_search(
roi=vl, band_dict=band_dict, dates=dates, cloud_cover=cc
)
logging.info(f"Writing search results to {results_urls_file}")
with open(results_urls_file, "w") as file:
json.dump(results_urls, file)
total_assets = sum(len(sublist) for sublist in results_urls)
if cc:
logging.info(
f"{len(results_urls)} granules remain after cloud filtering. {total_assets} assets will be processed."
)
else:
logging.info(f"{total_assets} assets will be processed.")
# Confirm Processing
if not confirm_action("Do you want to proceed with processing? (y/n)"):
sys.exit("Processing aborted.")
# Initialize Dask Cluster
client = dask.distributed.Client()
# Setup Dask Environment (GDAL Configs)
client.run(setup_dask_environment)
logging.info(
f"Dask environment setup successfully. View dashboard: {client.dashboard_link}."
)
    # Scatter the results URLs to the workers
client.scatter(results_urls)
# If NC4, create a temporary directory to store COGs
if args.of == "NC4":
cog_dir = os.path.join(output_dir, "temp")
if not os.path.exists(cog_dir):
os.makedirs(cog_dir, exist_ok=True)
else:
if not confirm_action(
"Temporary directory to store COGs already exists. Use these files to create NC4 outputs? (y/n)"
):
sys.exit(
f"Processing aborted. Please remove existing directory: {cog_dir}."
)
else:
cog_dir = output_dir
# Process Granules
start_time = time.time()
logging.info("Processing...")
tasks = [
dask.delayed(process_granule)(
granule_url,
roi=roi,
quality_filter=qf,
scale=scale,
output_dir=cog_dir,
band_dict=band_dict,
bit_nums=[0, 1, 2, 3, 4, 5],
chunk_size=chunk_size,
)
for granule_url in results_urls
]
dask.compute(*tasks)
# Create Timeseries Dataset if NC4
if args.of == "NC4":
logging.info("Creating timeseries dataset...")
create_timeseries_dataset(cog_dir, output_type=args.of, output_dir=output_dir)
# Close Dask Client
client.close()
# Remove Temporary COGs if NC4
if args.of == "NC4":
logging.info("Timeseries Dataset Created. Removing Temporary Files...")
shutil.rmtree(cog_dir)
# End Timer
total_time = time.time() - start_time
logging.info(f"Processing complete. Total time: {round(total_time,2)}s, ")
if __name__ == "__main__":
main()

HLS_SuPER/README.md Normal file

@@ -0,0 +1,254 @@
# HLS Subsetting, Processing, and Exporting Reformatted Data Prep Script
---
## Objective
NASA's Land Processes Distributed Active Archive Center (LP DAAC) archives and distributes Harmonized Landsat Sentinel-2 (HLS) version 2.0 products in the LP DAAC Cumulus cloud archive as Cloud Optimized GeoTIFFs (COG). The HLS_SuPER.py data prep script is a command-line-executable Python script that allows users to submit inputs for their desired spatial region of interest (ROI; GeoJSON, Shapefile, or bounding box), time period of interest, and the specific product(s) and bands/layers desired within the HLS products. The script also includes options for cloud-screening observations by a user-defined threshold, quality filtering, and applying the scale factor to the data, and users can pick between two output file format options:
1. COG, which returns an output for each source file
2. NetCDF4, which creates a single output with variables corresponding to bands, stacking all temporal observations for each band.
To construct these outputs, the input arguments provided by the user on the command line are submitted to NASA's Common Metadata Repository (CMR) API via the `earthaccess` Python library to find data. The script then writes a .json file containing a nested list of asset URLs for every HLS granule that intersects the user's input parameters. After outputting this file, it is used to access the cloud-native HLS data for each asset, which are clipped to the provided ROI and exported in the desired output file format. Optionally, data can be quality filtered (see the quality filtering section below) and/or scaled. **This script does not support resampling or reprojection.**
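The search step described above can be reproduced on its own with `earthaccess` (a minimal sketch; the polygon vertices and dates below are placeholders):
```python
# Minimal sketch of the CMR search step (placeholder ROI vertices and dates).
import earthaccess

earthaccess.login(persist=True)  # Earthdata Login; credentials are cached for later runs
results = earthaccess.search_data(
    short_name=["HLSS30", "HLSL30"],  # HLS V2.0 collections
    polygon=[(-120.0, 43.0), (-118.0, 43.0), (-118.0, 48.0), (-120.0, 48.0), (-120.0, 43.0)],
    temporal=("2020-06-02", "2020-10-24"),
)
# One sublist of asset URLs per granule, as written to the results .json
results_urls = [granule.data_links() for granule in results]
```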
### Available Products
1. Daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance - [HLSS30.002](https://doi.org/10.5067/HLS/HLSS30.002)
2. Daily 30 meter (m) global HLS Landsat 8 Operational Land Imager Surface Reflectance - [HLSL30.002](https://doi.org/10.5067/HLS/HLSL30.002)
> **Note:** In November 2021, this data prep script was updated to process Version 2.0 daily 30 meter (m) global HLS Sentinel-2 Multi-spectral Instrument Surface Reflectance (HLSS30) data and Version 2.0 daily 30 m global HLS Landsat 8 OLI Surface Reflectance (HLSL30) data.
---
## Prerequisites
1. **Earthdata Login account**
- Create an Earthdata Login account (if you don't already have one) at <https://urs.earthdata.nasa.gov/users/new>
- Remember your username and password; you will need them to download or access data during the workshop and beyond.
2. **A Local Copy of this Repository**
    - Copy/clone/[download](https://github.com/nasa/HLS-Data-Resources/archive/refs/heads/main.zip) the [HLS-Data-Resources Repository](https://github.com/nasa/HLS-Data-Resources.git). You will need all three of the Python scripts downloaded to the same directory on your OS (HLS_Su.py, HLS_PER.py, and HLS_SuPER.py).
3. **Compatible Python Environment**
- See the [Python Environment Setup](#python-environment-setup) section below.
- If you have previously set up the [**lpdaac_vitals** environment](https://github.com/nasa/VITALS/blob/main/setup/setup_instructions.md) for a workshop or content from the [VITALS repository](https://github.com/nasa/VITALS/tree/main), you can use that environment for this script as well.
### Python Environment Setup
For local Python environment setup we recommend using [mamba](https://mamba.readthedocs.io/en/latest/) to manage Python packages. To install *mamba*, download [miniforge](https://github.com/conda-forge/miniforge) for your operating system. If using Windows, be sure to check the box to "Add mamba to my PATH environment variable" to enable use of mamba directly from your command line interface. **Note that this may cause an issue if you have an existing mamba install through Anaconda.**
1. Using your preferred command line interface (command prompt, terminal, cmder, etc.) navigate to your local copy of the repository, then type the following to create a compatible Python environment.
For Windows:
```cmd
mamba create -n lpdaac_vitals -c conda-forge --yes python=3.10 fiona=1.8.22 gdal hvplot geoviews rioxarray rasterio jupyter geopandas earthaccess jupyter_bokeh h5py h5netcdf spectral scikit-image jupyterlab seaborn dask ray-default
```
For MacOSX:
```cmd
mamba create -n lpdaac_vitals -c conda-forge --yes python=3.10 gdal=3.7.2 hvplot geoviews rioxarray rasterio geopandas fiona=1.9.4 jupyter earthaccess jupyter_bokeh h5py h5netcdf spectral scikit-image seaborn jupyterlab dask ray-default ray-dashboard
```
2. Next, activate the Python Environment that you just created.
```cmd
mamba activate lpdaac_vitals
```
**Still having trouble getting a compatible Python environment set up? Contact [LP DAAC User Services](https://lpdaac.usgs.gov/lpdaac-contact-us/).**
## Script Execution
1. Once you have completed the prerequisites, open your command line interface and navigate to the directory containing the script.
2. Ensure your python environment created above is activated.
```cmd
mamba activate lpdaac_vitals
```
3. The script requires an `roi`, which can be either a shapefile, geojson, or list of bbox coordinates (lower left longitude, lower left latitude, upper right longitude, upper right latitude). Other arguments are optional. See below for some examples of how to execute the script.
```cmd
> python HLS_SuPER.py -roi <insert geojson, shapefile, or bounding box coordinates here> -dir <insert directory to save the output files to>
```
> **Note:** After running the script, it will show the inputs and then conduct a search for results. A **y/n** prompt will appear to proceed with processing. This is to ensure that the user is aware of the quantity of results/files that will be processed.
### Examples
#### Region of interest (```-roi```) specified using a geojson file
```None
> python HLS_SuPER.py -roi LA_County.geojson
```
#### Region of interest (```-roi```) specified using a bounding box and save outputs to specified directory
```None
> python HLS_SuPER.py -dir C:\Users\HLS\ -roi '-122.8,42.1,-120.5,43.1'
```
> **Note:** The bounding box is a comma-separated string of LL-Lon, LL-Lat, UR-Lon, UR-Lat. **Also**, if the first value in your bounding box is negative, you **MUST** use *single* quotations around the bounding box string. If you are using MacOS, you may need to use double quotes followed by single quotes ("'-122.8,42.1,-120.5,43.1'")
## Additional Script Execution Documentation
To see the full set of command line arguments and how to use them, type the following in the command prompt:
```None
> python HLS_SuPER.py -h
usage: HLS_SuPER.py [-h] -roi ROI [-dir DIR] [-start START] [-end END]
[-prod {HLSS30,HLSL30,both}] [-bands BANDS] [-cc CC]
[-qf {True,False}] [-scale {True,False}]
[-of {COG,NC4}]
...
```
### Script Arguments
#### -roi ROI
```None
(Required) Region of Interest (ROI) for spatial subset. Valid inputs are: (1) a geojson or shapefile (absolute path to file required if not in same directory as this script), or (2) bounding box coordinates: 'LowerLeft_lon,LowerLeft_lat,UpperRight_lon,UpperRight_lat' NOTE: Negative coordinates MUST be
written in single quotation marks '-120,43,-118,48'.
Example
> python HLS_SuPER.py -roi '-120,43,-118,48'
```
#### -dir DIR
```None
Directory to save output HLS files to. (default: <directory that the script is executed from>)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\
```
#### -start START
```None
Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20). (default: 2014-04-03)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02
```
#### -end END
```None
Start date for time period of interest: valid format is yyyy-mm-dd (e.g. 2020-10-20). (default: current date)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24
```
#### -prod {HLSS30,HLSL30,both}
```None
Desired product(s) to be subset and processed. (default: both)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both
```
#### -bands BANDS
```None
Desired layers to be processed. Valid inputs are ALL, COASTAL-AEROSOL, BLUE, GREEN, RED, RED-EDGE1, RED-EDGE2, RED-EDGE3, NIR1, SWIR1, SWIR2, CIRRUS, TIR1, TIR2, WATER-VAPOR, FMASK. To request multiple layers, provide them in comma separated format with no spaces. Unsure of the names for your bands?--check out the README which contains a table of all bands and band names. (default: ALL)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1
```
#### -cc CC
```None
Maximum cloud cover (percent) allowed for returned observations (e.g. 35). Valid range: 0 to 100 (integers only) (default: 100)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50
```
#### -qf {True,False}
```None
Flag to quality filter before exporting output files (see section below for quality filtering performed). (default: True)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True
```
#### -scale {True,False}
```None
Flag to apply scale factor to layers before exporting output files. (default: False)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True -scale False
```
#### -of {COG,NC4}
```None
Define the desired output file format (default: COG)
Example
> python HLS_SuPER.py -roi '-120,43,-118,48' -dir C:\Users\HLS\ -start 2020-06-02 -end 2020-10-24 -prod both -bands RED,GREEN,BLUE,NIR1 -cc 50 -qf True -scale False -of NC4
```
### Quality Filtering
If quality filtering is set to True (default), the following quality filtering will be used:
- Cloud == 0 (No Cloud)
- Cloud shadow == 0 (No Cloud shadow)
- Adjacent to cloud/shadow == 0 (No Adjacent to cloud/shadow)
- Snow/ice == 0 (No Snow/ice)
- Water == 0 (No Water)
- Aerosol level == Climatology aerosol (no low, moderate, or high aerosol levels)
meaning that any pixel that does not meet the criteria outlined above will be removed and set to `_FillValue` in the output files.
The quality table for the HLS `Fmask` can be found in section 6.4 of the [HLS V2.0 User Guide](https://lpdaac.usgs.gov/documents/1118/HLS_User_Guide_V2.pdf).
If you do not want the data to be quality filtered, set argument `qf` to `False`.
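For reference, the per-pixel test that `create_quality_mask` in HLS_PER.py applies can be reproduced on a single `Fmask` value (a minimal sketch; the example values are illustrative):
```python
# A pixel is masked if any of the default bits (0-5) of its Fmask value is set.
def is_masked(fmask_value: int, bit_nums=(0, 1, 2, 3, 4, 5)) -> bool:
    return any(fmask_value & (1 << bit) for bit in bit_nums)

print(is_masked(0))  # False: all tested bits are clear, so the pixel is kept
print(is_masked(2))  # True: bit 1 is set, so the pixel becomes _FillValue in the output
```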
### Output File Formats
Cloud-Optimized GeoTIFF (COG) is the default output file format. If NetCDF-4 (NC4) is selected by the user as the output file format, the script will export a single NC4 file for each HLS tile returned by the query, in the source HLS projection.
#### Output File Names
The standard format for HLS S30 V2.0 and HLS L30 V2.0 filenames is as follows:
**ex:** HLS.S30.T17SLU.2020117T160901.v2.0.B8A.tif
> **HLS.S30/HLS.L30**: Product Short Name
**T17SLU**: MGRS Tile ID (T+5-digits)
**2020117T160901**: Julian Date and Time of Acquisition (YYYYDDDTHHMMSS)
**v2.0**: Product Version
**B8A/B05**: Spectral Band
**.tif**: Data Format (Cloud Optimized GeoTIFF)
For additional information on HLS naming conventions, be sure to check out the [HLS Overview Page](https://lpdaac.usgs.gov/data/get-started-data/collection-overview/missions/harmonized-landsat-sentinel-2-hls-overview/#hls-naming-conventions).
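As a quick check, these components can be pulled out of a filename by splitting on `.`, which is also how the scripts in this repository index them (a minimal sketch):
```python
# Split an example HLS filename into the components listed above.
name = "HLS.S30.T17SLU.2020117T160901.v2.0.B8A.tif"
parts = name.split(".")
product = ".".join(parts[0:2])  # 'HLS.S30'
tile = parts[2]                 # 'T17SLU'
acquisition = parts[3]          # '2020117T160901' (YYYYDDDTHHMMSS)
version = ".".join(parts[4:6])  # 'v2.0'
band = parts[6]                 # 'B8A'
```
HLS_PER.py relies on the same positions (e.g. `parts[3]` for the acquisition time and `parts[6]` for the band) when building time series from the subset outputs.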
If you selected COG as the output file format, the output file name will have the product-specific band names renamed to the common band names and will include **.subset.tif** at the end of the filename:
> HLS.S30.T17SLU.2020117T160901.v2.0.NIR1.subset.tif
If you selected NC4 as the output file format, the following naming convention will be used:
**ex:** HLS.T17SLU.2020-10-24.2020-11-10.subset.nc4
> HLS.[MGRS Tile ID].[date of first observation in output file].[date of last observation in output file].subset.nc4
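Such a file can be inspected directly with `xarray` (a minimal sketch; the filename and band name below are illustrative):
```python
import xarray as xr

# Open an NC4 output following the convention above (illustrative filename).
ds = xr.open_dataset("HLS.T17SLU.2020-10-24.2020-11-10.subset.nc4")
print(ds.data_vars)                 # one variable per requested band, e.g. RED, NIR1
red_first = ds["RED"].isel(time=0)  # earliest observation of the RED band for this tile
```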
---
## Contact Info
Email: <LPDAAC@usgs.gov>
Voice: +1-866-573-3222
Organization: Land Processes Distributed Active Archive Center (LP DAAC)¹
Website: <https://lpdaac.usgs.gov/>
Date last modified: 2024-09-18
¹Work performed under USGS contract 140G0121D0001 for NASA contract NNG14HH33I.

README.md Normal file

@@ -0,0 +1,122 @@
# NASA Earthdata Data Retrieval and Preprocessing: the HLS Dataset as an Example
## 1 Install miniforge
### 1.1 miniforge
- miniforge is a minimal distribution that combines conda and mamba. It is faster and lighter than Anaconda and Miniconda, its configuration commands are essentially the same as plain conda, and the mamba command can be used directly.
- In short, in terms of environment setup efficiency: miniforge > Mambaforge (deprecated as of 2024-07) > Miniconda + Mamba > Miniconda > Anaconda
- Official repository: https://github.com/conda-forge/miniforge
- Official download page: https://conda-forge.org/download/
### 1.2 Configure the environment variables
- To use the conda command directly from the console, add the relevant installation directories to the Path environment variable.
```
D:\program\miniforge3
D:\program\miniforge3\Scripts
D:\program\miniforge3\Library\bin
```
### 1.3 Configure permissions
- The detailed setup is the same as for Miniconda; an illustrated tutorial is available at https://gis-xh.github.io/my-note/python/01conda/Win11-Miniconda-install/
- On Windows, the virtual environment folder must be made readable by all users; otherwise the virtual environment files may fail to be read.
- Specifically:
  - Make the `D:\program\miniforge3\env` directory accessible to all users: right-click the folder -> Properties -> Security -> Edit -> Add -> add all users -> select all permissions -> Apply -> OK
### 1.4 Configure the mirror channels
- Generate the channel configuration file (skip this step if Anaconda/Miniconda has already been installed):
```sh
conda config --set show_channel_urls yes
```
- Locate the `.condarc` file in the `C:\Users\<your username>\` directory, open it with Notepad, enter the following content, and save:
```
envs_dirs:
- D:\program\miniforge3\envs
  - <other paths (optional; searched in order when creating virtual environments)>
channels:
- defaults
show_channel_urls: true
channel_alias: https://mirrors.tuna.tsinghua.edu.cn/anaconda
default_channels:
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
custom_channels:
conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
```
### 1.5 Initialize conda
- Open a console and initialize PowerShell and CMD:
```sh
conda init powershell
```
```sh
conda init cmd.exe
```
## 2 Configure the Runtime Environment
### 2.1 Create and activate the virtual environment with mamba
- Create the virtual environment from the provided environment file (recommended on Windows):
```sh
mamba env create -f setup/lpdaac_windows.yml
```
- Activate the virtual environment:
```sh
mamba activate lpdaac_windows
```
## 3 Retrieve HLS Data
### 3.1 Account preparation
- Based on the official NASA example: https://github.com/nasa/LPDAAC-Data-Resources/blob/main/setup/setup_instructions_python.md
- The first time the retrieval command is run you will be prompted for a username and password, which can be obtained by registering at [Earthdata](https://urs.earthdata.nasa.gov/).
- Note that it is best to avoid symbols such as `@/#/$/%` in the password, as they may cause errors during retrieval.
### 3.2 Retrieve cloud-hosted data and preprocess it in memory
- `-roi`: region of interest; specify the corner coordinates as **lower-left, upper-right** (counterclockwise order)
- `-dir`: output directory; it must already exist
- `-start`: start date in `YYYY-MM-DD` format
- `-end`: end date in `YYYY-MM-DD` format
- `-prod`: product name, e.g. `HLSL30`, `HLSS30`, or `both`
- `-bands`: band names, e.g. `COASTAL-AEROSOL,BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,CIRRUS,TIR1,TIR2,Fmask`
- `-cc`: maximum scene cloud cover (%) to keep, e.g. `70`
- `-qf`: whether to filter cloud/cloud-shadow pixels with the quality band; default `True`
- `-scale`: whether to apply the scale factor to the imagery; the script default is `False`
- Retrieve all spectral bands
```sh
python .\\HLS_SuPER\\HLS_SuPER.py -roi '113.10114,30.62845,114.24349,31.59081' -dir .\\data\\HLS\\L30 -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands COASTAL-AEROSOL,BLUE,GREEN,RED,NIR1,SWIR1,SWIR2,CIRRUS,TIR1,TIR2,Fmask -cc 70 -qf True -scale True
```
- Retrieve only the essential core bands
```sh
python .\\HLS_SuPER\\HLS_SuPER.py -roi '113.10114,30.62845,114.24349,31.59081' -dir .\\data\\HLS\\L30\\subset -start 2024-01-01 -end 2024-01-31 -prod HLSL30 -bands BLUE,GREEN,RED,NIR1,SWIR1,SWIR2 -cc 70 -qf True -scale True
```
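- After a run finishes, an output subset COG can be checked quickly with `rioxarray` (a minimal sketch; the path below is illustrative, and outputs are archived in per-acquisition-date subdirectories under `-dir`):
```python
import rioxarray as rxr

# Illustrative path: <-dir>/<acquisition-date subdirectory>/<subset COG>
cog_path = "data/HLS/L30/subset/2024015T/HLS.L30.T50RKU.2024015T030131.v2.0.RED.subset.tif"
da = rxr.open_rasterio(cog_path).squeeze("band", drop=True)
print(da.rio.crs)        # native HLS UTM projection (no reprojection is performed)
print(float(da.mean()))  # quick look at the clipped (and, with -scale True, scaled) values
```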

setup/lpdaac_windows.yml Normal file

@@ -0,0 +1,420 @@
name: lpdaac_windows
channels:
- defaults
- conda-forge
dependencies:
- affine=2.4.0=pyhd8ed1ab_0
- aiobotocore=2.7.0=pyhd8ed1ab_1
- aiofiles=22.1.0=pyhd8ed1ab_0
- aiohttp=3.8.6=py310h8d17308_1
- aiohttp-cors=0.7.0=py_0
- aioitertools=0.11.0=pyhd8ed1ab_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- aiosqlite=0.19.0=pyhd8ed1ab_0
- ansicon=1.89.0=py310h5588dad_7
- anyio=4.0.0=pyhd8ed1ab_0
- argon2-cffi=23.1.0=pyhd8ed1ab_0
- argon2-cffi-bindings=21.2.0=py310h8d17308_4
- arrow=1.3.0=pyhd8ed1ab_0
- asciitree=0.3.3=py_2
- asttokens=2.4.1=pyhd8ed1ab_0
- async-timeout=4.0.3=pyhd8ed1ab_0
- attrs=23.1.0=pyh71513ae_1
- aws-c-auth=0.7.3=h0127223_1
- aws-c-cal=0.6.1=hfb91821_1
- aws-c-common=0.9.0=hcfcfb64_0
- aws-c-compression=0.2.17=h04c9df6_2
- aws-c-event-stream=0.3.1=h495bb32_4
- aws-c-http=0.7.11=hf013885_4
- aws-c-io=0.13.32=he824701_1
- aws-c-mqtt=0.9.3=h64f41f2_1
- aws-c-s3=0.3.14=hb8b96c7_1
- aws-c-sdkutils=0.1.12=h04c9df6_1
- aws-checksums=0.1.17=h04c9df6_1
- aws-crt-cpp=0.21.0=hf1ed06d_5
- aws-sdk-cpp=1.10.57=heb7cc7f_19
- babel=2.13.1=pyhd8ed1ab_0
- backports=1.0=pyhd8ed1ab_3
- backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0
- beautifulsoup4=4.12.2=pyha770c72_0
- bleach=6.1.0=pyhd8ed1ab_0
- blessed=1.19.1=pyh95a074a_2
- blosc=1.21.5=hdccc3a2_0
- bokeh=3.3.0=pyhd8ed1ab_0
- boto3=1.28.64=pyhd8ed1ab_0
- botocore=1.31.64=pyhd8ed1ab_0
- bounded-pool-executor=0.0.3=pyhd8ed1ab_0
- branca=0.7.0=pyhd8ed1ab_1
- brotli=1.0.9=hcfcfb64_9
- brotli-bin=1.0.9=hcfcfb64_9
- brotli-python=1.0.9=py310h00ffb61_9
- bzip2=1.0.8=hcfcfb64_5
- c-ares=1.21.0=hcfcfb64_0
- ca-certificates=2023.7.22=h56e8100_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=5.3.2=pyhd8ed1ab_0
- cairo=1.18.0=h1fef639_0
- cartopy=0.22.0=py310hecd3228_1
- certifi=2023.7.22=pyhd8ed1ab_0
- cffi=1.16.0=py310h8d17308_0
- cfitsio=4.3.0=h9b0cee5_0
- cftime=1.6.3=py310h3e78b6c_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.7=win_pyh7428d3b_0
- click-plugins=1.1.1=py_0
- cligj=0.7.2=pyhd8ed1ab_1
- cloudpickle=3.0.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- colorcet=3.0.1=pyhd8ed1ab_0
- colorful=0.5.4=pyhd8ed1ab_0
- comm=0.1.4=pyhd8ed1ab_0
- configobj=5.0.8=pyhd8ed1ab_0
- contourpy=1.2.0=py310h232114e_0
- cryptography=41.0.5=py310h6e82f81_0
- cycler=0.12.1=pyhd8ed1ab_0
- cytoolz=0.12.2=py310h8d17308_1
- dask=2023.10.1=pyhd8ed1ab_0
- dask-core=2023.10.1=pyhd8ed1ab_0
- datashader=0.16.0=pyhd8ed1ab_0
- debugpy=1.8.0=py310h00ffb61_1
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- distlib=0.3.7=pyhd8ed1ab_0
- distributed=2023.10.1=pyhd8ed1ab_0
- earthaccess=0.7.1=pyhd8ed1ab_0
- entrypoints=0.4=pyhd8ed1ab_0
- exceptiongroup=1.1.3=pyhd8ed1ab_0
- executing=2.0.1=pyhd8ed1ab_0
- expat=2.5.0=h63175ca_1
- fasteners=0.17.3=pyhd8ed1ab_0
- filelock=3.13.1=pyhd8ed1ab_0
- fiona=1.9.5=py310h65cc672_0
- folium=0.15.0=pyhd8ed1ab_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.14.2=hbde0cde_0
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- fonttools=4.44.0=py310h8d17308_0
- fqdn=1.5.1=pyhd8ed1ab_0
- freetype=2.12.1=hdaf720e_2
- freexl=2.0.0=h8276f4a_0
- frozenlist=1.4.0=py310h8d17308_1
- fsspec=2023.10.0=pyhca7485f_0
- gdal=3.7.3=py310haa9213b_2
- geopandas=0.14.0=pyhd8ed1ab_1
- geopandas-base=0.14.0=pyha770c72_1
- geos=3.12.0=h1537add_0
- geotiff=1.7.1=hcf4a93f_14
- geoviews=1.11.0=pyhd8ed1ab_0
- geoviews-core=1.11.0=pyha770c72_0
- gettext=0.21.1=h5728263_0
- gitdb=4.0.11=pyhd8ed1ab_0
- gitpython=3.1.40=pyhd8ed1ab_0
- google-api-core=2.13.0=pyhd8ed1ab_0
- google-auth=2.23.4=pyhca7485f_0
- googleapis-common-protos=1.61.0=pyhd8ed1ab_0
- gpustat=1.1.1=pyhd8ed1ab_0
- grpcio=1.54.3=py310h8020be6_0
- h5netcdf=1.3.0=pyhd8ed1ab_0
- h5py=3.10.0=nompi_py310h20f5850_100
- hdf4=4.2.15=h5557f11_7
- hdf5=1.14.2=nompi_h73e8ff5_100
- holoviews=1.18.1=pyhd8ed1ab_0
- hvplot=0.9.0=pyhd8ed1ab_0
- icu=73.2=h63175ca_0
- idna=3.4=pyhd8ed1ab_0
- imagecodecs-lite=2019.12.3=py310h3e78b6c_7
- imageio=2.31.5=pyh8c1a49c_0
- importlib-metadata=6.8.0=pyha770c72_0
- importlib_metadata=6.8.0=hd8ed1ab_0
- importlib_resources=6.1.1=pyhd8ed1ab_0
- intel-openmp=2023.2.0=h57928b3_50497
- ipykernel=6.26.0=pyha63f2e9_0
- ipython=8.17.2=pyh5737063_0
- ipython_genutils=0.2.0=py_1
- ipywidgets=8.1.1=pyhd8ed1ab_0
- isoduration=20.11.0=pyhd8ed1ab_0
- jedi=0.19.1=pyhd8ed1ab_0
- jinja2=3.1.2=pyhd8ed1ab_1
- jinxed=1.2.0=pyh95a074a_0
- jmespath=1.0.1=pyhd8ed1ab_0
- joblib=1.3.2=pyhd8ed1ab_0
- json5=0.9.14=pyhd8ed1ab_0
- jsonpointer=2.4=py310h5588dad_3
- jsonschema=4.19.2=pyhd8ed1ab_0
- jsonschema-specifications=2023.7.1=pyhd8ed1ab_0
- jsonschema-with-format-nongpl=4.19.2=pyhd8ed1ab_0
- jupyter=1.0.0=pyhd8ed1ab_10
- jupyter-resource-usage=1.0.1=pyhd8ed1ab_0
- jupyter-server-mathjax=0.2.6=pyh5bfe37b_1
- jupyter_bokeh=3.0.7=pyhd8ed1ab_0
- jupyter_client=7.4.9=pyhd8ed1ab_0
- jupyter_console=6.6.3=pyhd8ed1ab_0
- jupyter_core=5.5.0=py310h5588dad_0
- jupyter_events=0.9.0=pyhd8ed1ab_0
- jupyter_server=2.10.0=pyhd8ed1ab_0
- jupyter_server_fileid=0.9.0=pyhd8ed1ab_0
- jupyter_server_terminals=0.4.4=pyhd8ed1ab_1
- jupyter_server_ydoc=0.8.0=pyhd8ed1ab_0
- jupyter_ydoc=0.2.4=pyhd8ed1ab_0
- jupyterlab=3.6.6=pyhd8ed1ab_0
- jupyterlab-geojson=3.4.0=pyhd8ed1ab_0
- jupyterlab-git=0.44.0=pyhd8ed1ab_0
- jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
- jupyterlab_server=2.25.1=pyhd8ed1ab_0
- jupyterlab_widgets=3.0.9=pyhd8ed1ab_0
- kealib=1.5.2=ha10e780_1
- kiwisolver=1.4.5=py310h232114e_1
- krb5=1.21.2=heb0366b_0
- lazy_loader=0.3=pyhd8ed1ab_0
- lcms2=2.15=h67d730c_3
- lerc=4.0.0=h63175ca_0
- libabseil=20230125.3=cxx17_h63175ca_0
- libaec=1.1.2=h63175ca_1
- libarchive=3.7.2=h6f8411a_0
- libarrow=12.0.1=he3e0f11_8_cpu
- libblas=3.9.0=19_win64_mkl
- libboost-headers=1.82.0=h57928b3_6
- libbrotlicommon=1.0.9=hcfcfb64_9
- libbrotlidec=1.0.9=hcfcfb64_9
- libbrotlienc=1.0.9=hcfcfb64_9
- libcblas=3.9.0=19_win64_mkl
- libcrc32c=1.1.2=h0e60522_0
- libcurl=8.4.0=hd5e4a3a_0
- libdeflate=1.19=hcfcfb64_0
- libevent=2.1.12=h3671451_1
- libexpat=2.5.0=h63175ca_1
- libffi=3.4.2=h8ffe710_5
- libgdal=3.7.3=h3217549_2
- libglib=2.78.1=he8f3873_0
- libgoogle-cloud=2.12.0=h00b2bdc_1
- libgrpc=1.54.3=ha177ca7_0
- libhwloc=2.9.3=default_haede6df_1009
- libiconv=1.17=h8ffe710_0
- libjpeg-turbo=3.0.0=hcfcfb64_1
- libkml=1.3.0=haf3e7a6_1018
- liblapack=3.9.0=19_win64_mkl
- libnetcdf=4.9.2=nompi_h8284064_112
- libpng=1.6.39=h19919ed_0
- libpq=16.1=h43585b0_0
- libprotobuf=3.21.12=h12be248_2
- librttopo=1.1.0=h92c5fdb_14
- libsodium=1.0.18=h8d14728_1
- libspatialindex=1.9.3=h39d44d4_4
- libspatialite=5.1.0=hbf340bc_1
- libsqlite=3.44.0=hcfcfb64_0
- libssh2=1.11.0=h7dfc565_0
- libthrift=0.18.1=h06f6336_2
- libtiff=4.6.0=h6e2ebb7_2
- libutf8proc=2.8.0=h82a8f57_0
- libwebp-base=1.3.2=hcfcfb64_0
- libxcb=1.15=hcd874cb_0
- libxml2=2.11.5=hc3477c8_1
- libzip=1.10.1=h1d365fa_3
- libzlib=1.2.13=hcfcfb64_5
- linkify-it-py=2.0.0=pyhd8ed1ab_0
- llvmlite=0.41.1=py310hb84602e_0
- locket=1.0.0=pyhd8ed1ab_0
- lz4=4.3.2=py310hbbb2075_1
- lz4-c=1.9.4=hcfcfb64_0
- lzo=2.10=he774522_1000
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- mapclassify=2.6.1=pyhd8ed1ab_0
- markdown=3.5.1=pyhd8ed1ab_0
- markdown-it-py=3.0.0=pyhd8ed1ab_0
- markupsafe=2.1.3=py310h8d17308_1
- matplotlib-base=3.8.1=py310hc9baf74_0
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
- mdit-py-plugins=0.4.0=pyhd8ed1ab_0
- mdurl=0.1.0=pyhd8ed1ab_0
- minizip=4.0.2=h5bed578_0
- mistune=3.0.2=pyhd8ed1ab_0
- mkl=2023.2.0=h6a75c08_50496
- msgpack-python=1.0.6=py310h232114e_0
- msys2-conda-epoch=20160418=1
- multidict=6.0.4=py310h8d17308_1
- multimethod=1.9.1=pyhd8ed1ab_0
- multipledispatch=0.6.0=py_0
- munch=4.0.0=pyhd8ed1ab_0
- munkres=1.1.4=pyh9f0ad1d_0
- nbclassic=1.0.0=pyhb4ecaf3_1
- nbclient=0.8.0=pyhd8ed1ab_0
- nbconvert=7.11.0=pyhd8ed1ab_0
- nbconvert-core=7.11.0=pyhd8ed1ab_0
- nbconvert-pandoc=7.11.0=pyhd8ed1ab_0
- nbdime=3.2.1=pyhd8ed1ab_0
- nbformat=5.9.2=pyhd8ed1ab_0
- nest-asyncio=1.5.8=pyhd8ed1ab_0
- netcdf4=1.6.5=nompi_py310h6477780_100
- networkx=3.2.1=pyhd8ed1ab_0
- notebook=6.5.6=pyha770c72_0
- notebook-shim=0.2.3=pyhd8ed1ab_0
- numba=0.58.1=py310h9ccaf4f_0
- numcodecs=0.12.1=py310h00ffb61_0
- numpy=1.26.0=py310hf667824_0
- nvidia-ml-py=12.535.133=pyhd8ed1ab_0
- opencensus=0.11.3=pyhd8ed1ab_0
- opencensus-context=0.1.3=py310h5588dad_2
- openjpeg=2.5.0=h3d672ee_3
- openssl=3.1.4=hcfcfb64_0
- orc=1.9.0=hada7b9e_1
- overrides=7.4.0=pyhd8ed1ab_0
- packaging=23.2=pyhd8ed1ab_0
- pandas=2.1.2=py310hecd3228_0
- pandoc=3.1.3=h57928b3_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- panel=1.3.1=pyhd8ed1ab_0
- param=2.0.1=pyhca7485f_0
- parso=0.8.3=pyhd8ed1ab_0
- partd=1.4.1=pyhd8ed1ab_0
- pcre2=10.40=h17e33f8_0
- pexpect=4.8.0=pyh1a96a4e_2
- pickleshare=0.7.5=py_1003
- pillow=10.1.0=py310h1e6a543_0
- pip=23.3.1=pyhd8ed1ab_0
- pixman=0.42.2=h63175ca_0
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
- platformdirs=3.11.0=pyhd8ed1ab_0
- poppler=23.10.0=hc2f3c52_0
- poppler-data=0.4.12=hd8ed1ab_0
- postgresql=16.1=hc80876b_0
- pqdm=0.2.0=pyhd8ed1ab_0
- proj=9.3.0=he13c7e8_2
- prometheus_client=0.18.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.39=pyha770c72_0
- prompt_toolkit=3.0.39=hd8ed1ab_0
- protobuf=4.21.12=py310h00ffb61_0
- psutil=5.9.5=py310h8d17308_1
- pthread-stubs=0.4=hcd874cb_1001
- pthreads-win32=2.9.1=hfa6e2cd_3
- ptyprocess=0.7.0=pyhd3deb0d_0
- pure_eval=0.2.2=pyhd8ed1ab_0
- py-spy=0.3.14=h975169c_0
- pyarrow=12.0.1=py310hd1a9178_8_cpu
- pyasn1=0.5.0=pyhd8ed1ab_0
- pyasn1-modules=0.3.0=pyhd8ed1ab_0
- pycparser=2.21=pyhd8ed1ab_0
- pyct=0.4.6=py_0
- pyct-core=0.4.6=py_0
- pydantic=1.10.13=py310h8d17308_1
- pygments=2.16.1=pyhd8ed1ab_0
- pykdtree=1.3.9=py310h3e78b6c_1
- pyopenssl=23.3.0=pyhd8ed1ab_0
- pyparsing=3.1.1=pyhd8ed1ab_0
- pyproj=3.6.1=py310hebb2149_4
- pyresample=1.27.1=py310hecd3228_2
- pyshp=2.3.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyh0701188_6
- pystac=1.9.0=pyhd8ed1ab_0
- pystac-client=0.7.5=pyhd8ed1ab_0
- python=3.10.13=h4de0772_0_cpython
- python-cmr=0.9.0=pyhd8ed1ab_0
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-fastjsonschema=2.18.1=pyhd8ed1ab_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2023.3=pyhd8ed1ab_0
- python_abi=3.10=4_cp310
- pytz=2023.3.post1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- pyviz_comms=2.3.2=pyhd8ed1ab_0
- pywavelets=1.4.1=py310h3e78b6c_1
- pywin32=306=py310h00ffb61_2
- pywinpty=2.0.12=py310h00ffb61_0
- pyyaml=6.0.1=py310h8d17308_1
- pyzmq=24.0.1=py310hcd737a0_1
- qtconsole-base=5.5.0=pyha770c72_0
- qtpy=2.4.1=pyhd8ed1ab_0
- rasterio=1.3.9=py310h4d3659c_0
- ray-core=2.7.1=py310h139b6d1_0
- ray-default=2.7.1=py310h5588dad_0
- re2=2023.03.02=hd4eee63_0
- referencing=0.30.2=pyhd8ed1ab_0
- requests=2.31.0=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rioxarray=0.15.0=pyhd8ed1ab_0
- rpds-py=0.12.0=py310h87d50f1_0
- rsa=4.9=pyhd8ed1ab_0
- rtree=1.1.0=py310h1cbd46b_0
- s3fs=2023.10.0=pyhd8ed1ab_0
- s3transfer=0.7.0=pyhd8ed1ab_0
- scikit-image=0.20.0=py310h1c4a608_1
- scikit-learn=1.3.2=py310hfd2573f_1
- scipy=1.11.3=py310hf667824_1
- send2trash=1.8.2=pyh08f2357_0
- setproctitle=1.3.3=py310h8d17308_0
- setuptools=68.2.2=pyhd8ed1ab_0
- shapely=2.0.2=py310h839b4a8_0
- six=1.16.0=pyh6c4a22f_0
- smart_open=6.4.0=pyhd8ed1ab_0
- smmap=5.0.0=pyhd8ed1ab_0
- snappy=1.1.10=hfb803bf_0
- sniffio=1.3.0=pyhd8ed1ab_0
- snuggs=1.4.7=py_0
- sortedcontainers=2.4.0=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- spectral=0.23.1=pyh1a96a4e_0
- sqlite=3.44.0=hcfcfb64_0
- stack_data=0.6.2=pyhd8ed1ab_0
- tbb=2021.10.0=h91493d7_2
- tblib=2.0.0=pyhd8ed1ab_0
- terminado=0.17.0=pyh08f2357_0
- threadpoolctl=3.2.0=pyha21a80b_0
- tifffile=2020.6.3=py_0
- tiledb=2.16.3=h1ffc264_3
- tinycss2=1.2.1=pyhd8ed1ab_0
- tinynetrc=1.3.1=pyhd8ed1ab_0
- tk=8.6.13=h5226925_1
- tomli=2.0.1=pyhd8ed1ab_0
- toolz=0.12.0=pyhd8ed1ab_0
- tornado=6.3.3=py310h8d17308_1
- tqdm=4.66.1=pyhd8ed1ab_0
- traitlets=5.13.0=pyhd8ed1ab_0
- types-python-dateutil=2.8.19.14=pyhd8ed1ab_0
- typing-extensions=4.8.0=hd8ed1ab_0
- typing_extensions=4.8.0=pyha770c72_0
- typing_utils=0.1.0=pyhd8ed1ab_0
- tzdata=2023c=h71feb2d_0
- uc-micro-py=1.0.1=pyhd8ed1ab_0
- ucrt=10.0.22621.0=h57928b3_0
- unicodedata2=15.1.0=py310h8d17308_0
- uri-template=1.3.0=pyhd8ed1ab_0
- uriparser=0.9.7=h1537add_1
- urllib3=1.26.18=pyhd8ed1ab_0
- vc=14.3=h64f974e_17
- vc14_runtime=14.36.32532=hdcecf7f_17
- virtualenv=20.21.0=pyhd8ed1ab_0
- vs2015_runtime=14.36.32532=h05e6639_17
- wcwidth=0.2.9=pyhd8ed1ab_0
- webcolors=1.13=pyhd8ed1ab_0
- webencodings=0.5.1=pyhd8ed1ab_2
- websocket-client=1.6.4=pyhd8ed1ab_0
- wheel=0.41.3=pyhd8ed1ab_0
- widgetsnbextension=4.0.9=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyhd8ed1ab_6
- winpty=0.4.3=4
- wrapt=1.16.0=py310h8d17308_0
- xarray=2023.10.1=pyhd8ed1ab_0
- xerces-c=3.2.4=h63175ca_3
- xorg-libxau=1.0.11=hcd874cb_0
- xorg-libxdmcp=1.1.3=hcd874cb_0
- xyzservices=2023.10.1=pyhd8ed1ab_0
- xz=5.2.6=h8d14728_0
- y-py=0.5.5=py310h87d50f1_2
- yaml=0.2.5=h8ffe710_2
- yarl=1.9.2=py310h8d17308_1
- ypy-websocket=0.8.2=pyhd8ed1ab_0
- zarr=2.16.1=pyhd8ed1ab_0
- zeromq=4.3.4=h0e60522_1
- zict=3.0.0=pyhd8ed1ab_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hcfcfb64_5
- zstd=1.5.5=h12be248_0