""" 访问阿里云 DataV 下载的行政区边界数据保存为原始 JSON, 有部分字段与GDAL不兼容, 导致无法 直接读取, 需要先清洗再保存为 GeoJSON. - 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector - Step1: 按"省-市-县名称"解析为行政区划代码 (adcode); - Step2: 将 DataV 原始数据保存为 `省-市-县名称.json` 文件; - Step3: 移除不兼容 GDAL/GeoPandas 的属性字段 (parent, center, centroid, acroutes); - Step4: 将清洗后的结果写出为 `省-市-县名称.geojson` 文件. ------------------------------------------------------------------------------- Authors: CVEO Team Last Updated: 2026-04-13 =============================================================================== """ import json import re from pathlib import Path from typing import Optional import requests def _validate_region_params(province: str, city: str, county: Optional[str]) -> None: """ Validate that province and city are non-empty. Raises ------ ValueError If province or city is None or empty string. """ if not province or not province.strip(): raise ValueError("province (省) cannot be empty") if not city or not city.strip(): raise ValueError("city (市) cannot be empty") def get_datav_json(accode: str) -> dict: """ 从 DataV 接口获取行政区边界的原始 JSON 数据并返回字典. """ # 使用路径式接口, 支持如 "420100" 或 "420100_full" url = f"https://geo.datav.aliyun.com/areas_v3/bound/{accode}.json" response = requests.get(url, timeout=15) response.raise_for_status() return response.json() def fetch_and_save_geojson(accode: str, region_name: str, out_dir: Path) -> Path: """ 获取 DataV 原始数据, 先保存为 .json; 随后清洗属性并另存为 .geojson. Parameters ---------- accode : str 行政区划代码, 如 "420100" 或 "420100_full". region_name : str 区域名称, 用于输出文件名. 当使用省-市-县三级参数时, 文件名为 "{province}_{city}.geojson" 或 "{province}_{city}_{county}.geojson". out_dir : Path 输出目录. """ raw_data = get_datav_json(accode) # 处理 features: 移除不兼容 GeoPandas 的属性 def sanitize_properties(props: dict) -> dict: out = {} for k, v in props.items(): # 移除嵌套对象 if k in ("parent", "center", "centroid", "acroutes"): continue # 仅保留标量类型; 丢弃其他嵌套结构 if isinstance(v, (str, int, float, bool)) or v is None: out[k] = v return out # 输出路径(确保目录存在) out_dir_path = Path(out_dir) out_dir_path.mkdir(parents=True, exist_ok=True) # 先保存原始 JSON(未清洗) raw_json_path = out_dir_path / f"{region_name}.json" with raw_json_path.open("w", encoding="utf-8") as f: json.dump(raw_data, f, ensure_ascii=False) # 再保存清洗后的 GeoJSON out_path = out_dir_path / f"{region_name}.geojson" # 深拷贝后进行清洗, 避免影响原始数据 data = json.loads(json.dumps(raw_data)) features = data.get("features", []) for feature in features: props = feature.get("properties", {}) feature["properties"] = sanitize_properties(props) # 写出为 .geojson, 确保 UTF-8 且保留中文字符 with out_path.open("w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False) return out_path def _normalize_name(name: str) -> str: name = name.strip() # 简单去除常见后缀提高匹配鲁棒性 for suffix in ("市", "省", "地区", "盟", "自治州", "自治县", "特别行政区"): if name.endswith(suffix): name = name[: -len(suffix)] return name def _name_matches_exact(target: str, candidate: str) -> bool: return _normalize_name(target) == _normalize_name(candidate) def resolve_adcode_by_name( province: str, city: str, county: Optional[str] = None, prefer_full: bool = False, ) -> Optional[str]: """ 通过省-市-县名称解析 DataV 行政区划代码. 采用层级查找策略: 全国数据 -> 省级数据 -> 市级数据 -> 区县数据. 前两级 (省/市) 必须提供且不能为空; 第三级 (县) 可为空. Parameters ---------- province : str 省份名称, 如 "湖北省". 不能为空. city : str 城市名称, 如 "武汉市". 不能为空. county : str, optional 区县名称, 如 "江岸区". 可为空 (表示只解析到市级). prefer_full : bool, optional 是否返回下辖完整边界代码 (如 "420100_full"), 默认 False. Returns ------- str or None 行政区划代码, 如 "420100" 或 "420100_full", 找不到则返回 None. Raises ------ ValueError 如果 province 或 city 为空. """ _validate_region_params(province, city, county) # Step 1: 从全国数据中查找省份 try: cn = requests.get( "https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json", timeout=20, ).json() except Exception: return None pcode = None for feat in cn.get("features", []): props = feat.get("properties", {}) if props.get("level") == "province": name = props.get("name", "") code = str(props.get("adcode", "")) if re.fullmatch(r"\d{6}", code): if _name_matches_exact(province, name): pcode = code break if not pcode: return None # Step 2: 从省份数据中查找城市 # 特殊处理直辖市: 当 province == city 时 (如 "北京市" == "北京市") if _name_matches_exact(province, city): ccode = pcode else: try: prov_data = requests.get( f"https://geo.datav.aliyun.com/areas_v3/bound/{pcode}_full.json", timeout=20, ).json() except Exception: return None ccode = None for feat in prov_data.get("features", []): props = feat.get("properties", {}) level = props.get("level") name = props.get("name", "") code = str(props.get("adcode", "")) # 匹配城市或区县级别 if level in ("city", "district") and re.fullmatch(r"\d{6}", code): if _name_matches_exact(city, name): ccode = code break if not ccode: return None # Step 3: 如果提供了区县名称, 从城市数据中查找区县 if county: try: city_data = requests.get( f"https://geo.datav.aliyun.com/areas_v3/bound/{ccode}_full.json", timeout=20, ).json() except Exception: return None dcode = None for feat in city_data.get("features", []): props = feat.get("properties", {}) level = props.get("level") name = props.get("name", "") code = str(props.get("adcode", "")) # 匹配区县 (包括县级市, 其 level 可能为 "city") if level in ("district", "city") and re.fullmatch(r"\d{6}", code): if _name_matches_exact(county, name): dcode = code break if not dcode: return None # 区县级别已经是最终边界,不需要 _full 后缀 return dcode # 只解析到市级 return f"{ccode}_full" if prefer_full else ccode def fetch_and_save_geojson_by_name( province: str, city: str, county: Optional[str], out_dir: Path, prefer_full: bool = False, ) -> Path: """ 通过省-市-县名称解析 adcode, 并直接拉取与保存 GeoJSON. Parameters ---------- province : str 省份名称, 如 "湖北省". 不能为空. city : str 城市名称, 如 "武汉市". 不能为空. county : str, optional 区县名称, 如 "江岸区". 可为空 (表示只解析到市级). out_dir : Path 输出目录, 用于保存 GeoJSON 文件. prefer_full : bool, optional 是否下载下辖区划的 GeoJSON, 默认 False. Returns ------- Path 保存的 GeoJSON 文件路径. Raises ------ ValueError 如果省或市为空, 或无法解析到行政区划代码. """ code = resolve_adcode_by_name(province, city, county, prefer_full=prefer_full) if not code: raise ValueError( f"无法通过名称解析到行政区划代码: province={province}, city={city}, county={county}" ) # 构建输出文件名 if county: region_name = f"{province}_{city}_{county}" else: region_name = f"{province}_{city}" return fetch_and_save_geojson(code, region_name, out_dir) if __name__ == "__main__": # 示例 1: 只获取市级边界 (province + city) # out = fetch_and_save_geojson_by_name("湖北省", "武汉市", None, out_dir=Path("./data/vectors/")) # 示例 2: 获取十堰市县区级边界 # out = fetch_and_save_geojson_by_name( # "湖北省", "十堰市", None, out_dir=Path("./data/vectors/"), prefer_full=True # ) # 示例 3: 获取区县级边界 (province + city + county) out = fetch_and_save_geojson_by_name( "湖北省", "十堰市", "郧西县", out_dir=Path("./data/vectors/"), prefer_full=True ) print(f"Saved raw JSON and GeoJSON: {out}.")