From f608e3afab9edfc031f21cbbdc8a23837df28c17 Mon Sep 17 00:00:00 2001 From: xhong Date: Mon, 13 Apr 2026 13:30:34 +0800 Subject: [PATCH] =?UTF-8?q?feat(DataV=5FSuPER):=20=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E4=B8=BA=E6=94=AF=E6=8C=81=E7=9C=81-=E5=B8=82-=E5=8E=BF?= =?UTF-8?q?=E4=B8=89=E7=BA=A7=E8=A1=8C=E6=94=BF=E5=8C=BA=E5=88=92=E8=A7=A3?= =?UTF-8?q?=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 将单一城市名称参数改为省、市、县三级参数,提升解析准确性 - 实现层级查找策略:全国->省->市->县,并添加参数校验 - 更新输出文件命名规则,使用"省_市"或"省_市_县"格式 - 完善函数文档字符串和错误提示信息 --- Basemap_SuPER/DataV_SuPER.py | 275 +++++++++++++++++++++++------------ 1 file changed, 180 insertions(+), 95 deletions(-) diff --git a/Basemap_SuPER/DataV_SuPER.py b/Basemap_SuPER/DataV_SuPER.py index c2d1e1d..3177685 100644 --- a/Basemap_SuPER/DataV_SuPER.py +++ b/Basemap_SuPER/DataV_SuPER.py @@ -3,22 +3,38 @@ 直接读取, 需要先清洗再保存为 GeoJSON. - 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector -- Step1: 按"城市名称"解析为行政区划代码 (adcode); -- Step2: 将 DataV 原始数据保存为 `城市名称.json` 文件; +- Step1: 按"省-市-县名称"解析为行政区划代码 (adcode); +- Step2: 将 DataV 原始数据保存为 `省-市-县名称.json` 文件; - Step3: 移除不兼容 GDAL/GeoPandas 的属性字段 (parent, center, centroid, acroutes); -- Step4: 将清洗后的结果写出为 `城市名称.geojson` 文件. +- Step4: 将清洗后的结果写出为 `省-市-县名称.geojson` 文件. -------------------------------------------------------------------------------- -Authors: Hong Xie -Last Updated: 2025-10-20 +------------------------------------------------------------------------------- +Authors: CVEO Team +Last Updated: 2026-04-13 =============================================================================== """ import json -import requests +import re from pathlib import Path from typing import Optional -import re + +import requests + + +def _validate_region_params(province: str, city: str, county: Optional[str]) -> None: + """ + Validate that province and city are non-empty. + + Raises + ------ + ValueError + If province or city is None or empty string. + """ + if not province or not province.strip(): + raise ValueError("province (省) cannot be empty") + if not city or not city.strip(): + raise ValueError("city (市) cannot be empty") def get_datav_json(accode: str) -> dict: @@ -32,9 +48,19 @@ def get_datav_json(accode: str) -> dict: return response.json() -def fetch_and_save_geojson(accode: str, city_name: str, out_dir: Path) -> Path: +def fetch_and_save_geojson(accode: str, region_name: str, out_dir: Path) -> Path: """ 获取 DataV 原始数据, 先保存为 .json; 随后清洗属性并另存为 .geojson. + + Parameters + ---------- + accode : str + 行政区划代码, 如 "420100" 或 "420100_full". + region_name : str + 区域名称, 用于输出文件名. 当使用省-市-县三级参数时, 文件名为 + "{province}_{city}.geojson" 或 "{province}_{city}_{county}.geojson". + out_dir : Path + 输出目录. """ raw_data = get_datav_json(accode) @@ -54,11 +80,11 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: Path) -> Path: out_dir_path = Path(out_dir) out_dir_path.mkdir(parents=True, exist_ok=True) # 先保存原始 JSON(未清洗) - raw_json_path = out_dir_path / f"{city_name}.json" + raw_json_path = out_dir_path / f"{region_name}.json" with raw_json_path.open("w", encoding="utf-8") as f: json.dump(raw_data, f, ensure_ascii=False) # 再保存清洗后的 GeoJSON - out_path = out_dir_path / f"{city_name}.geojson" + out_path = out_dir_path / f"{region_name}.geojson" # 深拷贝后进行清洗, 避免影响原始数据 data = json.loads(json.dumps(raw_data)) features = data.get("features", []) @@ -86,126 +112,185 @@ def _name_matches_exact(target: str, candidate: str) -> bool: return _normalize_name(target) == _normalize_name(candidate) -def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optional[str]: +def resolve_adcode_by_name( + province: str, + city: str, + county: Optional[str] = None, + prefer_full: bool = False, +) -> Optional[str]: """ - 通过城市名称解析 DataV 行政区划代码. - 优先遍历全国(100000_full)和各省级(full)数据进行匹配. - 如果在省级数据中未找到, 会进一步搜索地级市下的区县数据. - 返回如 "420100" 或 "420100_full", 找不到则返回 None. + 通过省-市-县名称解析 DataV 行政区划代码. + + 采用层级查找策略: 全国数据 -> 省级数据 -> 市级数据 -> 区县数据. + 前两级 (省/市) 必须提供且不能为空; 第三级 (县) 可为空. + + Parameters + ---------- + province : str + 省份名称, 如 "湖北省". 不能为空. + city : str + 城市名称, 如 "武汉市". 不能为空. + county : str, optional + 区县名称, 如 "江岸区". 可为空 (表示只解析到市级). + prefer_full : bool, optional + 是否返回下辖完整边界代码 (如 "420100_full"), 默认 False. + + Returns + ------- + str or None + 行政区划代码, 如 "420100" 或 "420100_full", 找不到则返回 None. + + Raises + ------ + ValueError + 如果 province 或 city 为空. """ - # 先在全国层级中尝试匹配(通常包含省级与直辖市) + _validate_region_params(province, city, county) + + # Step 1: 从全国数据中查找省份 try: cn = requests.get( "https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json", timeout=20, ).json() except Exception: - cn = None + return None - target = city_name - contains_province_candidate = None - provinces = [] - - if cn: - # 先尝试在全国数据中直接匹配省级名称 - for feat in cn.get("features", []): - props = feat.get("properties", {}) - if props.get("level") == "province": - name = props.get("name", "") - code = str(props.get("adcode", "")) - if re.fullmatch(r"\d{6}", code): - if _name_matches_exact(target, name): - return f"{code}_full" if prefer_full else code - if _normalize_name(target) in _normalize_name(name): - contains_province_candidate = code - provinces.append(code) + pcode = None + for feat in cn.get("features", []): + props = feat.get("properties", {}) + if props.get("level") == "province": + name = props.get("name", "") + code = str(props.get("adcode", "")) + if re.fullmatch(r"\d{6}", code): + if _name_matches_exact(province, name): + pcode = code + break - # 遍历各省级行政区, 精确匹配城市名 - cities_to_search = [] # 收集需要进一步搜索的地级市 - - for pcode in provinces: + if not pcode: + return None + + # Step 2: 从省份数据中查找城市 + # 特殊处理直辖市: 当 province == city 时 (如 "北京市" == "北京市") + if _name_matches_exact(province, city): + ccode = pcode + else: try: - prov = requests.get( + prov_data = requests.get( f"https://geo.datav.aliyun.com/areas_v3/bound/{pcode}_full.json", timeout=20, ).json() except Exception: - continue - - exact_candidate = None - contains_candidate = None - - for feat in prov.get("features", []): + return None + + ccode = None + for feat in prov_data.get("features", []): props = feat.get("properties", {}) level = props.get("level") name = props.get("name", "") code = str(props.get("adcode", "")) - - # 仅考虑城市或区县, 且编码为6位数字 - if level in ("city", "district") and re.fullmatch(r"\d{6}", code): - if _name_matches_exact(target, name): - exact_candidate = code - break - # 作为回退: 包含匹配, 但不立即返回, 继续寻找精确匹配 - if _normalize_name(target) in _normalize_name(name): - contains_candidate = code - - # 收集地级市代码,用于后续搜索县级市 - if level == "city" and re.fullmatch(r"\d{6}", code): - cities_to_search.append(code) - - if exact_candidate: - return f"{exact_candidate}_full" if prefer_full else exact_candidate - if contains_candidate: - return f"{contains_candidate}_full" if prefer_full else contains_candidate - # 如果在省级数据中未找到,搜索地级市下的区县数据(如县级市) - for city_code in cities_to_search: + # 匹配城市或区县级别 + if level in ("city", "district") and re.fullmatch(r"\d{6}", code): + if _name_matches_exact(city, name): + ccode = code + break + + if not ccode: + return None + + # Step 3: 如果提供了区县名称, 从城市数据中查找区县 + if county: try: city_data = requests.get( - f"https://geo.datav.aliyun.com/areas_v3/bound/{city_code}_full.json", + f"https://geo.datav.aliyun.com/areas_v3/bound/{ccode}_full.json", timeout=20, ).json() except Exception: - continue - + return None + + dcode = None for feat in city_data.get("features", []): props = feat.get("properties", {}) level = props.get("level") name = props.get("name", "") code = str(props.get("adcode", "")) - - # 检查区县级别的行政区(包括县级市) - if level == "district" and re.fullmatch(r"\d{6}", code): - if _name_matches_exact(target, name): - return f"{code}_full" if prefer_full else code - # 包含匹配作为备选 - if _normalize_name(target) in _normalize_name(name): - # 找到包含匹配的县级市,直接返回 - return f"{code}_full" if prefer_full else code - # 如果城市/区县未匹配到, 回退使用省级包含匹配 - if contains_province_candidate: - return f"{contains_province_candidate}_full" if prefer_full else contains_province_candidate - return None + # 匹配区县 (包括县级市, 其 level 可能为 "city") + if level in ("district", "city") and re.fullmatch(r"\d{6}", code): + if _name_matches_exact(county, name): + dcode = code + break + + if not dcode: + return None + # 区县级别已经是最终边界,不需要 _full 后缀 + return dcode + + # 只解析到市级 + return f"{ccode}_full" if prefer_full else ccode -def fetch_and_save_geojson_by_name(city_name: str, out_dir: Path, prefer_full: bool = False) -> Path: +def fetch_and_save_geojson_by_name( + province: str, + city: str, + county: Optional[str], + out_dir: Path, + prefer_full: bool = False, +) -> Path: """ - 通过城市名称解析 adcode, 并直接拉取与保存 GeoJSON. + 通过省-市-县名称解析 adcode, 并直接拉取与保存 GeoJSON. + + Parameters + ---------- + province : str + 省份名称, 如 "湖北省". 不能为空. + city : str + 城市名称, 如 "武汉市". 不能为空. + county : str, optional + 区县名称, 如 "江岸区". 可为空 (表示只解析到市级). + out_dir : Path + 输出目录, 用于保存 GeoJSON 文件. + prefer_full : bool, optional + 是否下载下辖区划的 GeoJSON, 默认 False. + + Returns + ------- + Path + 保存的 GeoJSON 文件路径. + + Raises + ------ + ValueError + 如果省或市为空, 或无法解析到行政区划代码. """ - code = resolve_adcode_by_name(city_name, prefer_full=prefer_full) + code = resolve_adcode_by_name(province, city, county, prefer_full=prefer_full) if not code: - raise ValueError(f"无法通过名称解析到行政区划代码: {city_name}") - return fetch_and_save_geojson(code, city_name, out_dir) + raise ValueError( + f"无法通过名称解析到行政区划代码: province={province}, city={city}, county={county}" + ) + + # 构建输出文件名 + if county: + region_name = f"{province}_{city}_{county}" + else: + region_name = f"{province}_{city}" + + return fetch_and_save_geojson(code, region_name, out_dir) if __name__ == "__main__": - # city_name = "湖北省" - # city_name = "武汉市" - # city_name = "十堰市" - # city_name = "钟祥市" - # city_name = "" - out_dir = Path("./data/vectors/") - out = fetch_and_save_geojson_by_name(city_name, out_dir, prefer_full=False) - print(f"Saved raw JSON and GeoJSON for {city_name}: {out}.") + # 示例 1: 只获取市级边界 (province + city) + # out = fetch_and_save_geojson_by_name("湖北省", "武汉市", None, out_dir=Path("./data/vectors/")) + + # 示例 2: 获取十堰市县区级边界 + # out = fetch_and_save_geojson_by_name( + # "湖北省", "十堰市", None, out_dir=Path("./data/vectors/"), prefer_full=True + # ) + + # 示例 3: 获取区县级边界 (province + city + county) + out = fetch_and_save_geojson_by_name( + "湖北省", "十堰市", "郧西县", out_dir=Path("./data/vectors/"), prefer_full=True + ) + + print(f"Saved raw JSON and GeoJSON: {out}.")