From c90432a81555369593162a76e16fb88a3c0129f5 Mon Sep 17 00:00:00 2001 From: xhong Date: Tue, 14 Oct 2025 11:04:58 +0800 Subject: [PATCH] =?UTF-8?q?feat(DataV=5FSuPER):=20=E5=A2=9E=E5=BC=BA?= =?UTF-8?q?=E8=A1=8C=E6=94=BF=E5=8C=BA=E5=88=92=E4=BB=A3=E7=A0=81=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=AF=E6=8C=81=E5=8E=BF?= =?UTF-8?q?=E7=BA=A7=E5=B8=82=E6=90=9C=E7=B4=A2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DATA_SuPER/DataV_SuPER.py | 55 +++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/DATA_SuPER/DataV_SuPER.py b/DATA_SuPER/DataV_SuPER.py index 7d7c197..ccaa2db 100644 --- a/DATA_SuPER/DataV_SuPER.py +++ b/DATA_SuPER/DataV_SuPER.py @@ -1,5 +1,6 @@ """ -访问阿里云 DataV 下载的行政区边界数据保存为原始 JSON, 并清洗后保存为 GeoJSON. +访问阿里云 DataV 下载的行政区边界数据保存为原始 JSON, 有部分字段与GDAL不兼容, 导致无法 +直接读取, 需要先清洗再保存为 GeoJSON. - 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector - Step1: 按"城市名称"解析为行政区划代码 (adcode); @@ -9,7 +10,7 @@ ------------------------------------------------------------------------------- Authors: Hong Xie -Last Updated: 2025-10-11 +Last Updated: 2025-10-14 =============================================================================== """ @@ -37,7 +38,7 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: str) -> Path: """ raw_data = get_datav_json(accode) - # 处理 features: 移除/转换不兼容 GeoPandas 的属性 + # 处理 features: 移除不兼容 GeoPandas 的属性 def sanitize_properties(props: dict) -> dict: out = {} for k, v in props.items(): @@ -89,6 +90,7 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona """ 通过城市名称解析 DataV 行政区划代码. 优先遍历全国(100000_full)和各省级(full)数据进行匹配. + 如果在省级数据中未找到, 会进一步搜索地级市下的区县数据. 返回如 "420100" 或 "420100_full", 找不到则返回 None. """ # 先在全国层级中尝试匹配(通常包含省级与直辖市) @@ -102,6 +104,8 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona target = city_name contains_province_candidate = None + provinces = [] + if cn: # 先尝试在全国数据中直接匹配省级名称 for feat in cn.get("features", []): @@ -114,16 +118,11 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona return f"{code}_full" if prefer_full else code if _normalize_name(target) in _normalize_name(name): contains_province_candidate = code - # 从全国数据中提取省级代码, 用于后续深入搜索城市/区县 - provinces = [] - for feat in cn.get("features", []): - props = feat.get("properties", {}) - if props.get("level") == "province": - code = str(props.get("adcode", "")) - if re.fullmatch(r"\d{6}", code): provinces.append(code) # 遍历各省级行政区, 精确匹配城市名 + cities_to_search = [] # 收集需要进一步搜索的地级市 + for pcode in provinces: try: prov = requests.get( @@ -132,13 +131,16 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona ).json() except Exception: continue + exact_candidate = None contains_candidate = None + for feat in prov.get("features", []): props = feat.get("properties", {}) level = props.get("level") name = props.get("name", "") code = str(props.get("adcode", "")) + # 仅考虑城市或区县, 且编码为6位数字 if level in ("city", "district") and re.fullmatch(r"\d{6}", code): if _name_matches_exact(target, name): @@ -147,11 +149,41 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona # 作为回退: 包含匹配, 但不立即返回, 继续寻找精确匹配 if _normalize_name(target) in _normalize_name(name): contains_candidate = code + + # 收集地级市代码,用于后续搜索县级市 + if level == "city" and re.fullmatch(r"\d{6}", code): + cities_to_search.append(code) + if exact_candidate: return f"{exact_candidate}_full" if prefer_full else exact_candidate if contains_candidate: return f"{contains_candidate}_full" if prefer_full else contains_candidate + # 如果在省级数据中未找到,搜索地级市下的区县数据(如县级市) + for city_code in cities_to_search: + try: + city_data = requests.get( + f"https://geo.datav.aliyun.com/areas_v3/bound/{city_code}_full.json", + timeout=20, + ).json() + except Exception: + continue + + for feat in city_data.get("features", []): + props = feat.get("properties", {}) + level = props.get("level") + name = props.get("name", "") + code = str(props.get("adcode", "")) + + # 检查区县级别的行政区(包括县级市) + if level == "district" and re.fullmatch(r"\d{6}", code): + if _name_matches_exact(target, name): + return f"{code}_full" if prefer_full else code + # 包含匹配作为备选 + if _normalize_name(target) in _normalize_name(name): + # 找到包含匹配的县级市,直接返回 + return f"{code}_full" if prefer_full else code + # 如果城市/区县未匹配到, 回退使用省级包含匹配 if contains_province_candidate: return f"{contains_province_candidate}_full" if prefer_full else contains_province_candidate @@ -171,7 +203,8 @@ def fetch_and_save_geojson_by_name(city_name: str, out_dir: str, prefer_full: bo if __name__ == "__main__": # city_name = "武汉市" # city_name = "十堰市" - city_name = "湖北省" + # city_name = "湖北省" + city_name = "钟祥市" out_dir = "./data/vectors/" out = fetch_and_save_geojson_by_name(city_name, out_dir, prefer_full=False) print(f"Saved raw JSON and GeoJSON for {city_name}: {out}")