feat(DataV_SuPER): 增强行政区划代码解析功能, 支持县级市搜索.
This commit is contained in:
parent
2b007df006
commit
c90432a815
@ -1,5 +1,6 @@
|
||||
"""
|
||||
访问阿里云 DataV 下载的行政区边界数据保存为原始 JSON, 并清洗后保存为 GeoJSON.
|
||||
访问阿里云 DataV 下载的行政区边界数据保存为原始 JSON, 有部分字段与 GDAL 不兼容, 导致无法
|
||||
直接读取, 需要先清洗再保存为 GeoJSON.
|
||||
|
||||
- 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector
|
||||
- Step1: 按"城市名称"解析为行政区划代码 (adcode);
|
||||
@ -9,7 +10,7 @@
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
Authors: Hong Xie
|
||||
Last Updated: 2025-10-11
|
||||
Last Updated: 2025-10-14
|
||||
===============================================================================
|
||||
"""
|
||||
|
||||
@ -37,7 +38,7 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: str) -> Path:
|
||||
"""
|
||||
raw_data = get_datav_json(accode)
|
||||
|
||||
# 处理 features: 移除/转换不兼容 GeoPandas 的属性
|
||||
# 处理 features: 移除不兼容 GeoPandas 的属性
|
||||
def sanitize_properties(props: dict) -> dict:
|
||||
out = {}
|
||||
for k, v in props.items():
|
||||
@ -89,6 +90,7 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona
|
||||
"""
|
||||
通过城市名称解析 DataV 行政区划代码.
|
||||
优先遍历全国(100000_full)和各省级(full)数据进行匹配.
|
||||
如果在省级数据中未找到, 会进一步搜索地级市下的区县数据.
|
||||
返回如 "420100" 或 "420100_full", 找不到则返回 None.
|
||||
"""
|
||||
# 先在全国层级中尝试匹配(通常包含省级与直辖市)
|
||||
@ -102,6 +104,8 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona
|
||||
|
||||
target = city_name
|
||||
contains_province_candidate = None
|
||||
provinces = []
|
||||
|
||||
if cn:
|
||||
# 先尝试在全国数据中直接匹配省级名称
|
||||
for feat in cn.get("features", []):
|
||||
@ -114,16 +118,11 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona
|
||||
return f"{code}_full" if prefer_full else code
|
||||
if _normalize_name(target) in _normalize_name(name):
|
||||
contains_province_candidate = code
|
||||
# 从全国数据中提取省级代码, 用于后续深入搜索城市/区县
|
||||
provinces = []
|
||||
for feat in cn.get("features", []):
|
||||
props = feat.get("properties", {})
|
||||
if props.get("level") == "province":
|
||||
code = str(props.get("adcode", ""))
|
||||
if re.fullmatch(r"\d{6}", code):
|
||||
provinces.append(code)
|
||||
|
||||
# 遍历各省级行政区, 精确匹配城市名
|
||||
cities_to_search = [] # 收集需要进一步搜索的地级市
|
||||
|
||||
for pcode in provinces:
|
||||
try:
|
||||
prov = requests.get(
|
||||
@ -132,13 +131,16 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona
|
||||
).json()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
exact_candidate = None
|
||||
contains_candidate = None
|
||||
|
||||
for feat in prov.get("features", []):
|
||||
props = feat.get("properties", {})
|
||||
level = props.get("level")
|
||||
name = props.get("name", "")
|
||||
code = str(props.get("adcode", ""))
|
||||
|
||||
# 仅考虑城市或区县, 且编码为6位数字
|
||||
if level in ("city", "district") and re.fullmatch(r"\d{6}", code):
|
||||
if _name_matches_exact(target, name):
|
||||
@ -147,11 +149,41 @@ def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optiona
|
||||
# 作为回退: 包含匹配, 但不立即返回, 继续寻找精确匹配
|
||||
if _normalize_name(target) in _normalize_name(name):
|
||||
contains_candidate = code
|
||||
|
||||
# 收集地级市代码,用于后续搜索县级市
|
||||
if level == "city" and re.fullmatch(r"\d{6}", code):
|
||||
cities_to_search.append(code)
|
||||
|
||||
if exact_candidate:
|
||||
return f"{exact_candidate}_full" if prefer_full else exact_candidate
|
||||
if contains_candidate:
|
||||
return f"{contains_candidate}_full" if prefer_full else contains_candidate
|
||||
|
||||
# 如果在省级数据中未找到,搜索地级市下的区县数据(如县级市)
|
||||
for city_code in cities_to_search:
|
||||
try:
|
||||
city_data = requests.get(
|
||||
f"https://geo.datav.aliyun.com/areas_v3/bound/{city_code}_full.json",
|
||||
timeout=20,
|
||||
).json()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for feat in city_data.get("features", []):
|
||||
props = feat.get("properties", {})
|
||||
level = props.get("level")
|
||||
name = props.get("name", "")
|
||||
code = str(props.get("adcode", ""))
|
||||
|
||||
# 检查区县级别的行政区(包括县级市)
|
||||
if level == "district" and re.fullmatch(r"\d{6}", code):
|
||||
if _name_matches_exact(target, name):
|
||||
return f"{code}_full" if prefer_full else code
|
||||
# 包含匹配作为备选
|
||||
if _normalize_name(target) in _normalize_name(name):
|
||||
# 找到包含匹配的县级市,直接返回
|
||||
return f"{code}_full" if prefer_full else code
|
||||
|
||||
# 如果城市/区县未匹配到, 回退使用省级包含匹配
|
||||
if contains_province_candidate:
|
||||
return f"{contains_province_candidate}_full" if prefer_full else contains_province_candidate
|
||||
@ -171,7 +203,8 @@ def fetch_and_save_geojson_by_name(city_name: str, out_dir: str, prefer_full: bo
|
||||
if __name__ == "__main__":
|
||||
# city_name = "武汉市"
|
||||
# city_name = "十堰市"
|
||||
city_name = "湖北省"
|
||||
# city_name = "湖北省"
|
||||
city_name = "钟祥市"
|
||||
out_dir = "./data/vectors/"
|
||||
out = fetch_and_save_geojson_by_name(city_name, out_dir, prefer_full=False)
|
||||
print(f"Saved raw JSON and GeoJSON for {city_name}: {out}")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user