feat(DataV_SuPER): 重构为支持省-市-县三级行政区划解析
- 将单一城市名称参数改为省、市、县三级参数,提升解析准确性
- 实现层级查找策略:全国->省->市->县,并添加参数校验
- 更新输出文件命名规则,使用"省_市"或"省_市_县"格式
- 完善函数文档字符串和错误提示信息
This commit is contained in:
parent
2988ca0a53
commit
f608e3afab
@ -3,22 +3,38 @@
|
|||||||
直接读取, 需要先清洗再保存为 GeoJSON.
|
直接读取, 需要先清洗再保存为 GeoJSON.
|
||||||
|
|
||||||
- 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector
|
- 官方地址: https://datav.aliyun.com/portal/school/atlas/area_selector
|
||||||
- Step1: 按"城市名称"解析为行政区划代码 (adcode);
|
- Step1: 按"省-市-县名称"解析为行政区划代码 (adcode);
|
||||||
- Step2: 将 DataV 原始数据保存为 `城市名称.json` 文件;
|
- Step2: 将 DataV 原始数据保存为 `省_市.json` 或 `省_市_县.json` 文件;
|
||||||
- Step3: 移除不兼容 GDAL/GeoPandas 的属性字段 (parent, center, centroid, acroutes);
|
- Step3: 移除不兼容 GDAL/GeoPandas 的属性字段 (parent, center, centroid, acroutes);
|
||||||
- Step4: 将清洗后的结果写出为 `城市名称.geojson` 文件.
|
- Step4: 将清洗后的结果写出为 `省_市.geojson` 或 `省_市_县.geojson` 文件.
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Authors: Hong Xie
|
Authors: CVEO Team
|
||||||
Last Updated: 2025-10-20
|
Last Updated: 2026-04-13
|
||||||
===============================================================================
|
===============================================================================
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import requests
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import re
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_region_params(province: str, city: str, county: Optional[str]) -> None:
|
||||||
|
"""
|
||||||
|
Validate that province and city are non-empty.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
If province or city is None or empty string.
|
||||||
|
"""
|
||||||
|
if not province or not province.strip():
|
||||||
|
raise ValueError("province (省) cannot be empty")
|
||||||
|
if not city or not city.strip():
|
||||||
|
raise ValueError("city (市) cannot be empty")
|
||||||
|
|
||||||
|
|
||||||
def get_datav_json(accode: str) -> dict:
|
def get_datav_json(accode: str) -> dict:
|
||||||
@ -32,9 +48,19 @@ def get_datav_json(accode: str) -> dict:
|
|||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
def fetch_and_save_geojson(accode: str, city_name: str, out_dir: Path) -> Path:
|
def fetch_and_save_geojson(accode: str, region_name: str, out_dir: Path) -> Path:
|
||||||
"""
|
"""
|
||||||
获取 DataV 原始数据, 先保存为 .json; 随后清洗属性并另存为 .geojson.
|
获取 DataV 原始数据, 先保存为 .json; 随后清洗属性并另存为 .geojson.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
accode : str
|
||||||
|
行政区划代码, 如 "420100" 或 "420100_full".
|
||||||
|
region_name : str
|
||||||
|
区域名称, 用于输出文件名. 当使用省-市-县三级参数时, 文件名为
|
||||||
|
"{province}_{city}.geojson" 或 "{province}_{city}_{county}.geojson".
|
||||||
|
out_dir : Path
|
||||||
|
输出目录.
|
||||||
"""
|
"""
|
||||||
raw_data = get_datav_json(accode)
|
raw_data = get_datav_json(accode)
|
||||||
|
|
||||||
@ -54,11 +80,11 @@ def fetch_and_save_geojson(accode: str, city_name: str, out_dir: Path) -> Path:
|
|||||||
out_dir_path = Path(out_dir)
|
out_dir_path = Path(out_dir)
|
||||||
out_dir_path.mkdir(parents=True, exist_ok=True)
|
out_dir_path.mkdir(parents=True, exist_ok=True)
|
||||||
# 先保存原始 JSON(未清洗)
|
# 先保存原始 JSON(未清洗)
|
||||||
raw_json_path = out_dir_path / f"{city_name}.json"
|
raw_json_path = out_dir_path / f"{region_name}.json"
|
||||||
with raw_json_path.open("w", encoding="utf-8") as f:
|
with raw_json_path.open("w", encoding="utf-8") as f:
|
||||||
json.dump(raw_data, f, ensure_ascii=False)
|
json.dump(raw_data, f, ensure_ascii=False)
|
||||||
# 再保存清洗后的 GeoJSON
|
# 再保存清洗后的 GeoJSON
|
||||||
out_path = out_dir_path / f"{city_name}.geojson"
|
out_path = out_dir_path / f"{region_name}.geojson"
|
||||||
# 深拷贝后进行清洗, 避免影响原始数据
|
# 深拷贝后进行清洗, 避免影响原始数据
|
||||||
data = json.loads(json.dumps(raw_data))
|
data = json.loads(json.dumps(raw_data))
|
||||||
features = data.get("features", [])
|
features = data.get("features", [])
|
||||||
@ -86,126 +112,185 @@ def _name_matches_exact(target: str, candidate: str) -> bool:
|
|||||||
return _normalize_name(target) == _normalize_name(candidate)
|
return _normalize_name(target) == _normalize_name(candidate)
|
||||||
|
|
||||||
|
|
||||||
def resolve_adcode_by_name(city_name: str, prefer_full: bool = False) -> Optional[str]:
|
def resolve_adcode_by_name(
|
||||||
|
province: str,
|
||||||
|
city: str,
|
||||||
|
county: Optional[str] = None,
|
||||||
|
prefer_full: bool = False,
|
||||||
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
通过城市名称解析 DataV 行政区划代码.
|
通过省-市-县名称解析 DataV 行政区划代码.
|
||||||
优先遍历全国(100000_full)和各省级(full)数据进行匹配.
|
|
||||||
如果在省级数据中未找到, 会进一步搜索地级市下的区县数据.
|
采用层级查找策略: 全国数据 -> 省级数据 -> 市级数据 -> 区县数据.
|
||||||
返回如 "420100" 或 "420100_full", 找不到则返回 None.
|
前两级 (省/市) 必须提供且不能为空; 第三级 (县) 可为空.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
province : str
|
||||||
|
省份名称, 如 "湖北省". 不能为空.
|
||||||
|
city : str
|
||||||
|
城市名称, 如 "武汉市". 不能为空.
|
||||||
|
county : str, optional
|
||||||
|
区县名称, 如 "江岸区". 可为空 (表示只解析到市级).
|
||||||
|
prefer_full : bool, optional
|
||||||
|
是否返回下辖完整边界代码 (如 "420100_full"), 默认 False.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str or None
|
||||||
|
行政区划代码, 如 "420100" 或 "420100_full", 找不到则返回 None.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
如果 province 或 city 为空.
|
||||||
"""
|
"""
|
||||||
# 先在全国层级中尝试匹配(通常包含省级与直辖市)
|
_validate_region_params(province, city, county)
|
||||||
|
|
||||||
|
# Step 1: 从全国数据中查找省份
|
||||||
try:
|
try:
|
||||||
cn = requests.get(
|
cn = requests.get(
|
||||||
"https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json",
|
"https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json",
|
||||||
timeout=20,
|
timeout=20,
|
||||||
).json()
|
).json()
|
||||||
except Exception:
|
except Exception:
|
||||||
cn = None
|
return None
|
||||||
|
|
||||||
target = city_name
|
pcode = None
|
||||||
contains_province_candidate = None
|
for feat in cn.get("features", []):
|
||||||
provinces = []
|
props = feat.get("properties", {})
|
||||||
|
if props.get("level") == "province":
|
||||||
if cn:
|
name = props.get("name", "")
|
||||||
# 先尝试在全国数据中直接匹配省级名称
|
code = str(props.get("adcode", ""))
|
||||||
for feat in cn.get("features", []):
|
if re.fullmatch(r"\d{6}", code):
|
||||||
props = feat.get("properties", {})
|
if _name_matches_exact(province, name):
|
||||||
if props.get("level") == "province":
|
pcode = code
|
||||||
name = props.get("name", "")
|
break
|
||||||
code = str(props.get("adcode", ""))
|
|
||||||
if re.fullmatch(r"\d{6}", code):
|
|
||||||
if _name_matches_exact(target, name):
|
|
||||||
return f"{code}_full" if prefer_full else code
|
|
||||||
if _normalize_name(target) in _normalize_name(name):
|
|
||||||
contains_province_candidate = code
|
|
||||||
provinces.append(code)
|
|
||||||
|
|
||||||
# 遍历各省级行政区, 精确匹配城市名
|
if not pcode:
|
||||||
cities_to_search = [] # 收集需要进一步搜索的地级市
|
return None
|
||||||
|
|
||||||
for pcode in provinces:
|
# Step 2: 从省份数据中查找城市
|
||||||
|
# 特殊处理直辖市: 当 province == city 时 (如 "北京市" == "北京市")
|
||||||
|
if _name_matches_exact(province, city):
|
||||||
|
ccode = pcode
|
||||||
|
else:
|
||||||
try:
|
try:
|
||||||
prov = requests.get(
|
prov_data = requests.get(
|
||||||
f"https://geo.datav.aliyun.com/areas_v3/bound/{pcode}_full.json",
|
f"https://geo.datav.aliyun.com/areas_v3/bound/{pcode}_full.json",
|
||||||
timeout=20,
|
timeout=20,
|
||||||
).json()
|
).json()
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
return None
|
||||||
|
|
||||||
exact_candidate = None
|
ccode = None
|
||||||
contains_candidate = None
|
for feat in prov_data.get("features", []):
|
||||||
|
|
||||||
for feat in prov.get("features", []):
|
|
||||||
props = feat.get("properties", {})
|
props = feat.get("properties", {})
|
||||||
level = props.get("level")
|
level = props.get("level")
|
||||||
name = props.get("name", "")
|
name = props.get("name", "")
|
||||||
code = str(props.get("adcode", ""))
|
code = str(props.get("adcode", ""))
|
||||||
|
|
||||||
# 仅考虑城市或区县, 且编码为6位数字
|
|
||||||
if level in ("city", "district") and re.fullmatch(r"\d{6}", code):
|
|
||||||
if _name_matches_exact(target, name):
|
|
||||||
exact_candidate = code
|
|
||||||
break
|
|
||||||
# 作为回退: 包含匹配, 但不立即返回, 继续寻找精确匹配
|
|
||||||
if _normalize_name(target) in _normalize_name(name):
|
|
||||||
contains_candidate = code
|
|
||||||
|
|
||||||
# 收集地级市代码,用于后续搜索县级市
|
|
||||||
if level == "city" and re.fullmatch(r"\d{6}", code):
|
|
||||||
cities_to_search.append(code)
|
|
||||||
|
|
||||||
if exact_candidate:
|
|
||||||
return f"{exact_candidate}_full" if prefer_full else exact_candidate
|
|
||||||
if contains_candidate:
|
|
||||||
return f"{contains_candidate}_full" if prefer_full else contains_candidate
|
|
||||||
|
|
||||||
# 如果在省级数据中未找到,搜索地级市下的区县数据(如县级市)
|
# 匹配城市或区县级别
|
||||||
for city_code in cities_to_search:
|
if level in ("city", "district") and re.fullmatch(r"\d{6}", code):
|
||||||
|
if _name_matches_exact(city, name):
|
||||||
|
ccode = code
|
||||||
|
break
|
||||||
|
|
||||||
|
if not ccode:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Step 3: 如果提供了区县名称, 从城市数据中查找区县
|
||||||
|
if county:
|
||||||
try:
|
try:
|
||||||
city_data = requests.get(
|
city_data = requests.get(
|
||||||
f"https://geo.datav.aliyun.com/areas_v3/bound/{city_code}_full.json",
|
f"https://geo.datav.aliyun.com/areas_v3/bound/{ccode}_full.json",
|
||||||
timeout=20,
|
timeout=20,
|
||||||
).json()
|
).json()
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
return None
|
||||||
|
|
||||||
|
dcode = None
|
||||||
for feat in city_data.get("features", []):
|
for feat in city_data.get("features", []):
|
||||||
props = feat.get("properties", {})
|
props = feat.get("properties", {})
|
||||||
level = props.get("level")
|
level = props.get("level")
|
||||||
name = props.get("name", "")
|
name = props.get("name", "")
|
||||||
code = str(props.get("adcode", ""))
|
code = str(props.get("adcode", ""))
|
||||||
|
|
||||||
# 检查区县级别的行政区(包括县级市)
|
|
||||||
if level == "district" and re.fullmatch(r"\d{6}", code):
|
|
||||||
if _name_matches_exact(target, name):
|
|
||||||
return f"{code}_full" if prefer_full else code
|
|
||||||
# 包含匹配作为备选
|
|
||||||
if _normalize_name(target) in _normalize_name(name):
|
|
||||||
# 找到包含匹配的县级市,直接返回
|
|
||||||
return f"{code}_full" if prefer_full else code
|
|
||||||
|
|
||||||
# 如果城市/区县未匹配到, 回退使用省级包含匹配
|
# 匹配区县 (包括县级市, 其 level 可能为 "city")
|
||||||
if contains_province_candidate:
|
if level in ("district", "city") and re.fullmatch(r"\d{6}", code):
|
||||||
return f"{contains_province_candidate}_full" if prefer_full else contains_province_candidate
|
if _name_matches_exact(county, name):
|
||||||
return None
|
dcode = code
|
||||||
|
break
|
||||||
|
|
||||||
|
if not dcode:
|
||||||
|
return None
|
||||||
|
# 区县级别已经是最终边界,不需要 _full 后缀
|
||||||
|
return dcode
|
||||||
|
|
||||||
|
# 只解析到市级
|
||||||
|
return f"{ccode}_full" if prefer_full else ccode
|
||||||
|
|
||||||
|
|
||||||
def fetch_and_save_geojson_by_name(city_name: str, out_dir: Path, prefer_full: bool = False) -> Path:
|
def fetch_and_save_geojson_by_name(
|
||||||
|
province: str,
|
||||||
|
city: str,
|
||||||
|
county: Optional[str],
|
||||||
|
out_dir: Path,
|
||||||
|
prefer_full: bool = False,
|
||||||
|
) -> Path:
|
||||||
"""
|
"""
|
||||||
通过城市名称解析 adcode, 并直接拉取与保存 GeoJSON.
|
通过省-市-县名称解析 adcode, 并直接拉取与保存 GeoJSON.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
province : str
|
||||||
|
省份名称, 如 "湖北省". 不能为空.
|
||||||
|
city : str
|
||||||
|
城市名称, 如 "武汉市". 不能为空.
|
||||||
|
county : str, optional
|
||||||
|
区县名称, 如 "江岸区". 可为空 (表示只解析到市级).
|
||||||
|
out_dir : Path
|
||||||
|
输出目录, 用于保存 GeoJSON 文件.
|
||||||
|
prefer_full : bool, optional
|
||||||
|
是否下载下辖区划的 GeoJSON, 默认 False.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Path
|
||||||
|
保存的 GeoJSON 文件路径.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
如果省或市为空, 或无法解析到行政区划代码.
|
||||||
"""
|
"""
|
||||||
code = resolve_adcode_by_name(city_name, prefer_full=prefer_full)
|
code = resolve_adcode_by_name(province, city, county, prefer_full=prefer_full)
|
||||||
if not code:
|
if not code:
|
||||||
raise ValueError(f"无法通过名称解析到行政区划代码: {city_name}")
|
raise ValueError(
|
||||||
return fetch_and_save_geojson(code, city_name, out_dir)
|
f"无法通过名称解析到行政区划代码: province={province}, city={city}, county={county}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 构建输出文件名
|
||||||
|
if county:
|
||||||
|
region_name = f"{province}_{city}_{county}"
|
||||||
|
else:
|
||||||
|
region_name = f"{province}_{city}"
|
||||||
|
|
||||||
|
return fetch_and_save_geojson(code, region_name, out_dir)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# city_name = "湖北省"
|
# 示例 1: 只获取市级边界 (province + city)
|
||||||
# city_name = "武汉市"
|
# out = fetch_and_save_geojson_by_name("湖北省", "武汉市", None, out_dir=Path("./data/vectors/"))
|
||||||
# city_name = "十堰市"
|
|
||||||
# city_name = "钟祥市"
|
# 示例 2: 获取十堰市县区级边界
|
||||||
# city_name = ""
|
# out = fetch_and_save_geojson_by_name(
|
||||||
out_dir = Path("./data/vectors/")
|
# "湖北省", "十堰市", None, out_dir=Path("./data/vectors/"), prefer_full=True
|
||||||
out = fetch_and_save_geojson_by_name(city_name, out_dir, prefer_full=False)
|
# )
|
||||||
print(f"Saved raw JSON and GeoJSON for {city_name}: {out}.")
|
|
||||||
|
# 示例 3: 获取区县级边界 (province + city + county)
|
||||||
|
out = fetch_and_save_geojson_by_name(
|
||||||
|
"湖北省", "十堰市", "郧西县", out_dir=Path("./data/vectors/"), prefer_full=True
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"Saved raw JSON and GeoJSON: {out}.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user