-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate_data.py
More file actions
116 lines (95 loc) · 3.73 KB
/
validate_data.py
File metadata and controls
116 lines (95 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python3
"""
验证Python错误数据JSON文件的完整性和格式
"""
import json
import sys
from typing import Dict, List, Set
def load_json_file(filename: str) -> Dict:
    """Load a JSON file and return its top-level object.

    Args:
        filename: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The parsed JSON object as a dict. An empty dict is returned, after
        printing an error message, when the file does not exist, does not
        contain valid JSON, or its top-level value is not a JSON object.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"错误:找不到文件 {filename}")
        return {}
    except json.JSONDecodeError as e:
        print(f"错误:JSON格式无效 - {e}")
        return {}
    # json.load can also yield a list, string, number, bool or None. The
    # annotated return type is a dict, so reject anything else here rather
    # than handing callers a value without dict methods.
    if not isinstance(data, dict):
        print(f"错误:JSON顶层必须是对象,实际类型为 {type(data).__name__}")
        return {}
    return data
def validate_json_format(data: Dict) -> bool:
    """Check that the required top-level keys are present in the data.

    The keys 'version', 'categories' and 'exceptions' are looked up in that
    order. A failure line naming the first missing key is printed and False
    is returned; when none is missing a success line is printed and True is
    returned.
    """
    first_missing = next(
        (
            name
            for name in ('version', 'categories', 'exceptions')
            if name not in data
        ),
        None,
    )
    if first_missing is not None:
        print(f"✗ 缺少必需字段: {first_missing}")
        return False

    print("✓ JSON格式验证通过")
    return True
def _collect_record_ids(records: List, label: str, issues: List[str]) -> List:
    """Return the 'id' of each dict record, appending an issue for any record without one."""
    ids = []
    for position, record in enumerate(records, start=1):
        if isinstance(record, dict) and 'id' in record:
            ids.append(record['id'])
        else:
            issues.append(f"第 {position} 个{label}记录无效或缺少id")
    return ids


def validate_data_integrity(data: Dict) -> bool:
    """Check the consistency of the category and exception records.

    The following checks are run and every problem found is collected:
      * category ids are unique;
      * exception ids are unique;
      * each exception's 'category_id' matches an existing category id;
      * each exception has non-empty 'exception_name', 'description_en',
        'description_zh' and 'common_messages' values.

    Records that are not dicts or have no 'id' key are reported as issues
    instead of raising, so malformed input produces a report, not a crash.

    Args:
        data: Parsed data; 'categories' and 'exceptions' are read with
            ``.get`` and default to empty lists when absent.

    Returns:
        True when no issue was found, False otherwise. The outcome (and the
        list of issues, if any) is printed to standard output.
    """
    issues = []
    categories = data.get('categories', [])
    exceptions = data.get('exceptions', [])

    # Uniqueness of category ids.
    category_ids = _collect_record_ids(categories, "分类", issues)
    if len(category_ids) != len(set(category_ids)):
        issues.append("分类ID存在重复")

    # Uniqueness of exception ids.
    exception_ids = _collect_record_ids(exceptions, "异常", issues)
    if len(exception_ids) != len(set(exception_ids)):
        issues.append("异常ID存在重复")

    # Non-dict entries were already reported above; the field checks below
    # need .get(), so they only look at dict records.
    exception_records = [exc for exc in exceptions if isinstance(exc, dict)]

    # Every exception must point at a known category.
    valid_category_ids = set(category_ids)
    for exc in exception_records:
        if exc.get('category_id') not in valid_category_ids:
            issues.append(f"异常 {exc.get('id')} 的category_id '{exc.get('category_id')}' 不存在")

    # Every exception must carry its basic descriptive fields. This is a
    # separate pass so that all category_id issues are listed first.
    for exc in exception_records:
        exc_id = exc.get('id', 'unknown')
        if not exc.get('exception_name'):
            issues.append(f"异常 {exc_id} 缺少exception_name")
        if not exc.get('description_en'):
            issues.append(f"异常 {exc_id} 缺少英文描述")
        if not exc.get('description_zh'):
            issues.append(f"异常 {exc_id} 缺少中文描述")
        if not exc.get('common_messages'):
            issues.append(f"异常 {exc_id} 缺少常见错误消息")

    if issues:
        print("✗ 数据完整性检查发现问题:")
        for issue in issues:
            print(f" - {issue}")
        return False

    print("✓ 数据完整性检查通过")
    return True
def generate_statistics(data: Dict) -> None:
    """Print summary statistics for the loaded data to standard output.

    Reports the number of categories and exceptions, the number of
    exceptions per category (labelled with the category's 'name_zh', or the
    raw category id when no category with that id exists), and the total
    number of entries across all 'common_messages' lists.
    """
    print("\n=== 数据统计 ===")
    category_list = data.get('categories', [])
    exception_list = data.get('exceptions', [])
    print(f"分类数量: {len(category_list)}")
    print(f"异常数量: {len(exception_list)}")

    # Tally exceptions per category id, in first-seen order.
    per_category = {}
    for record in exception_list:
        key = record.get('category_id')
        per_category.setdefault(key, 0)
        per_category[key] += 1

    print("\n按分类统计:")
    # Map category id -> Chinese display name.
    display_names = {}
    for category in category_list:
        category_key = category['id']
        display_names[category_key] = category['name_zh']
    for key in per_category:
        label = display_names.get(key, key)
        print(f" {label}: {per_category[key]} 个异常")

    # Total number of common error messages over all exceptions.
    message_total = 0
    for record in exception_list:
        message_total += len(record.get('common_messages', []))
    print(f"\n总错误消息数量: {message_total}")
def main() -> int:
    """Run the validation tool against 'python_errors_data.json'.

    Loads the data file, runs the format check and the integrity check, and
    prints the statistics. All three steps run even when a check fails, so
    the report is always complete.

    Returns:
        0 when the file was loaded and both checks passed, 1 otherwise.
    """
    print("Python错误数据验证工具")
    print("=" * 30)

    # Load the data file; an empty result means loading failed or the file
    # held no data, so there is nothing to validate.
    data = load_json_file('python_errors_data.json')
    if not data:
        return 1

    # Keep the check results instead of discarding them so the outcome can
    # be reported through the return value.
    format_ok = validate_json_format(data)
    integrity_ok = validate_data_integrity(data)

    generate_statistics(data)
    return 0 if format_ok and integrity_ok else 1


if __name__ == "__main__":
    # Propagate the validation outcome as the process exit status.
    sys.exit(main())