从 IP 地址范围获取城市
Getting City from IP Address range
- 我有一个 IP 地址。例如,192.168.2.10
- 我还有一本字典:
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
问题:我应该如何从我的IP地址中找到城市并使用这本字典花费尽可能少的时间(时间复杂度)?
编写一个自定义函数,将 IP 地址解析为数字元组以便于比较:
def get_city(ip):
for city in RANGES:
for d in RANGES[city]:
if tuple(map(int, d["start"].split("."))) <= tuple(map(int, ip.split("."))) <= tuple(map(int, d["end"].split("."))):
return city
>>> get_city("192.168.2.10")
"munich"
首先,您需要重新排列数据,以便更有效地查找。
- 创建将 IP 地址转换为数字的函数
- 并使用lower/start IP号作为新的数据键,同时保留结束IP值。
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in RANGES.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
现在,您可以使用 bisect
搜索排序的起始 IP,对于您的输入,在内部使用 RB-tree AIK 它。
下面是它的完整 PoC 代码:
from functools import reduce
from bisect import bisect_left
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in input_ranges.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
return data
def search_for_ip(search_ip, ip_starts, ip_data):
lookup_index = bisect_left(ip_starts, ip_to_long(search_ip))
if lookup_index > 0 and ip_data[ip_starts[lookup_index-1]]['end'] > ip_to_long(search_ip):
return ip_data[ip_starts[lookup_index-1]]['location']
return
new_data = data_transform(RANGES)
print(new_data)
ip_starts = sorted(list(new_data))
print(search_for_ip('192.168.2.100', ip_starts, new_data)) # -> munich
print(search_for_ip('192.168.1.100', ip_starts, new_data)) # -> lodon
print(search_for_ip('192.168.0.100', ip_starts, new_data)) # -> None
如果你想要任意大数据集的最佳复杂度,“正确答案”是季斌给出的答案。
要真正优化多次调用的性能,您确实需要重组数据,并使用内置的平分函数。
但如果您真的不想触及您的数据,您仍然可以使用 band-aid 自定义的 bisect 实现,它看起来像那样
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ipv4_str_to_tuple(ip_str):
return tuple(map(int, ip_str.split('.')))
def relative_in_range(ipv4_tuple, ip_range):
ipv4t_start = ipv4_str_to_tuple(ip_range['start'])
ipv4t_end = ipv4_str_to_tuple(ip_range['end'])
if ipv4t_start > ipv4_tuple:
return -1
if ipv4t_end < ipv4_tuple:
return 1
return 0
def from_those_ranges(ipv4_tuple, ranges):
#in-built bisect
lo, hi = 0, len(ranges)
while lo < hi:
mid = lo + (hi - lo) // 2
comp = relative_in_range(ipv4_tuple, ranges[mid])
if comp == 0:
return True
if comp > 0:
lo = mid + 1
else:
hi = mid
return False
def find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges):
for entry, entry_ranges in entries_ranges.items():
if from_those_ranges(ipv4_tuple, entry_ranges):
return entry
return None
def find_entry_from_ipv4_str(ipv4_str, entries_ranges):
ipv4_tuple = ipv4_str_to_tuple(ipv4_str)
return find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges)
print(find_entry_from_ipv4_str('10.2.4.2', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('192.168.1.1', RANGES))
print(find_entry_from_ipv4_str('172.12.10.25', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('10.10.5.5', RANGES))
-> None
-> 慕尼黑
-> 伦敦
-> None
-> 慕尼黑
-> 伦敦
等等
link 去游乐场:https://trinket.io/python/e1f9deb1c7
- 我有一个 IP 地址。例如,192.168.2.10
- 我还有一本字典:
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
问题:我应该如何从我的IP地址中找到城市并使用这本字典花费尽可能少的时间(时间复杂度)?
编写一个自定义函数,将 IP 地址解析为数字元组以便于比较:
def get_city(ip):
for city in RANGES:
for d in RANGES[city]:
if tuple(map(int, d["start"].split("."))) <= tuple(map(int, ip.split("."))) <= tuple(map(int, d["end"].split("."))):
return city
>>> get_city("192.168.2.10")
"munich"
首先,您需要重新排列数据,以便更有效地查找。
- 创建将 IP 地址转换为数字的函数
- 并使用lower/start IP号作为新的数据键,同时保留结束IP值。
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in RANGES.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
现在,您可以使用 bisect
搜索排序的起始 IP,对于您的输入,在内部使用 RB-tree AIK 它。
下面是它的完整 PoC 代码:
from functools import reduce
from bisect import bisect_left
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in input_ranges.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
return data
def search_for_ip(search_ip, ip_starts, ip_data):
lookup_index = bisect_left(ip_starts, ip_to_long(search_ip))
if lookup_index > 0 and ip_data[ip_starts[lookup_index-1]]['end'] > ip_to_long(search_ip):
return ip_data[ip_starts[lookup_index-1]]['location']
return
new_data = data_transform(RANGES)
print(new_data)
ip_starts = sorted(list(new_data))
print(search_for_ip('192.168.2.100', ip_starts, new_data)) # -> munich
print(search_for_ip('192.168.1.100', ip_starts, new_data)) # -> lodon
print(search_for_ip('192.168.0.100', ip_starts, new_data)) # -> None
如果你想要任意大数据集的最佳复杂度,“正确答案”是季斌给出的答案。
要真正优化多次调用的性能,您确实需要重组数据,并使用内置的平分函数。
但如果您真的不想触及您的数据,您仍然可以使用 band-aid 自定义的 bisect 实现,它看起来像那样
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ipv4_str_to_tuple(ip_str):
return tuple(map(int, ip_str.split('.')))
def relative_in_range(ipv4_tuple, ip_range):
ipv4t_start = ipv4_str_to_tuple(ip_range['start'])
ipv4t_end = ipv4_str_to_tuple(ip_range['end'])
if ipv4t_start > ipv4_tuple:
return -1
if ipv4t_end < ipv4_tuple:
return 1
return 0
def from_those_ranges(ipv4_tuple, ranges):
#in-built bisect
lo, hi = 0, len(ranges)
while lo < hi:
mid = lo + (hi - lo) // 2
comp = relative_in_range(ipv4_tuple, ranges[mid])
if comp == 0:
return True
if comp > 0:
lo = mid + 1
else:
hi = mid
return False
def find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges):
for entry, entry_ranges in entries_ranges.items():
if from_those_ranges(ipv4_tuple, entry_ranges):
return entry
return None
def find_entry_from_ipv4_str(ipv4_str, entries_ranges):
ipv4_tuple = ipv4_str_to_tuple(ipv4_str)
return find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges)
print(find_entry_from_ipv4_str('10.2.4.2', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('192.168.1.1', RANGES))
print(find_entry_from_ipv4_str('172.12.10.25', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('10.10.5.5', RANGES))
-> None
-> 慕尼黑
-> 伦敦
-> None
-> 慕尼黑
-> 伦敦
等等
link 去游乐场:https://trinket.io/python/e1f9deb1c7