iptogeo/data/build_c_array.py
2016-01-31 11:42:28 +01:00

252 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python
#-*- coding: utf-8
import sys
# Positions of the fields inside each '|'-separated line of the input file.
COUNTRY_CODE_INDEX=1  # country code (lower-cased by the loader; empty means unassigned)
IP_TYPE_INDEX=2  # NOTE(review): not referenced in the visible code; presumably the address-family field - confirm
IP_INDEX=3  # first address of the range, as a dotted-quad string
IP_SIZE_INDEX=4  # number of addresses in the range (parsed with int())
class IP_ELEMENT(object):
    """One node of the IPv4 lookup structure that is emitted as C data.

    A node covers the address range ``start`` .. ``end`` (dotted-quad
    strings).  Nodes are chained to their siblings through ``_prev`` /
    ``_next`` and may point to a first child through ``_childs``.
    ``_level`` selects which octet (0..3) of the start/end addresses is
    written out by :meth:`printme`.
    """

    def __init__(self, start, end=None, size=0, country_code=None, level=0):
        """Create a node.

        start        -- first address of the range, dotted-quad string.
        end          -- last address of the range; when falsy it is derived
                        from ``start`` and ``size``.
        size         -- number of addresses in the range (0 is allowed and
                        yields ``end == start``).
        country_code -- integer written to the ``.code`` field (None is
                        printed as 0).
        level        -- octet index used by :meth:`printme`.
        """
        self._start = start
        self._end = end
        self._size = size
        self._country_code = country_code
        self._prev = None
        self._next = None
        self._childs = None
        self._average = 0
        self._level = level
        if not self._end:
            self._compute_last_ip()
        self._splitted_start = IP_ELEMENT.split_ip(self._start)
        self._splitted_end = IP_ELEMENT.split_ip(self._end)

    def _compute_last_ip(self):
        """Set ``_end`` to the last address of a range of ``_size`` addresses.

        The end is simply ``start + size - 1``.  A byte-by-byte computation
        without borrow propagation is wrong as soon as the size has more
        than one non-zero byte (e.g. 65792 addresses), hence the plain
        integer arithmetic.
        """
        end_ip = IP_ELEMENT.ip_to_int(self._start)
        if self._size:  # We can have _size == 0: the range is then just `start`
            end_ip += self._size - 1
        self._end = IP_ELEMENT.ip_to_str(end_ip)

    @staticmethod
    def split_ip(ip):
        """Return the octets of a dotted-quad string as a list of ints."""
        return [int(x) for x in ip.split('.')]

    @staticmethod
    def ip_to_int(str_ip):
        """Convert a dotted-quad string to its 32-bit integer value."""
        first, second, third, fourth = IP_ELEMENT.split_ip(str_ip)[:4]
        return (first << 24) + (second << 16) + (third << 8) + fourth

    @staticmethod
    def ip_to_str(int_ip):
        """Convert an integer to a dotted-quad string (each octet masked to 8 bits)."""
        return '%d.%d.%d.%d' % ((int_ip >> 24) & 0xFF,
                                (int_ip >> 16) & 0xFF,
                                (int_ip >> 8) & 0xFF,
                                int_ip & 0xFF)

    def set_next(self, ip):
        """Set the next sibling."""
        self._next = ip

    def set_prev(self, ip):
        """Set the previous sibling."""
        self._prev = ip

    def set_childs(self, ip):
        """Set the first child."""
        self._childs = ip

    def set_average(self, average):
        """Set the value written to the ``.average`` field."""
        self._average = average

    def set_level(self, level):
        """Set the octet index used by :meth:`printme`."""
        self._level = level

    def name(self):
        """Return the C identifier of this node, built from its start and end addresses."""
        return 'ip__%s__%s' % (self._start.replace('.', '_'),
                               self._end.replace('.', '_'))

    @staticmethod
    def _reference(element):
        """Return the C expression pointing to ``element`` ('NULL' when there is none)."""
        if element:
            return '&%s' % (element.name())
        return 'NULL'

    def printme(self):
        """Print the C definition (``static const ip_level``) of this node to stdout."""
        # Single-argument print(...) behaves exactly like the print statement
        # under Python 2 and is also valid under Python 3.
        print('static const ip_level %s = {' % (self.name()))
        print('\t.prev = %s,' % (IP_ELEMENT._reference(self._prev)))
        print('\t.next = %s,' % (IP_ELEMENT._reference(self._next)))
        print('\t.childs = %s,' % (IP_ELEMENT._reference(self._childs)))
        print('\t.start = %d,' % (self._splitted_start[self._level]))
        print('\t.end = %d,' % (self._splitted_end[self._level]))
        print('\t.average = %d,' % (self._average))
        print('\t.code = %d,' % (self._country_code or 0))
        print('};')
# Distinct lower-cased country codes in order of first appearance; the
# position of a code in this list is the numeric code stored in each element.
countries = []
# NOTE(review): the three names below are not referenced anywhere in the
# visible part of the file; kept in case something else relies on them.
ip_idx = [0] * 255
cur_ip_prefix = 1
cur_idx = 0
# Maps the start address (dotted-quad string) of a range to its IP_ELEMENT.
array_vals = {}
# The context manager guarantees the input file is closed, even on error.
with open("prefix_res_ipv4") as f:
    for l in f:
        if not l.strip():
            continue  # Blank line: nothing to parse
        information = l.split('|')
        country = information[COUNTRY_CODE_INDEX].lower()
        if not country:
            continue  # Available or reserved but not assigned
        try:
            country_idx = countries.index(country)
        except ValueError:
            country_idx = len(countries)
            countries.append(country)
        ip = information[IP_INDEX]
        interval_size = int(information[IP_SIZE_INDEX])
        array_vals[ip] = IP_ELEMENT(ip, None, interval_size, country_idx)

# Header of the generated C file.  Single-argument print(...) behaves like
# the print statement under Python 2 and is also valid under Python 3.
print('/* This file was automatically generated, do not edit it ! */')
print('#include <stdint.h>\n\n')
def ip_sort(a, b):
    """cmp-style comparator ordering two elements by start address.

    The four octets of ``_splitted_start`` are compared from the most
    significant one down; the difference of the first pair that differs is
    returned (negative when ``a`` sorts first), or 0 when all four match.
    """
    octet = 0
    while octet < 4:
        left = a._splitted_start[octet]
        right = b._splitted_start[octet]
        if left != right:
            return left - right
        octet += 1
    return 0
# All parsed ranges ordered by start address.  The octet lists compare
# lexicographically, which is the same ordering as the ip_sort() comparator,
# and sorted(..., key=...) does not depend on dict.values() returning a
# list or on the cmp argument of list.sort().
ip_list = sorted(array_vals.values(),
                 key=lambda element: element._splitted_start)
def get_interval(root, intervals, level):
    """Return the leading run of ``intervals`` whose octet ``level`` is ``root``.

    Elements are taken from the front of ``intervals`` for as long as
    ``_splitted_start[level]`` equals ``root``; the scan stops at the first
    element that differs.  A new list is returned (possibly empty).
    """
    run = []
    for candidate in intervals:
        if candidate._splitted_start[level] == root:
            run.append(candidate)
            continue
        break
    return run
# 1.5.0.0
# -> 1.5.0.0 .. 1.5.29.0
# -> 1.5.30.0 .. 1.5.30.128
# -> 1.5.30.129 .. 1.5.31.0
# -> 1.5.32.0 .. 1.5.33.0
# -> 1.6.32.0 .. 1.7.0.0
def print_interval(interval):
    """Return a bracketed, comma-separated listing of the element names.

    Every name is followed by ', ' (including the last one), so two
    elements give '[a, b, ]' and an empty input gives '[]'.  Only the
    commented-out trace in manage_root() refers to this helper.
    """
    listing = ''.join('%s, ' % (element.name()) for element in interval)
    return '[' + listing + ']'
def compute_average(root):
    """Store in ``root`` the log2 of the average span of its children.

    The children are walked from ``root._childs`` through ``_next``.  The
    span of a child is ``end - start + 1`` of the octet selected by the
    child's own ``_level``.  The floored average span is turned into the
    largest exponent ``e`` with ``2**e <= average`` and handed to
    ``root.set_average(e)``; an average below 1 gives -1.

    When ``root`` has no children there is nothing to average and
    ``root`` is left untouched (instead of dividing by zero).
    """
    child_count = 0
    covered = 0
    child = root._childs
    while child:
        child_count += 1
        covered += (child._splitted_end[child._level]
                    - child._splitted_start[child._level] + 1)
        child = child._next
    if not child_count:
        return
    average = covered // child_count
    # Largest exponent whose power of two does not exceed the average.  No
    # upper bound is hard-coded, so a full-octet average of 256 yields 8
    # rather than being silently skipped.
    exponent = -1
    while (1 << (exponent + 1)) <= average:
        exponent += 1
    root.set_average(exponent)
def manage_root(root, intervals, level):
    """Chain ``intervals`` as siblings for octet ``level`` and return the first.

    ``intervals`` is scanned front to back.  An element followed by others
    that share its octet ``level`` is wrapped, together with them, under a
    new group element whose start address has the octets from ``level + 1``
    on cleared; the run is processed recursively at ``level + 1``.  An
    element with no such followers is kept as is.  The resulting elements
    (groups or plain ones) are linked through set_prev() / set_next().

    root      -- not used by the body.
    intervals -- list of elements, expected to be sorted by start address.
    level     -- octet index being grouped on.

    Returns the first element of the chain (None for an empty input), or
    the tuple ``(0, None)`` when ``level >= 3``.

    NOTE(review): the ``(0, None)`` return differs in type from the normal
    return value and means that elements sharing their first three octets
    are never linked to each other - confirm this is intended.
    NOTE(review): the result of the recursive call is discarded and the
    group's child is always the run's original first element, even when the
    recursion wrapped that element in a deeper group - confirm.
    """
    if level >= 3:
        return (0, None)
    # print 'manage_root(%d, %s, %d)' %\
    #     (root, print_interval(intervals), level)
    head = None
    previous = None
    position = 0
    total = len(intervals)
    while position < total:
        node = intervals[position]
        followers = get_interval(node._splitted_start[level],
                                 intervals[position + 1:],
                                 level)
        if not followers:
            # Nothing else shares this octet: plain element at this level.
            node.set_level(level)
            position += 1
        else:
            members = [node] + followers
            for member in members:
                member.set_level(level + 1)
            # Start address of the group: clear every octet below `level`.
            base = IP_ELEMENT.ip_to_int(node._start)
            for cleared in range(level, 3):
                base &= 0xFFFFFFFF ^ (0xFF << ((2 - cleared) * 8))
            group = IP_ELEMENT(IP_ELEMENT.ip_to_str(base), level=level)
            # Called for its linking side effects only (see NOTE above).
            manage_root(node._splitted_start[level + 1], members, level + 1)
            group.set_childs(node)
            compute_average(group)
            position += len(members)
            node = group
        node.set_prev(previous)
        if previous:
            previous.set_next(node)
        previous = node
        if not head:
            head = node
    return head
def print_ip(ip):
    """Print the C declarations and definitions of a sibling chain.

    Starting at ``ip`` and following ``_next``: for every sibling, the
    chain of its children is printed first (recursively), then a forward
    declaration of the sibling itself.  After an empty line, every sibling
    prints its definition through printme().
    """
    siblings = []
    node = ip
    while node:
        siblings.append(node)
        node = node._next

    # Single-argument print(...) behaves like the print statement under
    # Python 2 and is also valid under Python 3.
    for node in siblings:
        if node._childs:
            print_ip(node._childs)
        print('static const ip_level %s;' % (node.name()))
    print('')
    for node in siblings:
        node.printme()
# Walk the sorted ranges one first-octet group at a time: build the tree of
# each group, print it, and remember its first element so that the root
# table can point to it.  Every group is recorded, including the last one,
# and an empty ip_list simply produces an all-NULL root table.
root_ips = [None] * 256
start_idx = 0
while start_idx < len(ip_list):
    root = ip_list[start_idx]._splitted_start[0]
    end_idx = start_idx + 1
    while (end_idx < len(ip_list)
           and ip_list[end_idx]._splitted_start[0] == root):
        end_idx += 1
    cur_interval = ip_list[start_idx:end_idx]
    res = manage_root(root, cur_interval, 1)
    print_ip(res)
    root_ips[res._splitted_start[0]] = res
    start_idx = end_idx

# Root table indexed by the first octet of an address.  Single-argument
# print(...) behaves like the print statement under Python 2 and is also
# valid under Python 3.
print('static const ip_level* s_root_ip[256] = {')
for i in range(0, 256):
    if root_ips[i]:
        print('\t&%s,' % (root_ips[i].name()))
    else:
        print('\tNULL, // %d' % (i))
print('};\n')

# Country code strings, indexed by the numeric code stored in each element.
print('static const uint8_t country_codes[][3] = {')
for cc in countries:
    print('\t{"%s"},' % (cc))
print('};\n')