如何在解析 Apache 日志时把字典的键移入值中,并以索引作为新的键
How to shift key in to values and index as key in dictionary in apache log
如何把字典的键移入值中,并用索引作为新的键?我写了一个程序,运行正常。但我需要给输出编号:把原来的键(IP 地址)移到值里,并用序号作为新的键。
# Sample Apache access-log text used by the snippets below: five entries
# coming from three distinct client IPs, one entry per line.
data = '''
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "GET /apng/assembler-2.0/assembler2.php HTTP/1.1" 200 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
202.167.250.99 - - [29/Aug/2017:04:41:10 -0400] "GET /apng/images/o_sample.png?1424751982?1424776117 HTTP/1.1" 200 115656 "http://bbs.mydigit.cn/read.php?tid=2186780&fpage=3" "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "GET /apng/assembler-2.0/assembler2.php HTTP/1.1" 200 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
60.4.236.27 - - [29/Aug/2017:04:42:46 -0400] "POST /apng/images/o_sample.png?1424751982?1424776117 HTTP/1.1" 404 115656 "http://bbs.mydigit.cn/read.php?tid=1952896" "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "POST /apng/assembler-2.0/assembler2.php HTTP/1.1" 404 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"'''
我的代码
import re
from pprint import pprint

# Turn the raw log text into a list of stripped, non-empty lines.
data = [text for text in (raw.strip() for raw in data.splitlines()) if text]

# Per-IP counters: how many GET/POST requests and 200/404 responses were seen.
out = {}
for entry in data:
    # Captures (client IP, request method, status code) from one log line.
    hits = re.findall(r'([\d.]+).*?(GET|POST).*?"\s(200|404)', entry)
    if hits:
        ip_address, method, status_code = hits[0]
        # First sighting of an IP creates its zeroed counter record.
        counters = out.setdefault(
            ip_address, {'404': 0, '200': 0, 'GET': 0, 'POST': 0}
        )
        counters[method] += 1
        counters[status_code] += 1

pprint(out, width=30)
我的输出如下
{'1.1.1.1': {'200': 2,
'404': 1,
'GET': 2,
'POST': 1},
'2.2.2.2': {'200': 1,
'404': 0,
'GET': 1,
'POST': 0},
'6.3.3.3': {'200': 0,
'404': 1,
'GET': 0,
'POST': 1}}
我想要的是
{1: {'IP': '120.115.144.240',
'200': 2,
'404': 1,
'GET': 2,
'POST': 1},
2: {'IP': '202.167.250.99',
'200': 1,
'404': 0,
'GET': 1,
'POST': 0},
3: {'IP': '60.4.236.27',
'200': 0,
'404': 1,
'GET': 0,
'POST': 1}}
使用enumerate
例如:
import re
from pprint import pprint

# Turn the raw log text into a list of stripped, non-empty lines.
data = [line.strip() for line in data.splitlines() if line.strip()]

# Aggregate per IP first and number the IPs afterwards. Keying the result by
# the *line* index would give every log line its own entry, so repeated
# requests from one IP would never be summed together.
stats_by_ip = {}
ip_order = []  # distinct IPs in order of first appearance
for line in data:
    # Captures (client IP, request method, status code) from one log line.
    g = re.findall(r'([\d.]+).*?(GET|POST).*?"\s(200|404)', line)
    if not g:
        continue
    ip_address, method, status_code = g[0]
    if ip_address not in stats_by_ip:
        stats_by_ip[ip_address] = {'404': 0, '200': 0, 'GET': 0, 'POST': 0}
        ip_order.append(ip_address)
    stats_by_ip[ip_address][method] += 1
    stats_by_ip[ip_address][status_code] += 1

# Re-key by a 1-based index and move the IP into the value under "IP".
out = {}
for ind, ip_address in enumerate(ip_order, 1):
    out[ind] = dict({"IP": ip_address}, **stats_by_ip[ip_address])

pprint(out, width=30)
更高效的做法是使用 re.search 函数(而不是 re.findall)和 collections.defaultdict 对象:
from collections import defaultdict
import pprint
import re

# Compiled once: captures (client IP, request method, status code).
LOG_LINE_RE = re.compile(r'([\d.]+).*?(GET|POST).*?"\s(200|404)')


def summarize_log(log_text):
    """Summarise request methods and status codes per client IP.

    Args:
        log_text: Access-log text, one entry per line. Blank lines and
            lines the pattern does not match are skipped.

    Returns:
        A list of single-key dicts ``{index: {'IP': ip, '200': n, '404': n,
        'GET': n, 'POST': n}}`` where ``index`` starts at 1 and follows the
        order in which each IP first appears.
    """
    ips = []  # distinct IPs in order of first appearance
    stats_dict = defaultdict(lambda: {'200': 0, '404': 0, 'GET': 0, 'POST': 0})
    for line in log_text.splitlines():
        m = LOG_LINE_RE.search(line.strip())
        if not m:
            continue
        ip_address, method, status_code = m.groups()
        # Membership test on the dict (O(1)) must happen before the first
        # increment, because indexing a defaultdict inserts the key.
        if ip_address not in stats_dict:
            ips.append(ip_address)
        stats_dict[ip_address][method] += 1
        stats_dict[ip_address][status_code] += 1
    return [{i: dict({'IP': ip_addr}, **stats_dict[ip_addr])}
            for i, ip_addr in enumerate(ips, 1)]


data = '''... you data lines '''
res = summarize_log(data)
# The pprint module is imported above; without that import this call raised NameError.
pprint.pprint(res, width=20)
输出:
[{1: {'200': 2,
'404': 1,
'GET': 2,
'IP': '120.115.144.240',
'POST': 1}},
{2: {'200': 1,
'404': 0,
'GET': 1,
'IP': '202.167.250.99',
'POST': 0}},
{3: {'200': 0,
'404': 1,
'GET': 0,
'IP': '60.4.236.27',
'POST': 1}}]
如何把字典的键移入值中,并用索引作为新的键?我写了一个程序,运行正常。但我需要给输出编号:把原来的键(IP 地址)移到值里,并用序号作为新的键。
# Sample Apache access-log text used by the snippets below: five entries
# coming from three distinct client IPs, one entry per line.
data = '''
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "GET /apng/assembler-2.0/assembler2.php HTTP/1.1" 200 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
202.167.250.99 - - [29/Aug/2017:04:41:10 -0400] "GET /apng/images/o_sample.png?1424751982?1424776117 HTTP/1.1" 200 115656 "http://bbs.mydigit.cn/read.php?tid=2186780&fpage=3" "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "GET /apng/assembler-2.0/assembler2.php HTTP/1.1" 200 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
60.4.236.27 - - [29/Aug/2017:04:42:46 -0400] "POST /apng/images/o_sample.png?1424751982?1424776117 HTTP/1.1" 404 115656 "http://bbs.mydigit.cn/read.php?tid=1952896" "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
120.115.144.240 - - [29/Aug/2017:04:40:03 -0400] "POST /apng/assembler-2.0/assembler2.php HTTP/1.1" 404 231 "http://littlesvr.ca/apng/history.html" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"'''
我的代码
import re
from pprint import pprint

# Turn the raw log text into a list of stripped, non-empty lines.
data = [text for text in (raw.strip() for raw in data.splitlines()) if text]

# Per-IP counters: how many GET/POST requests and 200/404 responses were seen.
out = {}
for entry in data:
    # Captures (client IP, request method, status code) from one log line.
    hits = re.findall(r'([\d.]+).*?(GET|POST).*?"\s(200|404)', entry)
    if hits:
        ip_address, method, status_code = hits[0]
        # First sighting of an IP creates its zeroed counter record.
        counters = out.setdefault(
            ip_address, {'404': 0, '200': 0, 'GET': 0, 'POST': 0}
        )
        counters[method] += 1
        counters[status_code] += 1

pprint(out, width=30)
我的输出如下
{'1.1.1.1': {'200': 2,
'404': 1,
'GET': 2,
'POST': 1},
'2.2.2.2': {'200': 1,
'404': 0,
'GET': 1,
'POST': 0},
'6.3.3.3': {'200': 0,
'404': 1,
'GET': 0,
'POST': 1}}
我想要的是
{1: {'IP': '120.115.144.240',
'200': 2,
'404': 1,
'GET': 2,
'POST': 1},
2: {'IP': '202.167.250.99',
'200': 1,
'404': 0,
'GET': 1,
'POST': 0},
3: {'IP': '60.4.236.27',
'200': 0,
'404': 1,
'GET': 0,
'POST': 1}}
使用enumerate
例如:
import re
from pprint import pprint

# Turn the raw log text into a list of stripped, non-empty lines.
data = [line.strip() for line in data.splitlines() if line.strip()]

# Aggregate per IP first and number the IPs afterwards. Keying the result by
# the *line* index would give every log line its own entry, so repeated
# requests from one IP would never be summed together.
stats_by_ip = {}
ip_order = []  # distinct IPs in order of first appearance
for line in data:
    # Captures (client IP, request method, status code) from one log line.
    g = re.findall(r'([\d.]+).*?(GET|POST).*?"\s(200|404)', line)
    if not g:
        continue
    ip_address, method, status_code = g[0]
    if ip_address not in stats_by_ip:
        stats_by_ip[ip_address] = {'404': 0, '200': 0, 'GET': 0, 'POST': 0}
        ip_order.append(ip_address)
    stats_by_ip[ip_address][method] += 1
    stats_by_ip[ip_address][status_code] += 1

# Re-key by a 1-based index and move the IP into the value under "IP".
out = {}
for ind, ip_address in enumerate(ip_order, 1):
    out[ind] = dict({"IP": ip_address}, **stats_by_ip[ip_address])

pprint(out, width=30)
更高效的做法是使用 re.search 函数(而不是 re.findall)和 collections.defaultdict 对象:
from collections import defaultdict
import pprint
import re

# Compiled once: captures (client IP, request method, status code).
LOG_LINE_RE = re.compile(r'([\d.]+).*?(GET|POST).*?"\s(200|404)')


def summarize_log(log_text):
    """Summarise request methods and status codes per client IP.

    Args:
        log_text: Access-log text, one entry per line. Blank lines and
            lines the pattern does not match are skipped.

    Returns:
        A list of single-key dicts ``{index: {'IP': ip, '200': n, '404': n,
        'GET': n, 'POST': n}}`` where ``index`` starts at 1 and follows the
        order in which each IP first appears.
    """
    ips = []  # distinct IPs in order of first appearance
    stats_dict = defaultdict(lambda: {'200': 0, '404': 0, 'GET': 0, 'POST': 0})
    for line in log_text.splitlines():
        m = LOG_LINE_RE.search(line.strip())
        if not m:
            continue
        ip_address, method, status_code = m.groups()
        # Membership test on the dict (O(1)) must happen before the first
        # increment, because indexing a defaultdict inserts the key.
        if ip_address not in stats_dict:
            ips.append(ip_address)
        stats_dict[ip_address][method] += 1
        stats_dict[ip_address][status_code] += 1
    return [{i: dict({'IP': ip_addr}, **stats_dict[ip_addr])}
            for i, ip_addr in enumerate(ips, 1)]


data = '''... you data lines '''
res = summarize_log(data)
# The pprint module is imported above; without that import this call raised NameError.
pprint.pprint(res, width=20)
输出:
[{1: {'200': 2,
'404': 1,
'GET': 2,
'IP': '120.115.144.240',
'POST': 1}},
{2: {'200': 1,
'404': 0,
'GET': 1,
'IP': '202.167.250.99',
'POST': 0}},
{3: {'200': 0,
'404': 1,
'GET': 0,
'IP': '60.4.236.27',
'POST': 1}}]