awk、grep是常用的*nix工具,使用这两个工具可以高效地处理文件和文本流。
- 提取日志中的ip地址
使用grep
cat access.log | grep -oE '[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+'
使用awk(match的第三个参数是gawk扩展,需要使用gawk)
awk 'match($0, /[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+/, a) { print a[0] }' access.log
输出(awk命令的结果,每行只取第一个匹配;grep -o会输出每行中的所有匹配,因此还会包含行尾引号中的IP):
47.203.93.242
47.203.93.242
47.203.93.242
47.203.93.242
104.131.141.125
5.188.10.168
104.236.178.116
162.243.139.14
123.151.42.61
60.191.38.77
172.68.65.170
172.68.65.170
162.158.78.228
162.158.78.228
162.158.75.18
162.158.75.234
162.158.75.234
162.158.75.234
- 提取日志中特定访问记录
使用grep
# 提取blog相关的访问记录
cat access.log | grep -nE '/blog/'
使用awk
awk '/\/blog\// { print NR, $0 }' access.log
输出:
17 162.158.75.234 - - [23/Dec/2017:23:23:16 -0500] "GET /blog/ HTTP/1.1" 502 173 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:48.0) Gecko/20100101 Firefox/48.0" "35.226.253.128"
- 输出匹配的行及其后面的2行信息(对于报错日志的查找很有用;其中\y是gawk的单词边界匹配符)
awk '/\ydef\y/ {c=1} c>0 && c<=3{ c+=1; print NR, $0 }' v2ex_sign.py
# out
5 def login(self):
6 loginpage = self.s.get("https://www.v2ex.com/signin", verify=False).text
7 payload={
19 def sign(self):
20 if self.s.get("https://www.v2ex.com/mission/daily", verify=False).text.find("fa-ok-sign")!=-1:
21 print self.u+" 已领取过奖励!"
29 def __init__(self,u,p):
30 self.u=u
31 self.p=p
- 筛选2017-12-23 16:07:12 到 2017-12-23 23:00:00 的nginx访问日志(示例中时间为UTC-5时区;注意这里是字符串比较,仅当起止时间在同一个月内时结果才正确)
awk '$4 >= "[23/Dec/2017:16:07:12" && $4 <= "[23/Dec/2017:23:00:00" { print NR, $0 }' access.log
# output
8 162.243.139.14 - - [23/Dec/2017:16:07:12 -0500] "GET / HTTP/1.1" 403 169 "-" "Mozilla/5.0 zgrab/0.x" "-"
9 123.151.42.61 - - [23/Dec/2017:16:18:44 -0500] "GET http://www.baidu.com/ HTTP/1.1" 403 169 "-" "-" "-"
10 60.191.38.77 - - [23/Dec/2017:17:15:00 -0500] "GET / HTTP/1.1" 403 169 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0" "-"
11 172.68.65.170 - - [23/Dec/2017:21:35:38 -0500] "GET /robots.txt HTTP/1.1" 502 575 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 OPR/36.0.2130.32" "35.227.116.160"
12 172.68.65.170 - - [23/Dec/2017:21:35:38 -0500] "GET /robots.txt HTTP/1.1" 502 575 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 OPR/36.0.2130.32" "35.227.116.160"
13 162.158.78.228 - - [23/Dec/2017:21:35:38 -0500] "GET /index.php?option=com_myblog&task=ajaxupload HTTP/1.1" 502 575 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 OPR/36.0.2130.32" "35.227.116.160"
14 162.158.78.228 - - [23/Dec/2017:21:35:38 -0500] "GET /index.php?option=com_adsmanager&task=upload&tmpl=component HTTP/1.1" 502 575 "-" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 OPR/36.0.2130.32" "35.227.116.160"