1 require 'rubygems'
 2 require 'inline'
 3 
 4 class ParseLogInC
 5   inline do |builder|
 6     builder.c <<-CODE
 7       #include 'ruby.h'
 8       #include 'stdio.h'f
 9       static VALUE nginx(VALUE line) {
10         VALUE ary = rb_ary_new();
11 
12         int lt = strlen(STR2CSTR(line));
13         char ip_str[24], time_str[26], path[lt > 200 ? lt : 200], i1[4], i2[lt/4];
14         sscanf(STR2CSTR(line), "%s - - [%s +0800] %s %s %s", ip_str, time_str, i1, path, i2);
15         rb_ary_push(ary, rb_str_new2(ip_str));
16         rb_ary_push(ary, rb_str_new2(time_str));
17         rb_ary_push(ary, rb_str_new2(path));
18 
19         return ary;
20       }
21     CODE
22   end
23 end
24 
25 NginxLogRegexp = Regexp.new [
26   '((?:\d+\.){3}\d+)', # ip
27   '[\ -]*',
28   '\[(.*?)\]', # [$time_local]
29   '[\" ]*',
30   '([A-Z]*) ', # HTTP verb
31   '(.*)', # url
32 ].join
33 
34 @plc = ParseLogInC.new
35 [%q(123.159.55.238 - - [28/Aug/2012:00:02:11 +0800] "GET //app?id=26964&client_id=142&channel_id=350 HTTP/1.1" 302 0 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)" "-" ---- download.eoemarket.com),
36     %q(113.108.11.51 - - [26/Sep/2012:11:12:46 +0800] "GET //app?id=96061&client_id=140&channel_id=319 HTTP/1.1" 302 0 "http://s.myapp.com/dw_stats.jsp?dt=0&sid=AevJEO5AXd56AEqHdh0oAIlG&sn=4&idx=0&siteId=4&apkid=154699&st=0&url=sHdmymPs30ZhvM4YgPxtls6Sl2gYu8dlvbX4xbgOYKjIcB7FqAKXQLATVWAKd0FGCdLEmN%2FB%2FD%2BF%0AtH7nT7X9TWHPqokJ5nFE2YCDnMBkFNM%3D&key=%E6%89%8B%E6%9C%BA%E6%89%93%E7%A2%9F%E6%9C%BA" "MQQBrowser/3.5/Adr (Linux; U; 2.3.6; zh-cn; HUAWEI U8661 Build/U8661V100R001C17B827;320*480)" "211.138.243.113" ---- download.eoemarket.com),
37     %q(117.136.15.146 - - [26/Sep/2012:17:19:58 +0800] "GET /app?id=27034&client_id=12&channel_id=201&uniquely_code=333228032439999999990&w=480&dpi=240&sdk=15&brand=alps&product_id=MX9984K&model=e1809c_v75_gq1008_9p017&product=e1809c_v75_gq1008_9p017&locale=zh_CN&version_code=30&uniquely_code=359220438409074&channel_key=paU3GRB4hk9zOT3gmTTGA&api_key=jPo2Fs9C5a33c9TRNJTPxw&nonce=ef306825998be5dexeb1f6lv0a1b5fca49f8a99a&timestamp=1339080849896&api_sig=d7ew8f2fc5e90dc9e4e0899b7e87o896 HTTP/1.1" 404 3 "-" "Dalvik/1.6.0 (Linux; U; Android 4.0.3; e1809c_v75_gq1008_9p017 Build/IML74K)" "-" ---- download.eoemarket.com)
38 ].each do |line|
39   l1, l2 = line.split("HTTP/")
40   t1 = Time.now; @plc.nginx(l1); puts "c #{(Time.now - t1).to_f*100*100}"
41   t2 = Time.now; oa = l1.match(NginxLogRegexp).captures; ary = [oa[0], oa[1], oa[2]]; puts "r #{(Time.now - t2).to_f*100*100}"
42 end
43 
44 __END__
45 c 0.14
46 r 0.37
47 c 0.04
48 r 0.1
49 c 0.05
50 r 1.42