#! /usr/bin/python
# logsummize.py
#
# Purpose: collects size and count statistics about common "static"
# files (like 'jpg','png','gif','css','js') from an Apache access log
# in combined format
#
# Usage: the script has to be passed the name of the log file and,
# optionally, a date to restrict the search to. The date is treated as a
# string so pass it as "04/Apr/2009"
#
# Example: logsummize.py access.log 04/Apr/2009
#
# Compatibility: runs unchanged on Python 2.7 and Python 3.
#
# Author: Claudio Cicali
# Date: 8/4/2009
#
# Copyright 2009 Claudio Cicali
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import re
import sys
from collections import OrderedDict

# File extensions that are summarized; report lines follow this order.
EXTENSIONS = ('jpg', 'png', 'gif', 'css', 'js')

# Captures the text between the first "[" and the following ":" of a line,
# e.g. "04/Apr/2009" out of "[04/Apr/2009:10:00:00 +0200]".
DATE_PATTERN = re.compile(r"\[(.*?):")


def build_request_pattern(extensions):
    """Compile the regex that recognizes a GET request for a static file.

    The pattern matches fragments such as
    ``"GET /img/btnbg-2.png HTTP/1.1" 200 1234`` and captures, in order,
    the extension, the three-digit status and the numeric size. The
    extension must be immediately followed by `` HTTP/1.x"``, so requests
    carrying a query string after the file name are not matched.

    extensions -- iterable of extension strings (without the leading dot).
    """
    # Escape each extension so that regex metacharacters are taken literally.
    alternatives = '|'.join(re.escape(ext) for ext in extensions)
    return re.compile(
        r'"GET .*\.(%s) HTTP/1\.\d" (\d{3}) (\d+)' % alternatives)


def summarize(lines, datelimit='', extensions=EXTENSIONS):
    """Accumulate hit counts and byte sizes per extension.

    lines      -- iterable of log lines (an open file works).
    datelimit  -- when non-empty, lines whose bracketed date differs from
                  this string are skipped; lines in which no date can be
                  found are still examined.
    extensions -- extensions to collect statistics for.

    Returns an ordered mapping ``extension -> {'size': int, 'count': int}``
    with one entry per requested extension. Only requests answered with
    status 200 are counted.
    """
    totals = OrderedDict(
        (ext, {'size': 0, 'count': 0}) for ext in extensions)
    if not totals:
        # Nothing to look for; an empty alternation would match bogus paths.
        return totals

    # Compiled once here instead of once per log line.
    request_pattern = build_request_pattern(extensions)

    for line in lines:
        if datelimit:
            stamp = DATE_PATTERN.search(line)
            if stamp is not None and stamp.group(1) != datelimit:
                continue

        hit = request_pattern.search(line)
        if hit is None:
            continue

        extension, status, size = hit.groups()
        if status == "200":
            entry = totals[extension]
            entry['count'] += 1
            entry['size'] += int(size)

    return totals


def format_report(totals):
    """Render the statistics returned by summarize() as the report text.

    One "<count> <extension>: <size>KB" line per entry, followed by a blank
    line and the grand total. Sizes are whole kilobytes, rounded down.
    """
    report = []
    total_size = total_items = 0
    for extension, data in totals.items():
        report.append("%d %s: %dKB"
                      % (data['count'], extension, data['size'] // 1024))
        total_size += data['size']
        total_items += data['count']
    report.append("\n%s items, %dKB total" % (total_items, total_size // 1024))
    return '\n'.join(report)


def main(argv=None):
    """Command-line entry point: summarize the log named in argv[1].

    argv -- argument list in sys.argv layout; defaults to sys.argv.
            argv[2], when present, is the date to restrict the search to.

    Exits with status -1 when the log file argument is missing or the file
    cannot be opened.
    """
    if argv is None:
        argv = sys.argv

    try:
        log = open(argv[1], 'r')
    except (IndexError, IOError):
        # Message text is kept verbatim for anything that greps the output.
        print("Log file missing or unreadble (first script parameter)")
        sys.exit(-1)

    datelimit = argv[2] if len(argv) > 2 else ''

    # The context manager guarantees the log file is closed.
    with log:
        totals = summarize(log, datelimit)

    print(format_report(totals))


if __name__ == "__main__":
    main()