#!/usr/bin/python
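"""Merge the per-run CSV and log files produced by ApacheBench (ab) in a
given directory into summary files: 'merged.csv' (from the *[0-9].csv
files), 'meta.csv' (run metadata) and 'merged-logs.csv' (from the
*[0-9].log files).

Usage: ./merge-data.py <directory>
"""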
import os, sys, csv, glob, re
# check if we got the dirname
if len(sys.argv) < 2:
    sys.exit("Please provide the directory to work in.")
# check if it exists and is a directory
if not os.path.exists(sys.argv[1]):
    sys.exit("The path '%s' does not seem to exist." % sys.argv[1])
if not os.path.isdir(sys.argv[1]):
    sys.exit("The path '%s' does not seem to be a directory." % sys.argv[1])
# make sure the path ends with a slash
if sys.argv[1][-1] != '/':
    sys.argv[1] += '/'
# info
print "Working in '%s'" % sys.argv[1]
# writing the data out to a csv file
def write_data(f, data):
    with open('%s%s' % (sys.argv[1], f), 'wb') as csvfile:
        csvw = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in data:
            csvw.writerow(row)
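# a quick usage sketch (row contents hypothetical):
#   write_data('merged.csv', [['Legend', 'run'], ['0', '4']])
# writes a two-row CSV file into the working directory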
# handling the partial sums/averages
def handle_partsum(data, gcol, gn=None):
    # if gcol is at (or past) the last column, there is nothing to average yet
    if not data or gcol >= len(data[0]):
        return False
    # indexing the data
    i = 0
    # add the groupname header
    # (only used in the CSV part of this script)
    if gn:
        data[i].append(gn)
        i += 1
    # 'average' header, used everywhere
    data[i].append('average')
    i += 1
    # let's iterate through the remaining rows and append each partial average
    for i in range(i, len(data)):
        # the data points to be averaged
        partsum = [float(v) for v in data[i][gcol:]]
        # pretty straightforward -- the average is "0" if there are fewer than 3 data points...
        if len(partsum) < 3:
            data[i].append(0)
        # ...otherwise it's the average of the data points (with the minimum and maximum exempt)
        else:
            data[i].append((sum(partsum) - min(partsum) - max(partsum)) / (len(partsum) - 2))
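# a worked example of the trimmed average above, with hypothetical values:
#   partsum = [3.0, 1.0, 2.0, 10.0]
#   (16.0 - 1.0 - 10.0) / (4 - 2) = 5.0 / 2 = 2.5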
#
# handling the *.csv files generated by ab
#
print " - handling CSV files"
# get the files
sources = glob.glob('%s*[0-9].csv' % sys.argv[1])
sources.sort()
if not sources:
    sys.exit("No files matching the required '*[0-9].csv' globbing pattern found in '%s'." % sys.argv[1])
merged = []
# iterate the source files
gn = ''
gcol = 1
for f in sources:
    # name of the current group
    ngn = re.sub(r'(.*\/)?(.+)-[0-9]\.csv', r'\2', f)
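    # e.g. a (hypothetical) 'results/static-0.csv' yields the group name 'static'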
    # has the group changed?
    if gn != ngn:
        # handle the partsum of the group we just finished
        handle_partsum(merged, gcol, gn)
        # ah, it has! use the new group name
        gn = ngn
        # which is the active gcol?
        if merged:
            gcol = len(merged[0])
    # open the source file
    with open(f, 'rb') as csvfile:
        # if merged is empty
        if not merged:
            # headline
            merged.append(['Legend', gn])
            # iterate the rows
            for row in csv.reader(csvfile, delimiter=','):
                # and add whole rows to merged
                merged.append(row)
        # ah, so merged is not empty and already contains the first column
        # no need for the non-data column then, eh?
        else:
            i = 0
            merged[i].append(gn)
            # iterate the rows
            for row in csv.reader(csvfile, delimiter=','):
                i += 1
                merged[i].append(row[1])
# handle the "dangling" partsum of the last group
handle_partsum(merged, gcol, gn)
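# at this point merged looks roughly like this for a hypothetical
# group 'run' measured three times:
#   Legend,run,run,run,run
#   Percentage served,Time in ms,Time in ms,Time in ms,average
#   0,4,5,6,5.0    (5.0 being the trimmed average of 4, 5 and 6)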
print " - writing the result to '%smerged.csv'" % sys.argv[1]
# write it down
write_data('merged.csv', merged)
#
# *.log files need to be handled differently
#
print " - handing LOG files"
# get the files
sources = glob.glob('%s*[0-9].log' % sys.argv[1])
sources.sort()
if not len(sources):
sys.exit("No files matching the required '*[0-9].log' globbing pattern found in '%s'." % sys.argv[1])
# the metadata fields to look for
meta = [
    ['Server Software'],
    ['Server Hostname'],
    ['Server Port'],
    ['Document Path'],
    ['Document Length'],
    ['Concurrency Level']
]
# the statistic rows to collect
merged = [
    ['Legend'],
    ['Time taken for tests'],
    ['Complete requests'],
    ['Failed requests'],
    ['Write errors'],
    ['Non-2xx responses'],
    ['Total transferred'],
    ['HTML transferred'],
    ['Requests per second'],
    ['Time per request'],  # (mean)
    ['Time per request'],  # (mean, across all concurrent requests)
    ['Transfer rate']
]
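# for reference, the matched lines in ab's log output look roughly like
# this (values hypothetical):
#   Requests per second:    1234.56 [#/sec] (mean)
#   Time per request:       8.101 [ms] (mean)
# note that ab only prints 'Non-2xx responses' when such responses occurred;
# a log without that line leaves the later rows of that file without a value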
# first, handle metadata
# from the first file only, that's entirely enough
i = 0
with open(sources[0], 'rt') as f:
    for l in f:
        if l[:len(meta[i][0])] == meta[i][0]:
            meta[i].append(re.sub(r'%s: +(.*)[\n\r]*' % meta[i][0], r'\1', l))
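            # e.g. 'Document Length:        1024 bytes' yields
            # '1024 bytes' (value hypothetical)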
            i += 1
            if i >= len(meta):
                break
# we have the metadata, let's write these to a file!
print " - writing metadata to '%smeta.csv'" % sys.argv[1]
write_data('meta.csv', meta)
# now, handle statistical data
# each source file please!
gn = ''
gcol = 1
for s in sources:
    # name of the current group
    ngn = re.sub(r'(.*\/)?(.+)-[0-9]\.log', r'\2', s)
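    # same grouping rule as for the CSV files above;
    # e.g. a (hypothetical) 'results/static-2.log' yields 'static'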
    # has the group changed?
    if gn != ngn:
        # handle partsum
        handle_partsum(merged, gcol)
        # ah, it has! use the new group name
        gn = ngn
        # the "group column" (the first column in a group)
        gcol = len(merged[0])
    # add the header
    merged[0].append(gn)
    # index of the merged list, skipping the first row (the header)
    i = 1
    # on with it!
    with open(s, 'rt') as f:
        for l in f:
            if l[:len(merged[i][0])] == merged[i][0]:
                merged[i].append(re.sub(r'%s: +([0-9,\.]+).*[\n\r]*' % merged[i][0], r'\1', l))
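                # e.g. 'Transfer rate:          345.67 [Kbytes/sec] received'
                # yields '345.67' (value hypothetical)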
                i += 1
                if i >= len(merged):
                    break
# handle the dangling partsum of the last group
handle_partsum(merged, gcol)
# we have the stats, let's write these to a file!
print " - writing the result to '%smerged-logs.csv'" % sys.argv[1]
write_data('merged-logs.csv', merged)