From 5fcd1eeefa1b1c99a069c421c19010aa543a4ac8 Mon Sep 17 00:00:00 2001
From: Michał 'rysiek' Woźniak
Date: Sun, 16 Feb 2014 20:50:32 +0100
Subject: [PATCH] initial import (and full functionality, I guess?)

---
 ab.sh         |  50 ++++++++++
 merge-data.py | 228 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 278 insertions(+)
 create mode 100755 ab.sh
 create mode 100755 merge-data.py

diff --git a/ab.sh b/ab.sh
new file mode 100755
index 0000000..03abcfd
--- /dev/null
+++ b/ab.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+RUNS="$1"
+NAME="$2"
+URL="$3"
+
+if [[ $RUNS == "" || $NAME == "" || $URL == "" ]]; then
+    echo
+    echo "    invocation:"
+    echo "    $0 <runs> <name> <url>"
+    echo
+    echo "    runs - number of runs of ab to perform"
+    echo "    name - test name, used in the names of the output files"
+    echo "    url  - url to test"
+    echo
+    echo "    results are saved to:"
+    echo "    ./results/<name>-<run>.csv"
+    echo "    ./results/<name>-<run>.log"
+    echo
+    echo "    you can use merge-data.py to merge data (duh) from all available csv and log files"
+    echo "    and get averages calculated automatically"
+    echo
+    exit 1
+fi
+
+REQUESTS=2000
+CONCURRENCY=32
+
+CSV="./results/${NAME}-<run>.csv"
+LOG="./results/${NAME}-<run>.log"
+
+# make sure the output directory exists
+mkdir -p ./results
+
+echo
+echo "benchmarking $URL:"
+echo " - name        : $NAME"
+echo " - runs        : $RUNS"
+echo " - requests    : $REQUESTS"
+echo " - concurrency : $CONCURRENCY"
+echo " - output:"
+echo "   - CSV: $CSV"
+echo "   - LOG: $LOG"
+echo " - running:"
+for (( I=0; I < RUNS; I++ )); do
+    echo "   - run ${I}..."
+    # ab reports progress on stderr and the final report on stdout;
+    # send the report to the log file and the progress lines to sed
+    ab -e "./results/${NAME}-${I}.csv" -n $REQUESTS -c $CONCURRENCY "$URL" 2>&1 >"./results/${NAME}-${I}.log" | sed -r -e 's/^C(.*)/     - c\1/' -e 's/^F(.*)/     - f\1/'
+done
+echo " - all done."
diff --git a/merge-data.py b/merge-data.py
new file mode 100755
index 0000000..9b9c92b
--- /dev/null
+++ b/merge-data.py
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+import os, sys, csv, glob, re
+
+# check if we got the dirname
+if len(sys.argv) < 2:
+    sys.exit("Please provide the directory to work in.")
+
+# check if it exists
+if not os.path.exists(sys.argv[1]):
+    sys.exit("The path '%s' does not seem to exist." % sys.argv[1])
+
+if not os.path.isdir(sys.argv[1]):
+    sys.exit("The path '%s' does not seem to be a directory." % sys.argv[1])
+
+if sys.argv[1][-1] != '/':
+    sys.argv[1] += '/'
+
+# info
+print "Working in '%s'" % sys.argv[1]
+
+# write the data out to a csv file
+def write_data(f, data):
+    with open('%s%s' % (sys.argv[1], f), 'wb') as csvfile:
+        csvw = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+        for row in data:
+            csvw.writerow(row)
+
+
+# handle the partial sums/averages
+def handle_partsum(data, gcol, gn=None):
+
+    # if gcol does not point at existing data columns (yet), there is nothing to average
+    if not data or gcol >= len(data[0]):
+        return False
+
+    # index into the data
+    i = 0
+
+    # add the groupname header
+    # only used in the CSV part of this script
+    if gn:
+        data[i].append(gn)
+        i += 1
+
+    # 'average' header, used everywhere
+    data[i].append('average')
+    i += 1
+
+    # let's iterate through the data rows
+    for i in range(i, len(data)):
+
+        # the data to be averaged
+        partsum = [float(x) for x in data[i][gcol:]]
+
+        # the average is "0" if there are less than 3 data points...
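+        # (this is a trimmed mean: one minimum and one maximum get dropped,
+        # so at least 3 data points are needed for anything to remain)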
+        if len(partsum) < 3:
+            data[i].append(0)
+        # ...otherwise it's the average of the datapoints (with the minimum and maximum exempt)
+        else:
+            data[i].append((sum(partsum) - min(partsum) - max(partsum)) / (len(partsum) - 2))
+
+#
+# handle the *.csv files generated by ab
+#
+
+print " - handling CSV files"
+
+# get the files
+sources = glob.glob('%s*[0-9].csv' % sys.argv[1])
+sources.sort()
+
+if not len(sources):
+    sys.exit("No files matching the required '*[0-9].csv' globbing pattern found in '%s'." % sys.argv[1])
+
+merged = []
+
+
+# iterate the source files
+gn = ''
+gcol = 1
+for f in sources:
+
+    # name of the current group (the regex also handles run numbers above 9)
+    ngn = re.sub(r'(.*\/)?(.+)-[0-9]+\.csv', r'\2', f)
+    # has the group changed?
+    if gn != ngn:
+        # handle the partsum of the group we just finished
+        handle_partsum(merged, gcol, gn)
+        # use the new group name
+        gn = ngn
+        # which is the active gcol?
+        if merged:
+            gcol = len(merged[0])
+
+
+    # open the source file
+    with open(f, 'rb') as csvfile:
+
+        # if merged is empty
+        if not merged:
+            # headline
+            merged.append(['Legend', gn])
+            # iterate the rows
+            for row in csv.reader(csvfile, delimiter=','):
+                # and add whole rows to merged
+                merged.append(row)
+
+        # merged is not empty and already contains the first column,
+        # so there is no need for the non-data column this time
+        else:
+            i = 0
+            merged[i].append(gn)
+            # iterate the rows
+            for row in csv.reader(csvfile, delimiter=','):
+                i += 1
+                merged[i].append(row[1])
+
+# handle the "dangling partsum" of the last group
+handle_partsum(merged, gcol, gn)
+
+print " - writing the result to '%smerged.csv'" % sys.argv[1]
+
+# write it down
+write_data('merged.csv', merged)
+
+#
+# *.log files need to be handled differently
+#
+
+print " - handling LOG files"
+
+# get the files
+sources = glob.glob('%s*[0-9].log' % sys.argv[1])
+sources.sort()
+
+if not len(sources):
+    sys.exit("No files matching the required '*[0-9].log' globbing pattern found in '%s'." % sys.argv[1])
+
+# the fields
+meta = [
+    ['Server Software'],
+    ['Server Hostname'],
+    ['Server Port'],
+    ['Document Path'],
+    ['Document Length'],
+    ['Concurrency Level']
+]
+merged = [
+    ['Legend'],
+    ['Time taken for tests'],
+    ['Complete requests'],
+    ['Failed requests'],
+    ['Write errors'],
+    ['Non-2xx responses'],
+    ['Total transferred'],
+    ['HTML transferred'],
+    ['Requests per second'],
+    ['Time per request'],
+    ['Time per request'],
+    ['Transfer rate']
+]
+
+# first, handle the metadata
+# the first file alone is entirely enough for that
+i = 0
+with open(sources[0], 'rt') as f:
+    for l in f:
+        if l[:len(meta[i][0])] == meta[i][0]:
+            meta[i].append(re.sub(r'%s: +(.*)[\n\r]*' % meta[i][0], r'\1', l))
+            i += 1
+            if i >= len(meta):
+                break
+
+# we have the metadata, let's write it to a file!
+print " - writing metadata to '%smeta.csv'" % sys.argv[1]
+write_data('meta.csv', meta)
+
+# now, handle the statistical data
+# each source file, please!
+gn = ''
+gcol = 1
+for s in sources:
+
+    # name of the current group
+    ngn = re.sub(r'(.*\/)?(.+)-[0-9]+\.log', r'\2', s)
+    # has the group changed?
+    if gn != ngn:
+        # handle the partsum of the group we just finished
+        handle_partsum(merged, gcol)
+        # use the new group name
+        gn = ngn
+        # the "group column" (the first column in a group)
+        gcol = len(merged[0])
+
+    # add the header
+    merged[0].append(gn)
+
+    # index into the merged list, skipping the first row (the header)
+    i = 1
+
+    # on with it!
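+    # NOTE: ab prints these stats in the order used in 'merged' above, but
+    # omits some lines entirely (e.g. 'Non-2xx responses' when all responses
+    # were 2xx), so scan forward for the next matching row and pad any
+    # skipped rows with zeroes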
+    with open(s, 'rt') as f:
+        for l in f:
+            # find the row this line belongs to
+            for j in range(i, len(merged)):
+                if l[:len(merged[j][0])] == merged[j][0]:
+                    # pad the rows skipped over (stats ab did not print for this run)
+                    for k in range(i, j):
+                        merged[k].append('0')
+                    merged[j].append(re.sub(r'%s: +([0-9,\.]+).*[\n\r]*' % merged[j][0], r'\1', l))
+                    i = j + 1
+                    break
+            if i >= len(merged):
+                break
+
+# handle the dangling partsum of the last group
+handle_partsum(merged, gcol)
+
+
+# we have the stats, let's write these to a file!
+print " - writing the result to '%smerged-logs.csv'" % sys.argv[1]
+write_data('merged-logs.csv', merged)
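
For reference, a typical session with these two tools might look like the
following sketch (the test names and URL are hypothetical, and ab needs to
be installed; ab.sh creates the ./results/ directory itself):

    ./ab.sh 5 apache http://127.0.0.1:8080/
    ./ab.sh 5 nginx http://127.0.0.1:8081/
    ./merge-data.py ./results/

merge-data.py then writes ./results/merged.csv (percentile data merged from
all csv files, with a trimmed-mean 'average' column per test name),
./results/meta.csv (benchmark metadata from the first log file) and
./results/merged-logs.csv (summary statistics merged from all log files).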