From 5fcd1eeefa1b1c99a069c421c19010aa543a4ac8 Mon Sep 17 00:00:00 2001
From: Michał 'rysiek' Woźniak
Date: Sun, 16 Feb 2014 20:50:32 +0100
Subject: [PATCH] initial import (and full functionality, I guess?)

---
 ab.sh         |  50 ++++++++++
 merge-data.py | 228 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 278 insertions(+)
 create mode 100755 ab.sh
 create mode 100755 merge-data.py

diff --git a/ab.sh b/ab.sh
new file mode 100755
index 0000000..03abcfd
--- /dev/null
+++ b/ab.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+RUNS="$1"
+NAME="$2"
+URL="$3"
+
+if [[ $RUNS == "" || $NAME == "" || $URL == "" ]]; then
+    echo
+    echo "    invocation:"
+    echo "    $0 <runs> <name> <url>"
+    echo
+    echo "    runs - number of runs of ab to perform"
+    echo "    name - test name, used in the names of the output files"
+    echo "    url  - url to test"
+    echo
+    echo "    results are saved to:"
+    echo "    ./results/<name>-<run>.csv"
+    echo "    ./results/<name>-<run>.log"
+    echo
+    echo "    you can use merge-data.py to merge data (duh) from all available csv and log files"
+    echo "    and get averages calculated automatically"
+    echo
+    exit 1
+fi
+
+REQUESTS=2000
+CONCURRENCY=32
+
+CSV="./results/${NAME}-<run>.csv"
+LOG="./results/${NAME}-<run>.log"
+
+# make sure the output directory exists
+mkdir -p ./results
+
+echo
+echo "benchmarking $URL:"
+echo " - name        : $NAME"
+echo " - runs        : $RUNS"
+echo " - requests    : $REQUESTS"
+echo " - concurrency : $CONCURRENCY"
+echo " - output:"
+echo "   - CSV: $CSV"
+echo "   - LOG: $LOG"
+echo " - running:"
+for (( I=0; I < RUNS; I++ )); do
+    echo "   - run ${I}..."
+    # ab reports progress on stderr and the final report on stdout;
+    # send the report to the log file and the progress lines to sed
+    ab -e "./results/${NAME}-${I}.csv" -n $REQUESTS -c $CONCURRENCY "$URL" 2>&1 >"./results/${NAME}-${I}.log" | sed -r -e 's/^C(.*)/     - c\1/' -e 's/^F(.*)/     - f\1/'
+done
+echo " - all done."
diff --git a/merge-data.py b/merge-data.py
new file mode 100755
index 0000000..9b9c92b
--- /dev/null
+++ b/merge-data.py
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+import os, sys, csv, glob, re
+
+# check if we got the dirname
+if len(sys.argv) < 2:
+    sys.exit("Please provide the directory to work in.")
+
+# check if it exists
+if not os.path.exists(sys.argv[1]):
+    sys.exit("The path '%s' does not seem to exist." % sys.argv[1])
+
+if not os.path.isdir(sys.argv[1]):
+    sys.exit("The path '%s' does not seem to be a directory." % sys.argv[1])
+
+if sys.argv[1][-1] != '/':
+    sys.argv[1] += '/'
+
+# info
+print "Working in '%s'" % sys.argv[1]
+
+# write the data out to a csv file
+def write_data(f, data):
+    with open('%s%s' % (sys.argv[1], f), 'wb') as csvfile:
+        csvw = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+        for row in data:
+            csvw.writerow(row)
+
+
+# handle the partial sums/averages
+def handle_partsum(data, gcol, gn=None):
+
+    # if gcol does not point at existing data columns (yet), there is nothing to average
+    if not data or gcol >= len(data[0]):
+        return False
+
+    # index into the data
+    i = 0
+
+    # add the groupname header
+    # only used in the CSV part of this script
+    if gn:
+        data[i].append(gn)
+        i += 1
+
+    # 'average' header, used everywhere
+    data[i].append('average')
+    i += 1
+
+    # let's iterate through the data rows
+    for i in range(i, len(data)):
+
+        # the data to be averaged
+        partsum = [float(x) for x in data[i][gcol:]]
+
+        # the average is "0" if there are less than 3 data points...
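+        # (this is a trimmed mean: one minimum and one maximum get dropped,
+        # so at least 3 data points are needed for anything to remain)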
+        if len(partsum) < 3:
+            data[i].append(0)
+        # ...otherwise it's the average of the datapoints (with the minimum and maximum exempt)
+        else:
+            data[i].append((sum(partsum) - min(partsum) - max(partsum)) / (len(partsum) - 2))
+
+#
+# handle the *.csv files generated by ab
+#
+
+print " - handling CSV files"
+
+# get the files
+sources = glob.glob('%s*[0-9].csv' % sys.argv[1])
+sources.sort()
+
+if not len(sources):
+    sys.exit("No files matching the required '*[0-9].csv' globbing pattern found in '%s'." % sys.argv[1])
+
+merged = []
+
+
+# iterate the source files
+gn = ''
+gcol = 1
+for f in sources:
+
+    # name of the current group (the regex also handles run numbers above 9)
+    ngn = re.sub(r'(.*\/)?(.+)-[0-9]+\.csv', r'\2', f)
+    # has the group changed?
+    if gn != ngn:
+        # handle the partsum of the group we just finished
+        handle_partsum(merged, gcol, gn)
+        # use the new group name
+        gn = ngn
+        # which is the active gcol?
+        if merged:
+            gcol = len(merged[0])
+
+
+    # open the source file
+    with open(f, 'rb') as csvfile:
+
+        # if merged is empty
+        if not merged:
+            # headline
+            merged.append(['Legend', gn])
+            # iterate the rows
+            for row in csv.reader(csvfile, delimiter=','):
+                # and add whole rows to merged
+                merged.append(row)
+
+        # merged is not empty and already contains the first column,
+        # so there is no need for the non-data column this time
+        else:
+            i = 0
+            merged[i].append(gn)
+            # iterate the rows
+            for row in csv.reader(csvfile, delimiter=','):
+                i += 1
+                merged[i].append(row[1])
+
+# handle the "dangling partsum" of the last group
+handle_partsum(merged, gcol, gn)
+
+print " - writing the result to '%smerged.csv'" % sys.argv[1]
+
+# write it down
+write_data('merged.csv', merged)
+
+#
+# *.log files need to be handled differently
+#
+
+print " - handling LOG files"
+
+# get the files
+sources = glob.glob('%s*[0-9].log' % sys.argv[1])
+sources.sort()
+
+if not len(sources):
+    sys.exit("No files matching the required '*[0-9].log' globbing pattern found in '%s'." % sys.argv[1])
+
+# the fields
+meta = [
+    ['Server Software'],
+    ['Server Hostname'],
+    ['Server Port'],
+    ['Document Path'],
+    ['Document Length'],
+    ['Concurrency Level']
+]
+merged = [
+    ['Legend'],
+    ['Time taken for tests'],
+    ['Complete requests'],
+    ['Failed requests'],
+    ['Write errors'],
+    ['Non-2xx responses'],
+    ['Total transferred'],
+    ['HTML transferred'],
+    ['Requests per second'],
+    ['Time per request'],
+    ['Time per request'],
+    ['Transfer rate']
+]
+
+# first, handle the metadata
+# the first file alone is entirely enough for that
+i = 0
+with open(sources[0], 'rt') as f:
+    for l in f:
+        if l[:len(meta[i][0])] == meta[i][0]:
+            meta[i].append(re.sub(r'%s: +(.*)[\n\r]*' % meta[i][0], r'\1', l))
+            i += 1
+            if i >= len(meta):
+                break
+
+# we have the metadata, let's write it to a file!
+print " - writing metadata to '%smeta.csv'" % sys.argv[1]
+write_data('meta.csv', meta)
+
+# now, handle the statistical data
+# each source file, please!
+gn = ''
+gcol = 1
+for s in sources:
+
+    # name of the current group
+    ngn = re.sub(r'(.*\/)?(.+)-[0-9]+\.log', r'\2', s)
+    # has the group changed?
+    if gn != ngn:
+        # handle the partsum of the group we just finished
+        handle_partsum(merged, gcol)
+        # use the new group name
+        gn = ngn
+        # the "group column" (the first column in a group)
+        gcol = len(merged[0])
+
+    # add the header
+    merged[0].append(gn)
+
+    # index into the merged list, skipping the first row (the header)
+    i = 1
+
+    # on with it!
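+    # NOTE: ab prints these stats in the order used in 'merged' above, but
+    # omits some lines entirely (e.g. 'Non-2xx responses' when all responses
+    # were 2xx), so scan forward for the next matching row and pad any
+    # skipped rows with zeroes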
+    with open(s, 'rt') as f:
+        for l in f:
+            # find the row this line belongs to
+            for j in range(i, len(merged)):
+                if l[:len(merged[j][0])] == merged[j][0]:
+                    # pad the rows skipped over (stats ab did not print for this run)
+                    for k in range(i, j):
+                        merged[k].append('0')
+                    merged[j].append(re.sub(r'%s: +([0-9,\.]+).*[\n\r]*' % merged[j][0], r'\1', l))
+                    i = j + 1
+                    break
+            if i >= len(merged):
+                break
+
+# handle the dangling partsum of the last group
+handle_partsum(merged, gcol)
+
+
+# we have the stats, let's write these to a file!
+print " - writing the result to '%smerged-logs.csv'" % sys.argv[1]
+write_data('merged-logs.csv', merged)
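
For reference, a typical session with these two tools might look like the
following sketch (the test names and URL are hypothetical, and ab needs to
be installed; ab.sh creates the ./results/ directory itself):

    ./ab.sh 5 apache http://127.0.0.1:8080/
    ./ab.sh 5 nginx http://127.0.0.1:8081/
    ./merge-data.py ./results/

merge-data.py then writes ./results/merged.csv (percentile data merged from
all csv files, with a trimmed-mean 'average' column per test name),
./results/meta.csv (benchmark metadata from the first log file) and
./results/merged-logs.csv (summary statistics merged from all log files).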