Updated the IdeaBank fetcher/parser; it works, but still needs some refactoring and cleanup.
parent
3a9df521c0
commit
5f476ae95b
|
@ -26,46 +26,72 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import csv
|
||||
from getopt import getopt
|
||||
import datetime
|
||||
import re
|
||||
import hashlib
|
||||
import requests
|
||||
from config import CurrentConfig
|
||||
from datetime import date, datetime
|
||||
from getopt import getopt, GetoptError
|
||||
from sqlalchemy import Column, Integer, String, Boolean, Date, create_engine, MetaData
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.schema import CreateTable, CreateIndex
|
||||
from time import time
|
||||
import bs4
|
||||
import time
|
||||
import random
|
||||
import csv
|
||||
import enum
|
||||
from datetime import date
|
||||
import hashlib
|
||||
import random
|
||||
import re
|
||||
import requests
|
||||
import sys
|
||||
|
||||
if not __name__ == "__main__":
|
||||
from webapp import app
|
||||
else:
|
||||
app = type("",(object,),{"config": {"SECRET": "foobar", "OWN_ACCOUNTS": ["PL48195000012006000648890002", "PL21195000012006000648890003", "PL91195000012006000648890004", "PL64195000012006000648890005", "PL45114010100000541244001003"]}})()
|
||||
config = {key: getattr(CurrentConfig,key) for key in dir(CurrentConfig) if key.isupper()}
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
class RawTransfer(Base):
    """Declarative mapping of one bank-history row onto ``raw_transfer``.

    The columns are populated by ``IBRow.__init__`` (the subclass used by the
    parser).  Column order is kept as-is because it determines the column
    order of the emitted CREATE TABLE statement.
    """

    __tablename__ = 'raw_transfer'

    id = Column(Integer, primary_key=True)
    # Original CSV line as received (the parser passes ";".join(row)).
    raw = Column(String(512))
    # SHA-256 hex digest of SECRET + str(row); used to skip rows already stored.
    uid = Column(String(128), index=True)
    # Account number whose history this row was read from.
    on_account = Column(String(32), index=True)
    # Displayed as amount/100 "." amount%100, i.e. stored in hundredths.
    amount = Column(Integer)
    currency = Column(String(8))
    # Parsed from the "date completed" CSV field using "%d.%m.%Y".
    date = Column(Date)
    # One of "BANK_FEE", "OUT_FROM_OWN", "OUT_TO_OWN", "OUT", "IN".
    type = Column(String(16))
    # IBRow.__init__ currently always sets this to 1.
    index = Column(Integer)

    title = Column(String(256))

    # Stored as int(a)*100 + int(b), i.e. in hundredths like ``amount``.
    balance = Column(Integer)
    balance_currency = Column(String(8))

    # Account numbers as returned by IBParser.parse_account_number().
    from_account = Column(String(32))
    to_account = Column(String(32))

    from_name = Column(String(256))
    to_name = Column(String(256))
|
||||
|
||||
class IBParseError(Exception):
    """Raised when downloaded account history cannot be interpreted.

    Used in this module for a missing CSV header, a row whose field count
    does not match the header, and a row whose transfer type cannot be
    determined.
    """
|
||||
|
||||
|
||||
class IBRow(object):
|
||||
SECRET = app.config["SECRET"]
|
||||
OWN_ACCOUNTS = app.config["OWN_ACCOUNTS"]
|
||||
class IBRow(RawTransfer):
|
||||
SECRET = config["SECRET"]
|
||||
OWN_ACCOUNTS = config["OWN_ACCOUNTS"]
|
||||
|
||||
def __unicode__(self):
|
||||
return u"{} *{} #{} @{} -\"{}\" -#{} => +\"{}\" +#{} [{}.{:02d} {}] ({}.{:02d} {}) ~\"{}\"".format(self.type, self.index, self.current_account, self.time, self.from_name, self.from_account, self.to, self.account, self.amount/100, self.amount%100, self.currency, self.balance/100, self.balance%100, self.balance_currency, self.title)
|
||||
return u"{} *{} #{} @{} -\"{}\" -#{} => +\"{}\" +#{} [{}.{:02d} {}] ({}.{:02d} {}) ~\"{}\"".format(self.type, self.index, self.on_account, self.date, self.from_name, self.from_account, self.to_name, self.to_account, self.amount/100, self.amount%100, self.currency, self.balance/100, self.balance%100, self.balance_currency, self.title)
|
||||
def __str__(self):
|
||||
return unicode(self).encode("utf-8")
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
def __init__(self, row, current_account):
|
||||
self.raw = row
|
||||
def __init__(self, row, on_account, raw):
|
||||
self.raw = raw
|
||||
self.index = 1
|
||||
self.current_account = current_account
|
||||
self.time = datetime.datetime.strptime(row[IBField.date_completed], "%d.%m.%Y").date()
|
||||
self.account = IBParser.parse_account_number(row[IBField.to_account])
|
||||
self.to = row[IBField.to_name]
|
||||
self.on_account = on_account
|
||||
self.date = datetime.strptime(row[IBField.date_completed], "%d.%m.%Y").date()
|
||||
self.to_account = IBParser.parse_account_number(row[IBField.to_account])
|
||||
self.to_name = row[IBField.to_name]
|
||||
self.from_account = IBParser.parse_account_number(row[IBField.from_account])
|
||||
self.from_name = row[IBField.from_name]
|
||||
self.title = row[IBField.title]
|
||||
|
@ -84,22 +110,21 @@ class IBRow(object):
|
|||
self.balance = int(a)*100+int(b)
|
||||
self.balance_currency = c
|
||||
|
||||
if self.from_account == self.account:
|
||||
if self.from_account == self.to_account:
|
||||
self.type = "BANK_FEE"
|
||||
elif self.from_account in self.OWN_ACCOUNTS and self.account in self.OWN_ACCOUNTS:
|
||||
if self.account == self.current_account:
|
||||
elif self.from_account in self.OWN_ACCOUNTS and self.to_account in self.OWN_ACCOUNTS:
|
||||
if self.to_account == self.on_account:
|
||||
self.type = "OUT_FROM_OWN"
|
||||
else:
|
||||
self.type = "OUT_TO_OWN"
|
||||
elif self.from_account == self.current_account:
|
||||
elif self.from_account == self.on_account:
|
||||
self.type = "OUT"
|
||||
elif self.account == self.current_account:
|
||||
elif self.to_account == self.on_account:
|
||||
self.type = "IN"
|
||||
else:
|
||||
raise IBParseError("Can't figure out transfer type for current row", row)
|
||||
|
||||
self.uid = hashlib.sha256(self.SECRET + str(self)).hexdigest()
|
||||
print self.uid
|
||||
|
||||
class IBField(enum.Enum):
|
||||
from_name = u"Nadawca"
|
||||
|
@ -122,7 +147,7 @@ class IBParser(object):
|
|||
c = csv.reader(snapshot.splitlines(), delimiter=";")
|
||||
header = [r.decode("utf-8") for r in next(c, None)]
|
||||
if header is None:
|
||||
raise IBParseError("No header in history for {}".format(account_number))
|
||||
raise IBParseError("No header in history for {}".format(self.account_number))
|
||||
|
||||
for hf in header:
|
||||
try:
|
||||
|
@ -134,16 +159,16 @@ class IBParser(object):
|
|||
if not len(row) == len(self.fields):
|
||||
raise IBParseError("Row has {} fields, {} expected after parsing the header: \"{}\"".format(len(row), len(self.fields), ';'.join(row)))
|
||||
d = dict(zip(self.fields, [r.decode("utf-8") for r in row]))
|
||||
r = IBRow(d, account_number)
|
||||
r = IBRow(d, self.account_number,";".join(row))
|
||||
self.rows.append(r)
|
||||
|
||||
def get_by_type(self, y):
|
||||
return [row for row in self.rows if row.type == y]
|
||||
def get(self, type = None, on_account = None):
|
||||
return [row for row in self.rows if (row.type == type or type is None) and (row.on_account == on_account or on_account is None)]
|
||||
|
||||
@staticmethod
|
||||
def parse_account_number(s):
|
||||
formats = [
|
||||
"((?:[A-Za-z]{2})?[0-9]{2}) ([0-9]{4}) ([0-9]{4}) ([0-9]{4}) ([0-9]{4}) ([0-9]{4}) ([0-9]{4})", # 26 digits, optional country code - Poland
|
||||
"((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})", # 26 digits, optional country code - Poland
|
||||
]
|
||||
for f in formats:
|
||||
m = re.search(f, s)
|
||||
|
@ -221,7 +246,7 @@ class IBFetcher(object):
|
|||
|
||||
def _wait(self, seconds):
|
||||
print "[i] Waiting {} seconds".format(seconds)
|
||||
#time.sleep(seconds)
|
||||
time.sleep(seconds)
|
||||
|
||||
def _gettoken(self, soup):
|
||||
i = soup.find("input", type="hidden", attrs={"name": "banking"})
|
||||
|
@ -243,7 +268,7 @@ class IBFetcher(object):
|
|||
m = re.search("\/main\/index\/token\/([0-9]+)\/time\/", str(soup.head))
|
||||
if m is not None:
|
||||
t = m.group(1)
|
||||
r = self._getraw("main/index/token/{}/time/{:.0f}.js".format(t, time.time()*1000), params={"t": "{:.16f}".format(random.random())})
|
||||
r = self._getraw("main/index/token/{}/time/{:.0f}.js".format(t, time()*1000), params={"t": "{:.16f}".format(random.random())})
|
||||
print "[i] Fetched JS timestamp token: \"{}\"".format(r.text)
|
||||
|
||||
def process_wallet_page(self, soup):
|
||||
|
@ -331,30 +356,41 @@ def usage():
|
|||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "cl:", ["cached", "load="])
|
||||
except getopt.GetoptError as err:
|
||||
opts, args = getopt(sys.argv[1:], "hcl:", ["help", "cached", "load=", "print-schema"])
|
||||
except GetoptError as err:
|
||||
# print help information and exit:
|
||||
print str(err) # will print something like "option -a not recognized"
|
||||
usage()
|
||||
sys.exit(2)
|
||||
|
||||
CACHE_DIR = config["CACHE_DIR"]
|
||||
engine = create_engine(config["SQLALCHEMY_DATABASE_URI"])
|
||||
session = sessionmaker(bind=engine)()
|
||||
|
||||
cached = False
|
||||
load_files = {}
|
||||
for o, a in opts:
|
||||
if o in ("-h", "--help"):
|
||||
usage()
|
||||
sys.exit()
|
||||
elif o in ("--print-schema"):
|
||||
print "[i] Called with --print-schema, will print the create statement and quit."
|
||||
m = MetaData()
|
||||
print CreateTable(IBRow.__table__).compile(engine),";"
|
||||
for index in IBRow.__table__.indexes:
|
||||
print CreateIndex(index).compile(engine),";"
|
||||
sys.exit()
|
||||
elif o in ("-c", "--cached"):
|
||||
cached = True
|
||||
elif o in ("-l", "--load"):
|
||||
account_number, f = a.split(":")
|
||||
if account_number is None or f is None:
|
||||
an, f = a.split(":")
|
||||
if an is None or f is None:
|
||||
print "[e] --load argument \"{}\" appears malformed, could not split account number and file name".format(a)
|
||||
sys.exit(2)
|
||||
|
||||
account_number = IBParser.parse_account_number(account_number)
|
||||
account_number = IBParser.parse_account_number(an)
|
||||
if account_number is None:
|
||||
print "[e] Account number \"{}\" unparseable".format(account_number)
|
||||
print "[e] Account number \"{}\" unparseable".format(an)
|
||||
|
||||
history = open(f,'r').read()
|
||||
load_files[account_number] = history
|
||||
|
@ -363,38 +399,48 @@ if __name__ == "__main__":
|
|||
else:
|
||||
assert False, "unhandled option"
|
||||
|
||||
accs = ["PL48195000012006000648890002", "PL21195000012006000648890003", "PL91195000012006000648890004", "PL64195000012006000648890005"]
|
||||
|
||||
if cached:
|
||||
print "[i] Cached run - will not connect to the bank"
|
||||
history_logs = load_files
|
||||
if len(load_files) > 0:
|
||||
history_logs = load_files
|
||||
else:
|
||||
for account_number in config["IB_ACCOUNTS"]:
|
||||
print "[e] Automated cache loading not implemented"
|
||||
sys.exit(2)
|
||||
|
||||
else:
|
||||
print "[i] Normal run - will connect to the bank"
|
||||
fetcher = IBFetcher()
|
||||
history_logs = {}
|
||||
wallet = fetcher.login(raw_input("[?] ID: "), raw_input("[?] Password: "))
|
||||
if "IB_LOGIN" not in config.keys() or "IB_PASSWORD" not in config.keys():
|
||||
wallet = fetcher.login(raw_input("[?] ID: "), raw_input("[?] Password: "))
|
||||
else:
|
||||
print "[i] Using saved credentials"
|
||||
wallet = fetcher.login(config["IB_LOGIN"], config["IB_PASSWORD"])
|
||||
for account_number, account in wallet["accounts"].items():
|
||||
if account_number not in accs and False:
|
||||
print "[i] Skipping {} ({})".format(account_number, account["id"])
|
||||
continue
|
||||
print "[i] Fetching history for account {} ({})".format(account_number, account["id"])
|
||||
history = fetcher.fetch_account_history(account["id"])
|
||||
tmp = open(account_number,'w')
|
||||
tmp.write(history)
|
||||
tmp.close()
|
||||
cachefile = open(CACHE_DIR+"/"+account_number,'w')
|
||||
cachefile.write(history)
|
||||
cachefile.close()
|
||||
history_logs[account_number] = history
|
||||
|
||||
parsed = {}
|
||||
stats = {}
|
||||
for account_number, history in history_logs.items():
|
||||
print "[i] Parsing history for account {}".format(account_number)
|
||||
parser = IBParser(account_number)
|
||||
parser.parse(history)
|
||||
parsed[account_number] = parser.rows
|
||||
stats[account_number] = {}
|
||||
stats[account_number]["added"] = 0
|
||||
stats[account_number]["skipped"] = 0
|
||||
for row in parser.get():
|
||||
if not session.query(IBRow).filter_by(uid=row.uid).first():
|
||||
session.add(row)
|
||||
stats[account_number]["added"] += 1
|
||||
else:
|
||||
stats[account_number]["skipped"] += 1
|
||||
session.commit()
|
||||
|
||||
for a,p in parsed.items():
|
||||
print ""
|
||||
print "{}:".format(a)
|
||||
for e in p:
|
||||
print "\t{}".format(e)
|
||||
print ""
|
||||
print "[i] Done: ", stats
|
||||
#print f.create_report().read()
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
class Config(object):
    """Base configuration values; ``DevelopmentConfig`` derives from this."""

    # NOTE(review): the fetcher script in the same commit does
    # ``from config import CurrentConfig`` and reads SECRET, OWN_ACCOUNTS,
    # CACHE_DIR and IB_ACCOUNTS (optionally IB_LOGIN / IB_PASSWORD).  None of
    # those are defined here -- presumably they come from a local,
    # uncommitted override; confirm before relying on this class alone.
    DEBUG = False
    TESTING = False
    # Default database: a SQLite file named data.db, given as a relative path.
    SQLALCHEMY_DATABASE_URI = "sqlite:///data.db"
|
||||
|
||||
|
||||
class DevelopmentConfig(Config):
    """Development settings: inherits everything from ``Config``, enables DEBUG."""

    DEBUG = True
|
|
@ -1,2 +1,11 @@
|
|||
#!/bin/sh
|
||||
echo "$(date): Fetch started." >> fetch.log
|
||||
K_DIR="$HOME"
|
||||
K_FETCH_DIR="$K_DIR/fetch/"
|
||||
K_FETCH_LOG="$K_FETCH_DIR/fetch.log"
|
||||
K_FETCH_ENV="$K_FETCH_DIR/.env"
|
||||
|
||||
. $K_FETCH_ENV/bin/activate
|
||||
|
||||
echo "Fetch started." | ts >> "$K_FETCH_LOG"
|
||||
|
||||
python "$K_FETCH_DIR/banking-ib.py" 2>&1 | ts | tee -a "$K_FETCH_LOG"
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
beautifulsoup4 (4.3.2)
|
||||
enum34 (1.1.6)
|
||||
psycopg2 (2.5.4)
|
||||
requests (2.5.1)
|
||||
SQLAlchemy (0.9.8)
|
Loading…
Reference in New Issue