Merge pull request 'Fix emails + cleanup + minor improvements' (#1) from cleanup into master

Reviewed-on: #1
This commit is contained in:
radex 2024-07-25 07:50:47 +00:00
commit 1d1b4b5541
35 changed files with 656 additions and 2113 deletions

3
.gitignore vendored
View file

@ -1,6 +1,5 @@
olddata
webapp/data.db
web/webapp/data.db
**/data.db
*pyc
*sublime*
kasownik.ini

View file

@ -14,4 +14,4 @@ ADD web /usr/src/web
ADD fetch /usr/src/fetch
STOPSIGNAL SIGINT
CMD ["uwsgi", "--http-socket", "0.0.0.0:5000", "--plugins", "python3", "--wsgi", "webapp.wsgi:app", "--threads", "10", "--master"]
CMD ["uwsgi", "--http-socket", "0.0.0.0:5000", "--wsgi", "webapp.wsgi:app", "--threads", "10", "--master"]

View file

@ -1,25 +1,12 @@
Kasownik
========
Warsaw Hackerspace Membership Management System.
# Kasownik
> „100 linii pythona!” - enki o skrypcie do składek
Warsaw Hackerspace Membership Management System
Summary
-------
## Summary
This project is divided into two separate modules:
* `web` - web frontend and basic logic, public-facing service
* `fetch` - bank account data fetcher, to be run in some secure domain
(at least separate UID) - supports "old" IdeaBank web interface
* `fetch` - bank account data fetcher
More info about these can be found in their respective `README.md` files.
Quick Start
-----------
1. [Register new SSO application](https://sso.hackerspace.pl/client/create) - client name and URI don't matter, redirect URI should be `http://localhost:5000/oauth/callback` (by default), other settings can stay default
2. Set `SPACEAUTH_CONSUMER_KEY` and `SPACEAUTH_CONSUMER_SECRET` envs to the client generated above
3. `docker-compose run --rm kasownik-web ./manage.py syncdb` (one time)
4. Run the app: `docker-compose up --build`
5. (TODO: Add missing table for fetcher, add example data)

View file

@ -1,23 +1,11 @@
Fetcher
=======
# kasownik-fetch
This is a separate process used for fetching of bank account data from IdeaBank.
This is a separate process used for fetching bank account data.
Raw transfer data is fetched into the `raw_transfers` table (using credentials
separate from the web interface worker) and is forwarded into `transfers` by a
trigger and stored procedure (see `triggers.sql`).
This process has its own `config.py` (example available in `config.py.dist`).
This process has its own `config.ini` (example available in `config.ini.dist`).
`raw_transfers` table schema is presented when executing:
python banking-ib.py --print-schema
gRPC proto files (`smsgw_pb2*.py`) can be rebuilt using:
python -m grpc_tools.protoc -Iprotos --python_out=. --grpc_python_out=. protos/smsgw.proto
TODO
----
* Cleanup logging
NOTE: See git history for old scraper (IdeaBank, BRE bank) implementations and SMSGW

View file

@ -1,668 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright (c) 2017, Remigiusz Marcinkiewicz <remigiusz@marcinkiewicz.me>
# Based on iBRE/mBank CompanyNet crawler by Sergiusz Bazanski <q3k@q3k.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from datetime import date, datetime, timedelta
import sys
from time import sleep, time
import csv
import os
import random
import re
import logging
import logging.config
import argparse
import enum
import bs4
import requests
import grpc
from sqlalchemy import Column, Integer, String, Date, create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import CreateTable, CreateIndex
from six.moves import input
import smsgw_pb2
import smsgw_pb2_grpc
from config import CurrentConfig
# Snapshot every UPPER_CASE attribute of CurrentConfig into a plain dict so
# the rest of the module can use ordinary dict access (config["KEY"]).
config = dict(
    (name, getattr(CurrentConfig, name))
    for name in dir(CurrentConfig)
    if name.isupper()
)

# Declarative base class for the ORM models of this module.
Base = declarative_base()

# Apply the LOGGING dict from the configuration when it is present and
# non-empty; otherwise fall back to DEBUG-level logging via basicConfig.
if not config.get('LOGGING'):
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.config.dictConfig(config['LOGGING'])
class RawTransfer(Base):
    """ORM model for one row of the ``raw_transfer`` table.

    The attribute order below is the column order of the generated table, so
    it must not be rearranged.
    """

    __tablename__ = 'raw_transfer'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Source record the row was built from, kept verbatim.
    raw = Column(String(512))
    # Transaction identifier; indexed because duplicates are looked up by it.
    uid = Column(String(128), index=True)
    # Account number the transfer was recorded on.
    on_account = Column(String(32), index=True)

    # Amount as an integer number of 1/100 currency units, plus its currency.
    amount = Column(Integer)
    currency = Column(String(8))

    date = Column(Date)
    # Transfer classification, e.g. "IN", "OUT", "BANK_FEE".
    type = Column(String(16))
    index = Column(Integer)
    title = Column(String(256))

    balance = Column(Integer)
    balance_currency = Column(String(8))

    # Both ends of the transfer: account numbers and display names.
    from_account = Column(String(32))
    to_account = Column(String(32))
    from_name = Column(String(256))
    to_name = Column(String(256))
class IBParseError(Exception):
    """Raised when bank data does not have the expected format."""


class IBMaintenanceError(Exception):
    """Raised when the bank returned its maintenance page instead of data."""
class IBRow(RawTransfer):
    """A ``raw_transfer`` row built from one parsed line of the CSV export."""

    SECRET = config["SECRET"]
    OWN_ACCOUNTS = config["OWN_ACCOUNTS"]

    def __unicode__(self):
        """Return a one-line human-readable summary of the transfer."""
        # __init__ never fills in ``balance``, so it is normally None here;
        # formatting it unconditionally raised TypeError on every call.
        if self.balance is None:
            balance = u"?"
        else:
            balance = u"{}.{:02d} {}".format(
                self.balance // 100, self.balance % 100, self.balance_currency)
        return u"{} *{} #{} @{} -\"{}\" -#{} => +\"{}\" +#{} [{}.{:02d} {}] ({}) ~\"{}\"".format(
            self.type, self.index, self.on_account, self.date,
            self.from_name, self.from_account,
            self.to_name, self.to_account,
            self.amount // 100, self.amount % 100, self.currency,
            balance, self.title)

    def __str__(self):
        return unicode(self).encode("utf-8")

    def __repr__(self):
        return str(self)

    def __init__(self, row, on_account, raw):
        """Populate the transfer columns from one parsed CSV row.

        :param row: dict mapping ``IBField`` members to unicode cell values
        :param on_account: accepted for interface compatibility but unused;
            the account is taken from the row's own-account column
        :param raw: UTF-8 encoded source line, stored decoded in ``raw``
        :raises IBParseError: when the amount, an account number or the
            direction column cannot be interpreted
        """
        self.raw = raw.decode('utf-8')
        self.uid = row[IBField.uid]
        self.index = 1
        self.date = datetime.strptime(row[IBField.date_completed], "%Y%m%d").date()
        self.title = row[IBField.title]

        # Amounts look like "123.45"; store them as integer hundredths.
        m = re.match(r"([0-9]+)\.([0-9]{2})", row[IBField.amount])
        if m is None:
            raise IBParseError("Can't parse amount value \"{}\"".format(row[IBField.amount]), row)
        a, b = m.groups()
        self.amount = int(a)*100+int(b)
        self.currency = row[IBField.currency]

        own_account = IBParser.parse_account_number(row[IBField.own_account])
        own_name = "Stowarzyszenie \"Warszawski Hackerspace\""
        if own_account not in self.OWN_ACCOUNTS:
            raise IBParseError("own_account {} not in OWN_ACCOUNTS - format change?".format(own_account))
        self.on_account = own_account

        other_account = IBParser.parse_account_number(row[IBField.other_account])
        if other_account is None:
            raise IBParseError("other_account {} could not be parsed".format(row[IBField.other_account]))
        other_name = row[IBField.other_name]

        direction = row[IBField.direction]
        if direction == "uznanie":
            direction = "IN"
        elif direction == u"Obiciążenie":  # sic! - the misspelling is intentional
            direction = "OUT"
        else:
            # The value used to be passed as a second exception argument,
            # leaving a literal "{}" in the message.
            raise IBParseError("Can't parse direction specifier \"{}\"".format(direction))

        # From here on ``direction`` is exactly "IN" or "OUT".
        if own_account == other_account:
            self.type = "BANK_FEE"
            self.from_account = self.to_account = own_account
            self.from_name = self.to_name = own_name
        else:
            # own_account is already known to be ours (checked above), so the
            # transfer is own-to-own exactly when the other side is ours too.
            own_to_own = other_account in self.OWN_ACCOUNTS
            if own_to_own:
                other_name = own_name
            if direction == "IN":
                self.type = "IN_FROM_OWN" if own_to_own else "IN"
                self.from_account, self.from_name = other_account, other_name
                self.to_account, self.to_name = own_account, own_name
            else:
                self.type = "OUT_TO_OWN" if own_to_own else "OUT"
                self.from_account, self.from_name = own_account, own_name
                self.to_account, self.to_name = other_account, other_name

        if None in (self.type, self.to_account, self.from_account, self.to_name, self.from_name):
            raise IBParseError(
                "Something went wrong - one of the mandatory values empty",
                self.type, self.to_account, self.from_account,
                self.to_name, self.from_name)
class IBField(enum.Enum):
    """Columns of the CSV export, keyed by their (Polish) header text.

    Each value is the exact header cell as it appears in the export; the
    header line looks like this:

    Data waluty;Data zlecenia;Numer rachunku nadawcy;Numer banku nadawcy;Kwota w walucie rachunku;Waluta;Kurs;Kwota w walucie zlecenia;Numer rachunku odbiorcy;Odbiorca;Numer banku odbiorcy;Tytuł;Obciążenie/uznanie;Numer transakcji w systemie centralnym;
    """

    date_completed = u"Data waluty"
    date_issued = u"Data zlecenia"
    own_account = u"Numer rachunku nadawcy"
    own_bank = u"Numer banku nadawcy"
    amount = u"Kwota w walucie rachunku"
    currency = u"Waluta"
    rate = u"Kurs"
    # Was the only value without the ``u`` prefix; made unicode like the rest.
    transfer_amount = u"Kwota w walucie zlecenia"
    other_account = u"Numer rachunku odbiorcy"
    other_name = u"Odbiorca"
    other_bank = u"Numer banku odbiorcy"
    title = u"Tytuł"
    direction = u"Obciążenie/uznanie"
    uid = u"Numer transakcji w systemie centralnym"
class IBParser(object):
    """Parses a ';'-separated CSV account history export into IBRow objects."""

    def __init__(self, account_number):
        """Remember the account the snapshot belongs to; start with no rows."""
        self.account_number = account_number
        self.rows = []
        self.fields = []

    def parse(self, snapshot):
        """Parse a UTF-8 encoded CSV snapshot and append its rows to ``rows``.

        The first line is the header; every header cell must be a known
        ``IBField`` value. Data lines are processed in reverse file order.

        :param snapshot: byte string with the whole export
        :raises IBMaintenanceError: when the snapshot is the maintenance page
        :raises IBParseError: when the header or a row has an unexpected shape
        """
        # Patch #01: successful utf8 test and CSV separator injection
        kek = u"IMPLR - STARVING - SKŁADKA ;".encode("utf-8")
        snapshot = snapshot.replace(kek, kek[:-1])
        # Patch #02: newline in internal investment deposit transaction title
        snapshot = snapshot.replace('\n-', ' -')

        lines = snapshot.splitlines()
        if not lines:
            # Previously surfaced as a bare IndexError from lines.pop(0).
            raise IBParseError("Empty snapshot - no header line")

        header = lines.pop(0).decode("utf-8").split(";")
        # Every line ends with ';', so the last split cell must be empty.
        if header[-1]:
            if 'Przepraszamy strona chwilowo niedostępna' in snapshot:
                raise IBMaintenanceError(snapshot)
            # The header used to be passed as a second argument, leaving the
            # "%r" placeholder unformatted in the message.
            raise IBParseError("Last column no longer empty? %r" % (header,))
        header = header[:-1]

        for hf in header:
            try:
                self.fields.append(IBField(hf))
            except ValueError as e:
                raise IBParseError("Unexpected field name \"{}\"".format(hf), e)

        for cells in csv.reader(reversed(lines), delimiter=";"):
            cells = cells[:-1]  # drop the empty cell after the trailing ';'
            if len(cells) != len(self.fields):
                raise IBParseError(
                    "Row has {} fields, {} expected after parsing the header: \"{}\"".format(
                        len(cells), len(self.fields), ';'.join(cells)))
            decoded = [cell.decode("utf-8") for cell in cells]
            record = dict(zip(self.fields, decoded))
            self.rows.append(IBRow(record, self.account_number, ";".join(cells)))

    def get(self, type_=None, on_account=None):
        """Return parsed rows, optionally filtered by type and/or account.

        A filter that is None matches every row.
        """
        return [
            row for row in self.rows
            if (row.type == type_ or type_ is None)
            and (row.on_account == on_account or on_account is None)]

    @staticmethod
    def parse_account_number(s):
        """Find an account number in ``s`` and return it without spaces.

        The formats are tried in order and the first match wins. When the
        leading group is just the two check digits (no country code), "PL"
        is prepended.

        :returns: the normalised account number, or None if nothing matches
        """
        formats = [
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})", # 26 digits, optional country code - Poland
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 22 characters including BIC bank code - Ireland
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 18 characters including BIC bank code - Netherlands
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 22 digits - Germany
        ]
        for f in formats:
            m = re.search(f, s)
            if m is None:
                continue
            account = "".join(m.groups())
            if len(m.group(1)) == 2:
                account = "PL" + account
            return account
        return None
class IBFetcher(object):
    """Scraping session against the web interface at ``BASE``.

    Keeps a ``requests`` session, the most recently seen ``banking`` form
    token and the last parsed page (for post-mortem dumps).
    """

    BASE = "https://secure.ideabank.pl/"
    START_DATE = "01.11.2016"

    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)
        self._soup = None
        self.token = None
        self.s = requests.Session()
        self.s.headers.update({
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Upgrade-Insecure-Requests": "1",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache"
        })

    def _makesoup(self, data):
        """Parse ``data`` as HTML and remember the result for ``_dump``."""
        self._soup = bs4.BeautifulSoup(data)
        return self._soup

    def _dump(self):
        """Write the last parsed page to the configured DUMP_FILE."""
        fn = config["DUMP_FILE"]
        # The file name is a string; "%f" made the logging call itself fail.
        self.logger.warning("Dumping the last page to %s", fn)
        with open(fn, 'w') as fd:
            fd.write(unicode(self._soup).encode('utf-8'))

    def _getraw(self, page, params=None):
        """GET ``BASE + page`` and return the response.

        :raises Exception: when the status code is not 200
        """
        if params is None:
            params = {}
        url = self.BASE + page
        r = self.s.get(url, params=params,
                       timeout=config.get('DEFAULT_TIMEOUT', 3600))
        self.logger.debug("GET %s?%s -> %d", page, "&".join(
            str(k)+"="+str(v) for k, v in params.items()), r.status_code)
        if r.status_code != 200:
            raise Exception("return code %i" % r.status_code)
        return r

    def _get(self, page):
        """GET a page, update Referer and tokens, return the parsed page."""
        r = self._getraw(page)
        self.s.headers.update({"Referer": r.url})
        soup = self._makesoup(r.text)
        self._gettoken(soup)
        self._hitjstoken(soup)
        return soup

    def _postraw(self, page, data):
        """POST form ``data`` to ``BASE + page`` and return the response.

        Dumps the last page and raises when the status code is not 200.
        """
        # NOTE(review): the original built a header dict here (form
        # Content-Type and X-Requested-With: XMLHttpRequest) but never passed
        # it to the request. The dead code is removed and the request is sent
        # exactly as before - confirm whether those headers were ever needed.
        url = self.BASE + page
        r = self.s.post(url, data, timeout=config.get('DEFAULT_TIMEOUT', 3600))
        self.logger.debug("POST %s -> %d", page, r.status_code)
        if r.status_code != 200:
            self._dump()
            raise Exception("return code %i" % r.status_code)
        return r

    def _post(self, page, data):
        """POST ``data`` plus the current token; return the parsed page."""
        mdata = {"banking": self.token}
        mdata.update(data)
        r = self._postraw(page, mdata)
        if re.search("forbidden", r.text) is not None:
            self._dump()
            raise Exception("Received \"forbidden3\" response. Bad token?")
        self.s.headers.update({"Referer": r.url})
        soup = self._makesoup(r.text)
        self._gettoken(soup)
        self._hitjstoken(soup)
        return soup

    def _wait(self, seconds):
        self.logger.debug("Waiting %d seconds", seconds)
        sleep(seconds)

    def _gettoken(self, soup):
        """Pick up a new ``banking`` token from the page, if it has one.

        The hidden form input takes precedence over a ``changeBanking('..')``
        call found in the page source.
        """
        i = soup.find("input", type="hidden", attrs={"name": "banking"})
        m = re.search(r"changeBanking\('([0-9a-fA-F]+)'\)", str(soup))
        if i is not None and i["value"] is not None:
            t = i["value"]
        elif m is not None:
            t = m.group(1)
        else:
            t = None
        if t is None:
            self.logger.debug("No new token found")
            return
        self.token = t
        self.logger.debug("Token: %s", self.token)

    def _hitjstoken(self, soup):
        """Fetch the timestamped JS token URL referenced in the page head."""
        m = re.search(r"\/main\/index\/token\/([0-9]+)\/time\/", str(soup.head))
        if m is None:
            return
        t = m.group(1)
        r = self._getraw(
            "main/index/token/{}/time/{:.0f}.js".format(t, time()*1000),
            params={"t": "{:.16f}".format(random.random())})
        self.logger.debug("Fetched JS timestamp token: %r", r.text)

    def smsgw_request(self, filter_body='.*', timeout=30):
        """Open a message stream from the SMS gateway over mutual-TLS gRPC.

        :param filter_body: regular expression passed as the body filter
        :param timeout: timeout of the ``Messages`` call
        :returns: whatever ``stub.Messages`` returns (iterated by ``login``)
        """
        addr = config.get('SMSGW_ADDRESS', 'smsgw.hswaw-prod.svc.k0.hswaw.net:443')
        with open(config['SMSGW_CERT'], 'rb') as fd:
            api_cert = fd.read()
        with open(config['SMSGW_KEY'], 'rb') as fd:
            api_key = fd.read()
        with open(config['SMSGW_CA'], 'rb') as fd:
            api_ca = fd.read()
        credentials = grpc.ssl_channel_credentials(
            api_ca,
            api_key,
            api_cert,
        )
        channel = grpc.secure_channel(addr, credentials)
        stub = smsgw_pb2_grpc.SMSGatewayStub(channel)
        smsgw_request = smsgw_pb2.MessagesRequest(
            filter_body=filter_body
        )
        return stub.Messages(smsgw_request, timeout=timeout)

    def process_wallet_page(self, soup):
        """Extract the account list from the post-login page.

        :returns: ``{"accounts": {number: {...}}}``; each account carries
            ``number``, ``currency``, the balance cells and the ``id`` used
            for history downloads
        """
        wallet = {"accounts": {}}
        account_ids = []
        for button in soup.find_all("button", class_="historia1"):
            account_ids.append(
                re.search(r"\/accounts\/index\/([0-9]+)\/2", str(button["onclick"])).group(1))

        accounts = []
        for dt in soup.find_all("table", id="data"):
            cell = dt.find("td", class_="cell1")
            if cell is None or cell.string is None:
                continue
            number = IBParser.parse_account_number((cell.string.strip()))
            if number is None:
                continue
            cells = cell.find_next_siblings("td")
            accounts.append({
                "number": number,
                "currency": cells[0].string.strip(),
                "balance": cells[1].string.strip(),
                "available_balance": cells[2].string.strip(),
                "pln_balance": cells[3].string.strip(),
            })

        # Ids and account tables are paired purely by their order on the page.
        for account_id, account in zip(account_ids, accounts):
            account["id"] = account_id
            wallet["accounts"][account["number"]] = account

        if len(wallet["accounts"]) == 0:
            self.logger.error("Empty accounts list. Undetected failed login? Aborting.")
            self._dump()
            sys.exit(4)
        return wallet

    def login(self, username, password, interactive=False):
        """Log in (login, password, SMS code) and return the parsed wallet.

        The SMS code is read from the SMS gateway when it is reachable and
        asked for on stdin otherwise. Exits the process with status 4 when
        any step of the login fails. ``interactive`` is accepted but unused.
        """
        sms_re = r'Silne uwierzytelnienie do logowania. Kod SMS: (.*)'
        smsgw_message = None
        try:
            smsgw_message = self.smsgw_request(sms_re)
        except Exception as exc:
            self.logger.warning('Couldn\'t create smsgw service, will go interactive', exc_info=exc)

        login1_page = self._get("main/index")
        self._wait(3)

        login2_page = self._post("main/index", {"js": "true", "login": username})
        self._wait(3)

        password2_input = login2_page.find("input", attrs={"name": "password2"})
        if password2_input is None:
            self.logger.error("Masked password screen encountered - aborting")
            sys.exit(4)
        self.logger.debug("Regular password screen encountered")
        data = {
            "log2": username,
            "password2": password2_input["value"],
            "password": password,
        }
        twofa_page = self._post("main/index", data)
        self._wait(3)

        sms_input = twofa_page.find("input", attrs={"name": "sms"})
        if sms_input is None:
            self.logger.error('No SMS query - aborting')
            sys.exit(4)

        if smsgw_message:
            msg = next(smsgw_message)
            self.logger.debug('Got message: %r', msg)
            # findall returns a list; it is posted as-is, as before.
            code = re.findall(sms_re, msg.body)
            data = {"sms": code}
        else:
            data = {"sms": input('[?] OTP: ')}

        wallet_page = self._post("main/index", data)
        if wallet_page.find("div", class_="login_form"):
            self.logger.error("Login failed, aborting")
            self._dump()
            try:
                self.logger.error("Possible reason: %r", ','.join(
                    wallet_page.find("ul", class_="error_list").stripped_strings))
            except Exception:
                # Best effort only: the error list may be missing entirely.
                pass
            sys.exit(4)

        self._wait(2)
        return self.process_wallet_page(wallet_page)

    def fetch_account_history(self, account_id, start=None, end=None):
        """Download the CSV daily report for one account.

        :param account_id: account id as found by ``process_wallet_page``
        :param start: first day (default: 30 days before today)
        :param end: last day (default: today)
        :returns: report as a UTF-8 byte string with any BOM removed
        """
        if end is None:
            end = date.today()
        if start is None:
            start = date.today() - timedelta(days=30)
        data = {
            "code": account_id,
            "report_type": "csv_dr",
            "start_date": '{:02d}.{:02d}.{:04d}'.format(start.day, start.month, start.year),
            "end_date": '{:02d}.{:02d}.{:04d}'.format(end.day, end.month, end.year),
            "banking": self.token
        }
        r = self._postraw("accounts/getHistoryDailyReportsFile", data)
        return r.content.decode("utf-8-sig").encode("utf-8")
def usage():
    """Placeholder; does nothing and returns None."""
    return None
def lock():
    """Create the configured LOCK_FILE, or exit(3) if it already exists.

    The file is created with O_CREAT | O_EXCL so that checking for an
    existing lock and creating a new one is a single atomic step; the
    previous isfile()-then-open() sequence let two processes started at the
    same time both acquire the lock.
    """
    import errno  # local import: not among the module-level imports

    fn = config["LOCK_FILE"]
    try:
        fd = os.open(fn, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
        logging.error("Lock file %s exists, aborting", fn)
        sys.exit(3)
    os.close(fd)
    logging.debug("Setting up lock file %s", fn)
    if not os.path.isfile(fn):
        logging.error("Lock file %s somehow does not exist, aborting", fn)
        sys.exit(3)
def release():
    """Remove the configured LOCK_FILE.

    Exits with status 3 when the file is missing before removal or is still
    present afterwards.
    """
    lock_path = config["LOCK_FILE"]
    logging.debug("Removing lock file %s", lock_path)

    present_before = os.path.isfile(lock_path)
    if not present_before:
        logging.error("Lock file %s somehow does not exist, WTF?", lock_path)
        sys.exit(3)

    os.remove(lock_path)

    still_present = os.path.isfile(lock_path)
    if still_present:
        logging.error("Lock file %s somehow still exists, WTF?", lock_path)
        sys.exit(3)
# Command-line interface of the fetcher script.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-n', '--no-action',
    action="store_true",
    help='do not commit any database changes',
)
parser.add_argument(
    '-i', '--interactive',
    action="store_true",
    help='ask interactively for credentials',
)
parser.add_argument(
    '-c', '--cached',
    action="store_true",
    help='use cached data (test)',
)
# May be given several times; each occurrence is appended to a list.
parser.add_argument(
    '-l', '--load',
    action='append',
    help='process specified files (test)',
)
parser.add_argument(
    '--print-schema',
    action="store_true",
    help='print table schema and quit',
)
if __name__ == "__main__":
    # Script entry point: collect account histories (from files, from the
    # cache directory or from the bank), parse them and store every row whose
    # uid is not in the database yet.
    args = parser.parse_args()

    CACHE_DIR = config["CACHE_DIR"]
    engine = create_engine(config["SQLALCHEMY_DATABASE_URI"])
    session = sessionmaker(bind=engine)()

    if args.print_schema:
        logging.debug("Called with --print-schema, will print the create statement and quit.")
        print('%s;' % CreateTable(IBRow.__table__).compile(engine))
        for index in IBRow.__table__.indexes:
            print('%s;' % CreateIndex(index).compile(engine))
        sys.exit()

    lock()

    balances = {}
    history_logs = {}

    if args.load:
        # Each argument has the form "<account number>:<file path>".
        logging.debug("Using manually supplied files")
        for fn in args.load:
            an, f = fn.split(':', 1)
            account_number = IBParser.parse_account_number(an)
            if account_number is None:
                # Report the account part that failed to parse (the file
                # path was logged here by mistake).
                logging.error("File name number \"{}\" unparseable".format(an))
                continue
            logging.debug('Loading "%s" as "%s"', f, account_number)
            with open(f, 'r') as fd:
                history_logs[account_number] = fd.read()
    elif args.cached:
        logging.debug("Loading cached files from {}".format(CACHE_DIR))
        for f in os.listdir(CACHE_DIR):
            if f.startswith('balance-'):
                continue
            account_number = IBParser.parse_account_number(f)
            if account_number is None:
                logging.error("File name number \"{}\" unparseable".format(f))
                continue
            with open(CACHE_DIR + "/" + f, 'r') as fd:
                history_logs[account_number] = fd.read()
            logging.debug("Loading \"{}\" as \"{}\"".format(f, account_number))
    else:
        logging.debug("Normal run - will connect to the bank")
        fetcher = IBFetcher()
        if "IB_LOGIN" not in config or "IB_PASSWORD" not in config or args.interactive:
            wallet = fetcher.login(input("[?] ID: "), input("[?] Password: "), args.interactive)
        else:
            logging.debug("Using saved credentials")
            wallet = fetcher.login(config["IB_LOGIN"], config["IB_PASSWORD"])

        for account_number, account in wallet["accounts"].items():
            logging.debug("Fetching history for account {} ({})".format(account_number, account["id"]))
            history = fetcher.fetch_account_history(account["id"])
            history_logs[account_number] = history
            # Keep a copy of the history and of the balance for --cached runs.
            with open(CACHE_DIR + "/" + account_number, 'w') as fd:
                fd.write(history)
            balances[account_number] = (account["available_balance"], account["currency"])
            with open(CACHE_DIR + "/balance-" + account_number, 'w') as fd:
                fd.write("{} {}\n".format(
                    account["available_balance"], account["currency"]))

    if not history_logs:
        logging.error('Nothing to process')
        # Nothing went wrong with the bank session here, so do not leave a
        # stale lock file behind that would block every following run.
        release()
        sys.exit()

    stats = {}
    for account_number, history in history_logs.items():
        logging.debug("Parsing history for account {}".format(account_number))
        # Separate name: the argparse ``parser`` global is no longer rebound.
        history_parser = IBParser(account_number)
        try:
            history_parser.parse(history)
        except IBMaintenanceError:
            logging.exception('Maintenance error, skipping')
            continue

        stats[account_number] = {"added": 0, "skipped": 0}
        for row in history_parser.get():
            if not session.query(IBRow).filter_by(uid=row.uid).first():
                session.add(row)
                stats[account_number]["added"] += 1
            else:
                stats[account_number]["skipped"] += 1

    if args.no_action:
        logging.info('Running with --no-action, not commiting.')
    else:
        session.commit()

    # That is pretty ugly, but the only alternative would be to change handler
    # level in runtime, and that'd still need some rollback anyway.
    if any(v['added'] for v in stats.values()):
        log_summary = logging.info
    else:
        log_summary = logging.debug

    if balances:
        log_summary("Account balances:")
        for account_number, v in balances.items():
            balance, currency = v
            log_summary("\t{}: {} {}".format(account_number, balance, currency))

    log_summary("Done: %r", stats)
    release()

View file

@ -1,500 +0,0 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright (c) 2019, Piotr Dobrowolski <informatic@hackerspace.pl>
# Based on IdeaBank crawler by Remigiusz Marcinkiewicz <remigiusz@marcinkiewicz.me>
# Based on iBRE/mBank CompanyNet crawler by Sergiusz Bazanski <q3k@q3k.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from datetime import date, datetime, timedelta
import sys
import time
import os
import re
import logging
import logging.config
import argparse
import json
import requests
from sqlalchemy import Column, Integer, String, Date, BigInteger, create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import CreateTable, CreateIndex
from six.moves import input
from config import CurrentConfig
# Snapshot every UPPER_CASE attribute of CurrentConfig into a plain dict so
# the rest of the module can use ordinary dict access (config["KEY"]).
config = dict(
    (name, getattr(CurrentConfig, name))
    for name in dir(CurrentConfig)
    if name.isupper()
)

# Declarative base class for the ORM models of this module.
Base = declarative_base()

# Apply the LOGGING dict from the configuration when it is present and
# non-empty; otherwise fall back to DEBUG-level logging via basicConfig.
if not config.get('LOGGING'):
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.config.dictConfig(config['LOGGING'])
class RawTransfer(Base):
    """ORM model for one row of the ``raw_transfer`` table.

    The attribute order below is the column order of the generated table, so
    it must not be rearranged.
    """

    __tablename__ = 'raw_transfer'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Source record the row was built from (unbounded string).
    raw = Column(String)
    # Transaction identifier; indexed.
    uid = Column(String(128), index=True)
    # Account number the transfer was recorded on; indexed.
    on_account = Column(String(32), index=True)

    # Amount as an integer number of 1/100 currency units, plus its currency.
    amount = Column(Integer)
    currency = Column(String(8))

    date = Column(Date)
    # Transfer classification, e.g. "IN", "OUT", "BANK_FEE".
    type = Column(String(16))
    index = Column(Integer)
    title = Column(String(256))

    balance = Column(Integer)
    balance_currency = Column(String(8))

    # Both ends of the transfer: account numbers and display names.
    from_account = Column(String(32))
    to_account = Column(String(32))
    from_name = Column(String(256))
    to_name = Column(String(256))

    # Defaults to the current time in microseconds since the epoch.
    scrape_timestamp = Column(
        BigInteger,
        default=lambda: round(time.time() * 1000000),
    )
class IBParseError(Exception):
    """Raised when bank data does not have the expected format."""


class IBMaintenanceError(Exception):
    """Signals that the bank service is under maintenance."""
class IBRow(RawTransfer):
    """A ``raw_transfer`` row built from one transaction dict of the JSON API."""

    SECRET = config["SECRET"]
    OWN_ACCOUNTS = config["OWN_ACCOUNTS"]

    @staticmethod
    def _format_cents(value):
        """Format integer hundredths as a signed decimal, e.g. -1234 -> -12.34.

        Plain ``value/100`` and ``value%100`` floor towards minus infinity and
        rendered -1234 as "-13.66".
        """
        sign = u"-" if value < 0 else u""
        whole, cents = divmod(abs(value), 100)
        return u"{}{}.{:02d}".format(sign, whole, cents)

    def __unicode__(self):
        """Return a one-line human-readable summary of the transfer."""
        if self.balance is not None:
            balance = u"{} {}".format(
                self._format_cents(self.balance), self.balance_currency)
        else:
            balance = u"?"
        return u"{} *{} #{} @{} -\"{}\" -#{} => +\"{}\" +#{} [{} {}] ({}) ~\"{}\"".format(
            self.type, self.index, self.on_account, self.date,
            self.from_name, self.from_account,
            self.to_name, self.to_account,
            self._format_cents(self.amount), self.currency,
            balance, self.title)

    def __str__(self):
        return unicode(self).encode("utf-8")

    def __repr__(self):
        return str(self)

    def __init__(self, row, own_account):
        """Populate the transfer columns from one transaction dict.

        Outgoing transfers, card transactions and fees are stored with a
        negated amount.

        :param row: transaction dict as returned by the bank API
        :param own_account: account number the history was fetched for
        :raises IBParseError: on an unknown ``kind``, an inconsistent bank
            fee entry or a missing mandatory value
        """
        self.date = datetime.strptime(row['date'], '%d.%m.%Y').date()
        self.index = 1
        # Compact JSON of the source record, kept for reference.
        self.raw = json.dumps(row, separators=(',', ':'))
        self.uid = row['id']
        self.title = row['title']
        self.amount = int(round(row['amount'] * 100))
        self.currency = row['currency']
        self.on_account = own_account
        self.from_name = row['remitterName']
        self.from_account = IBParser.parse_account_number(row['remitterNrb'])
        self.to_name = row['beneficiaryName']
        self.to_account = IBParser.parse_account_number(row['beneficiaryNrb'])

        direction = row['kind']
        if direction in ('OUT', 'CARD_TRANS'):
            self.type = 'OUT'
            self.amount = -self.amount
            if row['operationType'] == 'SELF':
                self.type = 'OUT_TO_OWN'
        elif direction == 'IN':
            self.type = 'IN'
            if row['operationType'] == 'SELF':
                self.type = 'IN_FROM_OWN'
        elif direction == 'FEE':
            self.type = 'BANK_FEE'
            self.amount = -self.amount
            if self.to_name != self.from_name and 'Prowizja za przelew natychmiastowy' not in self.title:
                # TODO FIXME: false for instant transfer fees
                raise IBParseError("Invalid to_name/from_name (%r / %r)" % (
                    self.to_name, self.from_name))
            # Only one side has to be an own account; requiring both is false
            # for instant transfer fees (TODO FIXME). The to account seems to
            # always be the main account.
            if self.from_account not in self.OWN_ACCOUNTS and self.to_account not in self.OWN_ACCOUNTS:
                raise IBParseError("Wrong to_account/from_account on bank fee transfer (%r / %r)" % (
                    self.to_account, self.from_account
                ))
        else:
            raise IBParseError(
                "Can't parse direction specifier \"{}\"".format(direction))

        if None in (self.type, self.to_account, self.from_account, self.to_name, self.from_name):
            print(row)
            raise IBParseError(
                "Something went wrong - one of the mandatory values empty",
                self.type, self.to_account, self.from_account,
                self.to_name, self.from_name)
class IBParser(object):
    """Turns a list of transaction dicts into IBRow objects for one account."""

    def __init__(self, account_number):
        """Remember the account the snapshot belongs to; start with no rows."""
        self.account_number = account_number
        self.rows = []
        self.fields = []

    def parse(self, snapshot):
        """Append one IBRow to ``rows`` for every transaction in ``snapshot``."""
        for transaction in snapshot:
            parsed_row = IBRow(transaction, self.account_number)
            self.rows.append(parsed_row)

    def get(self, type_=None, on_account=None):
        """Return parsed rows, optionally filtered by type and/or account.

        A filter that is None matches every row.
        """
        selected = []
        for row in self.rows:
            if row.type != type_ and type_ is not None:
                continue
            if row.on_account != on_account and on_account is not None:
                continue
            selected.append(row)
        return selected

    @staticmethod
    def parse_account_number(s):
        """Find an account number in ``s`` and return it without spaces.

        The formats are tried in order and the first match wins. When the
        leading group is exactly two characters long, "PL" is prepended.

        :returns: the normalised account number, or None if nothing matches
        """
        formats = [
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})", # 26 digits, optional country code - Poland
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 22 characters including BIC bank code - Ireland
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 18 characters including BIC bank code - Netherlands
            "((?:[A-Za-z]{2})?[0-9]{14})", # 14 characters including BIC bank code - Belgium
            "((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})", # 22 digits - Germany
            "^([0-9]{5})$", # 5 digits - weird special hax for Benevity (UK?)
        ]
        for pattern in formats:
            found = re.search(pattern, s)
            if found is None:
                continue
            pieces = found.groups()
            normalised = "".join(pieces)
            if len(pieces[0]) == 2:
                return "PL" + normalised
            return normalised
        return None
class IBFetcher(object):
    """Client session for the JSON API at ``https://cloud.ideabank.pl/api``."""

    BASE = "https://secure.ideabank.pl/"
    START_DATE = "01.11.2016"

    def __init__(self, config):
        """Set up the HTTP session.

        :param config: dict-like configuration; ``IB_UA``,
            ``IB_TRUSTED_DEVICE_TOKEN`` and ``DEFAULT_TIMEOUT`` are read
            from it, all optional
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.token = None
        self.config = config
        self.s = requests.Session()
        self.s.headers.update({
            "User-Agent": config.get("IB_UA", 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Upgrade-Insecure-Requests": "1",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache"
        })
        self.s.cookies.update({
            'ib_trusted_device': config.get('IB_TRUSTED_DEVICE_TOKEN', ''),
        })

    def _wait(self, seconds):
        self.logger.debug("Waiting %d seconds", seconds)
        time.sleep(seconds)

    def _request(self, url, method='GET', *args, **kwargs):
        """Send a request and return the response.

        Relative URLs are resolved against the API base. A ``v`` query
        parameter with the current time in milliseconds is always added, and
        the ``Authentication-Token`` header is sent once a token is known.

        :raises requests.HTTPError: on a 4xx/5xx response (raise_for_status)
        """
        if not url.startswith('http'):
            url = 'https://cloud.ideabank.pl/api' + url

        # Work on copies so the caller's dicts are never modified.
        params = dict(kwargs.get('params') or {})
        params['v'] = round(time.time() * 1000)
        kwargs['params'] = params

        if self.token:
            headers = dict(kwargs.get('headers') or {})
            headers['Authentication-Token'] = self.token
            kwargs['headers'] = headers

        # Without a timeout a stalled connection would block forever while
        # the lock file is held; same default as the previous scraper.
        kwargs.setdefault('timeout', self.config.get('DEFAULT_TIMEOUT', 3600))

        resp = self.s.request(method, url, *args, **kwargs)
        resp.raise_for_status()
        return resp

    def _get(self, url, *args, **kwargs):
        return self._request(url, 'GET', *args, **kwargs)

    def _post(self, url, *args, **kwargs):
        return self._request(url, 'POST', *args, **kwargs)

    def login(self, login, password):
        """Authenticate and store the API authentication token.

        :returns: the authentication token (also kept in ``self.token``)
        """
        self._get('https://sso.cloud.ideabank.pl/authenticate/login', params={
            'login': login,
        })
        login_resp = self._post('https://sso.cloud.ideabank.pl/authenticate/login', json={
            'login': login,
            'password': password,
        }).json()
        login_token = login_resp['token']

        apilogin_resp = self._post('/login', data={
            'token': login_token,
        })
        # The token is set as a cookie on the first response of the redirect
        # chain, not on the final one.
        auth_token = apilogin_resp.history[0].cookies['Authentication-Token']
        self.token = auth_token
        self.logger.debug('Authentication token: %s', self.token)
        return auth_token

    def get_wallet(self):
        """Return ``{'accounts': {number: {id, available_balance, currency}}}``."""
        accounts = self._get('/accounts').json()
        wallet = {
            'accounts': {},
        }
        for g in accounts['firmAccountGroups']:
            for a in g['accounts']:
                account_number = IBParser.parse_account_number(a['nrb']['value'])
                wallet['accounts'][account_number] = {
                    'id': a['productId'],
                    'available_balance': a['balance'],  # FIXME activeBalance?
                    'currency': a['currencyCode'],
                }
        return wallet

    def fetch_account_history(self, account_id, start=None, end=None):
        """Fetch all transactions of one account, following pagination.

        :param account_id: product id of the account
        :param start: first day (default: 30 days before today); dates before
            2019-10-20 are clamped to that day
        :param end: last day (default: today)
        :returns: list of transaction dicts
        """
        epoch_start = date(2019, 10, 20)
        if end is None:
            end = date.today()
        if start is None:
            start = date.today() - timedelta(days=30)
        if start < epoch_start:
            start = epoch_start

        # The real page count is only known after the first response.
        total_pages = 1
        page = 0
        history = []
        while page < total_pages:
            transactions = self._get('/transactions', params={
                'from': '{:04d}-{:02d}-{:02d}'.format(start.year, start.month, start.day),
                'to': '{:04d}-{:02d}-{:02d}'.format(end.year, end.month, end.day),
                'page': str(page),
                'productIds': account_id,
            }).json()
            history.extend(transactions['history'])
            page += 1
            total_pages = transactions['page']['totalPages']
        return history
def usage():
    """Placeholder; currently does nothing and returns ``None``."""
    return None
def lock():
    """Create the lock file named by ``config["LOCK_FILE"]`` or abort.

    The file is created in exclusive mode, so checking for an existing lock
    and creating a new one happen in a single atomic step; two processes
    starting at the same moment cannot both acquire it.

    Exits the process with status 3 when the lock file already exists or is
    missing right after creation.
    """
    fn = config["LOCK_FILE"]
    logging.debug("Setting up lock file %s", fn)
    try:
        # Mode 'x' fails with FileExistsError instead of truncating an
        # existing file, which closes the check-then-create race.
        open(fn, 'x').close()
    except FileExistsError:
        logging.error("Lock file %s exists, aborting", fn)
        sys.exit(3)
    if not os.path.isfile(fn):
        logging.error("Lock file %s somehow does not exist, aborting", fn)
        sys.exit(3)
def release():
    """Delete the lock file named by ``config["LOCK_FILE"]``.

    Exits the process with status 3 when the file is missing before the
    removal or still present after it.
    """
    lock_path = config["LOCK_FILE"]

    def _abort(message):
        # Log the problem (the lock path fills the %s) and stop the process.
        logging.error(message, lock_path)
        sys.exit(3)

    logging.debug("Removing lock file %s", lock_path)

    if not os.path.isfile(lock_path):
        _abort("Lock file %s somehow does not exist, WTF?")

    os.remove(lock_path)

    if os.path.isfile(lock_path):
        _abort("Lock file %s somehow still exists, WTF?")
def _parse_iso_date(value):
    """Convert a ``YYYY-MM-DD`` command-line value into a ``date``.

    Raises ``argparse.ArgumentTypeError`` with a readable message so argparse
    reports the bad value instead of "invalid <lambda> value".
    """
    try:
        return datetime.strptime(value, '%Y-%m-%d').date()
    except ValueError as exc:
        raise argparse.ArgumentTypeError(
            "invalid date {!r}, expected YYYY-MM-DD".format(value)) from exc


# Command-line interface of the fetcher script.
parser = argparse.ArgumentParser()
parser.add_argument('-n', '--no-action', action="store_true", help='do not commit any database changes')
parser.add_argument('-c', '--cached', action="store_true", help='use cached data (test)')
parser.add_argument('-l', '--load', action='append', help='process specified files (test)')
parser.add_argument('-t', '--token', help='use authentication token')
parser.add_argument('--start', type=_parse_iso_date, help='start date (YYYY-MM-DD)')
parser.add_argument('--end', type=_parse_iso_date, help='end date (YYYY-MM-DD)')
parser.add_argument('--no-lock', action='store_true', help='don\'t use lockfile (test)')
parser.add_argument('--print-schema', action="store_true", help='print table schema and quit')
if __name__ == "__main__":
    # Script entry point: gather transaction history (from files given on the
    # command line, from the cache directory, or from the bank), store rows
    # that are not in the database yet, then print a summary.
    args = parser.parse_args()

    CACHE_DIR = config["CACHE_DIR"]
    engine = create_engine(config["SQLALCHEMY_DATABASE_URI"])
    session = sessionmaker(bind=engine)()

    if args.print_schema:
        logging.debug("Called with --print-schema, will print the create " +
                      "statement and quit.")
        print('%s;' % CreateTable(IBRow.__table__).compile(engine))
        for index in IBRow.__table__.indexes:
            print('%s;' % CreateIndex(index).compile(engine))
        sys.exit()

    # NOTE(review): an unhandled exception below leaves the lock file in
    # place, so later runs abort until it is removed by hand. It is not
    # visible here whether that is deliberate - confirm before wrapping the
    # run in try/finally.
    if not args.no_lock:
        lock()

    balances = {}
    history_logs = {}

    if args.load:
        logging.debug("Using manually supplied files")
        for fn in args.load:
            # Each argument has the form ACCOUNT:PATH. Split on the first
            # colon only, so the path itself may contain colons.
            an, sep, f = fn.partition(':')
            if not sep:
                logging.error('Argument "%s" is not in ACCOUNT:FILE form', fn)
                continue
            account_number = IBParser.parse_account_number(an)
            if account_number is None:
                # Report the account part: that is what failed to parse.
                logging.error("File name number \"{}\" unparseable".format(an))
                continue

            logging.debug('Loading "%s" as "%s"', f, account_number)

            with open(f, 'r') as fd:
                history_logs[account_number] = json.loads(fd.read())
    elif args.cached:
        logging.debug("Loading cached files from {}".format(CACHE_DIR))
        for f in os.listdir(CACHE_DIR):
            # balance-* files hold balances, not transaction history.
            if f.startswith('balance-'):
                continue

            account_number = IBParser.parse_account_number(f)
            if account_number is None:
                logging.error("File name number \"{}\" unparseable".format(f))
                continue

            with open(CACHE_DIR + "/" + f, 'r') as fd:
                try:
                    history_logs[account_number] = json.loads(fd.read())
                except Exception as e:
                    logging.error("Failed to decode {}: {}".format(f, e))

            logging.debug("Loading \"{}\" as \"{}\"".format(f, account_number))
    else:
        logging.debug("Normal run - will connect to the bank")
        fetcher = IBFetcher(config)
        if args.token:
            fetcher.token = args.token
            logging.debug("Using provided token")
        elif "IB_LOGIN" not in config.keys() or "IB_PASSWORD" not in config.keys():
            fetcher.login(input("[?] ID: "), input("[?] Password: "))
        else:
            logging.debug("Using saved credentials")
            fetcher.login(config["IB_LOGIN"], config["IB_PASSWORD"])

        wallet = fetcher.get_wallet()
        for account_number, account in wallet["accounts"].items():
            logging.debug("Fetching history for account {} ({})".format(
                account_number, account["id"]))
            history = fetcher.fetch_account_history(account["id"], start=args.start, end=args.end)

            # Keep a copy of the raw history and balance in the cache dir;
            # the --cached branch above reads the history files back.
            history_logs[account_number] = history
            with open(CACHE_DIR + "/" + account_number, 'w') as fd:
                fd.write(json.dumps(history))

            balances[account_number] = (
                account["available_balance"], account["currency"])
            with open(CACHE_DIR + "/balance-"+account_number, 'w') as fd:
                fd.write("{} {}\n".format(
                    account["available_balance"], account["currency"]))

    if not history_logs:
        logging.error('Nothing to process')
        # Drop the lock before exiting; otherwise every later run would
        # abort on the leftover lock file.
        if not args.no_lock:
            release()
        sys.exit()

    stats = {}
    for account_number, history in history_logs.items():
        logging.debug("Parsing history for account {}".format(account_number))
        # Named ib_parser so it does not shadow the argparse ``parser``.
        ib_parser = IBParser(account_number)
        try:
            ib_parser.parse(history)
        except IBMaintenanceError:
            logging.exception('Maintenance error, skipping')
            continue

        stats[account_number] = {}
        stats[account_number]["added"] = 0
        stats[account_number]["skipped"] = 0
        for row in ib_parser.get():
            # Rows are identified by uid; only unseen ones are added.
            if not session.query(IBRow).filter_by(uid=row.uid).first():
                if args.no_action:
                    print(row)
                session.add(row)
                stats[account_number]["added"] += 1
            else:
                stats[account_number]["skipped"] += 1

    if args.no_action:
        logging.info('Running with --no-action, not commiting.')
    else:
        session.commit()

    # That is pretty ugly, but the only alternative would be to change handler
    # level in runtime, and that'd still need some rollback anyway.
    if any(v['added'] for v in stats.values()):
        log_summary = logging.info
    else:
        log_summary = logging.debug

    if balances:
        log_summary("Account balances:")
        for account_number, v in balances.items():
            balance, currency = v
            log_summary("\t{}: {} {}".format(
                account_number, balance, currency))
    log_summary("Done: %r", stats)

    # Best-effort chat notification when the watched account received new
    # rows; any failure is printed and otherwise ignored.
    try:
        notify_account = 'PL91195000012006000648890004'
        added = stats.get(notify_account, {}).get('added')
        if added:
            msg = 'holla holla get dolla: {1[0]} {1[1]} (+{0})'.format(
                added, balances.get(notify_account))
            # Timeout so a hung notification endpoint cannot hold the lock.
            requests.post('http://hackerspace.pl:43288/moonspeak/1/notification', params={
                'target': '#hackerspace-pl-members', 'message': msg,
            }, timeout=15)
    except Exception as exc:
        print(exc)

    if not args.no_lock:
        release()

View file

@ -153,7 +153,11 @@ class CAMT052Parser:
transfer.type = "IN"
transfer.index = 1
transfer.uid = txdtls.find("ns:Refs", ns).find("ns:InstrId", ns).text + '.' + transfer.type
transfer.uid = (
txdtls.find("ns:Refs", ns).find("ns:InstrId", ns).text
+ "."
+ transfer.type
)
transfer.on_account = on_account
transfer.raw = ET.tostring(entry).decode()
transfer.amount = int(Decimal(amt.text) * 100)
@ -169,6 +173,7 @@ class CAMT052Parser:
class PekaoClient:
resp = None
def __init__(self, config):
self.config = config
self.logger = logging.getLogger(self.__class__.__name__)
@ -219,7 +224,9 @@ class PekaoClient:
"MaskLoginForm",
{
"p_passmasked_bis": mask_password(
password, login_mask, alias,
password,
login_mask,
alias,
)
},
)
@ -278,7 +285,9 @@ class PekaoClient:
date_from = datetime.datetime.now() - datetime.timedelta(days=60)
if date_from < pekao_epoch:
self.logger.warning("Rolling back from %r to %r (pekao epoch", date_from, pekao_epoch)
self.logger.warning(
"Rolling back from %r to %r (pekao epoch", date_from, pekao_epoch
)
date_from = pekao_epoch
if date_to is None:
@ -331,13 +340,13 @@ class PekaoClient:
def _go(self, url, method="GET", **args):
    """Request ``url`` through the session and parse the response body.

    The response is stored on ``self.resp`` and its parsed document on
    ``self.bs``. Extra keyword arguments are passed to ``session.request``.
    Raises whatever ``raise_for_status()`` raises on an HTTP error.
    """
    self.logger.debug("=> %s %s", method, url)
    # Send the URL of the previous response as the Referer header.
    if self.resp and self.resp.url:
        self.session.headers["Referer"] = self.resp.url
    self.resp = self.session.request(method, url, timeout=15, **args)
    self.logger.debug(" -> [%d] %s", self.resp.status_code, self.resp.url)
    self.resp.raise_for_status()
    # Parse once; the duplicated assignment and parse were redundant.
    self.bs = BeautifulSoup(self.resp.text, features="html.parser")
def _submit_form(self, name, values):
form = self.bs.find("form", {"name": name})
@ -355,11 +364,12 @@ def lock(fn):
logging.error("Lock file %s exists, aborting", fn)
sys.exit(3)
logging.debug("Setting up lock file %s", fn)
open(fn,'w').close()
open(fn, "w").close()
if not os.path.isfile(fn):
logging.error("Lock file %s somehow does not exist, aborting", fn)
sys.exit(3)
def release(fn):
logging.debug("Removing lock file %s", fn)
if not os.path.isfile(fn):
@ -370,48 +380,67 @@ def release(fn):
logging.error("Lock file %s somehow still exists, WTF?", fn)
sys.exit(3)
parser = argparse.ArgumentParser()
parser.add_argument('--config', help="Load configuration file")
parser.add_argument('-n', '--no-action', action="store_true", help='do not commit any database changes')
parser.add_argument('-c', '--cached', action="store_true", help='use cached data (test)')
parser.add_argument('-l', '--load', action='append', help='process specified files (test)')
parser.add_argument('-t', '--token', help='use authentication token')
parser.add_argument('--no-lock', action='store_true', help='don\'t use lockfile (test)')
parser.add_argument('--print-schema', action="store_true", help='print table schema and quit')
parser.add_argument("--config", help="Load configuration file")
parser.add_argument(
"-n", "--no-action", action="store_true", help="do not commit any database changes"
)
parser.add_argument(
"-c", "--cached", action="store_true", help="use cached data (test)"
)
parser.add_argument(