Merge remote-tracking branch 'origin/banking-pekao'

This commit is contained in:
Kasownik 2022-01-12 01:50:36 +01:00
commit 3fc309b55f
3 changed files with 585 additions and 0 deletions

499
fetch/banking-pekaobiznes.py Executable file
View file

@ -0,0 +1,499 @@
#!/usr/bin/env nix-shell
#!nix-shell -i python3 -p python3 python3Packages.requests python3Packages.requests-cache python3Packages.beautifulsoup4 python3Packages.sqlalchemy
import argparse
import os
import configparser
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import requests
import sys
import hashlib
import json
import re
import pprint
import atexit
import logging
import datetime
import http.cookiejar
import xml.etree.ElementTree as ET
from decimal import Decimal
from urllib.parse import urljoin, urlparse, parse_qs
from bs4 import BeautifulSoup
from binascii import unhexlify
from models import RawTransfer, get_schema
# Notes on the bank's client-side password masking routine:
#   loginPass       = string built from the displayed inputs, "*" in the disabled ones
#   loginMaskArray  = hex2bytes of the "loginMask" variable
#   createPassMaskedBis(username, loginPass, loginMaskArray):
#     aliasArray    = string to bytes
#     mixing (ANDarray) of loginPass with loginMaskArray: every \xff byte in
#     loginMaskArray is replaced by the byte/character of loginPass at that index
#     result        = sha1(alias ++ mixed)
def mask_password(password, login_mask, login_alias):
    """Return the hex SHA-1 digest of the alias followed by the masked password.

    ``login_mask`` is a hex string. Each decoded byte equal to 0xFF is replaced
    by the (encoded) character of ``password`` at the same index; every other
    byte is copied verbatim. The alias bytes are prepended before hashing.
    """
    parts = [login_alias.encode()]
    for position, mask_byte in enumerate(unhexlify(login_mask)):
        if mask_byte == 0xFF:
            parts.append(password[position].encode())
        else:
            parts.append(bytes((mask_byte,)))
    return hashlib.sha1(b"".join(parts)).hexdigest()
class CAMT052Parser:
    """Turn a camt.052.001.02 account report into ``RawTransfer`` objects.

    The XML document is canonicalized (with text stripping) and parsed once in
    the constructor; :meth:`parse` then yields one ``RawTransfer`` per
    ``Ntry`` element of the report.
    """

    # Namespace map used for every element lookup.
    _NS = {"ns": "urn:iso:std:iso:20022:tech:xsd:camt.052.001.02"}

    # Account number layouts, tried in order; the first match wins.
    _ACCOUNT_FORMATS = (
        # 26 digits, optional country code - Poland
        r"((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})",
        # 22 characters including BIC bank code - Ireland
        r"((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})",
        # 18 characters including BIC bank code - Netherlands
        r"((?:[A-Za-z]{2})?[0-9]{2})[ ]?([A-Z]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})",
        # 22 digits - Germany
        r"((?:[A-Za-z]{2})?[0-9]{2})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{4})[ ]?([0-9]{2})",
    )

    def __init__(self, xmldata, own_accounts=None):
        """Parse ``xmldata`` and remember which accounts count as "own".

        ``own_accounts`` is any container of normalized account numbers; it is
        used to classify transfers as ``OUT_TO_OWN`` / ``IN_FROM_OWN``.
        """
        self.xml = ET.fromstring(ET.canonicalize(xmldata, strip_text=True))
        # A fresh list per instance: a literal ``[]`` default would be shared
        # between all parsers created without the argument.
        self.own_accounts = [] if own_accounts is None else own_accounts

    @staticmethod
    def parse_account_number(s):
        """Normalize an account number found anywhere in ``s``.

        Returns the matched groups joined without spaces, or ``None`` when no
        known layout matches. If the number carries no two-letter country
        code, ``"PL"`` is prepended.
        """
        for pattern in CAMT052Parser._ACCOUNT_FORMATS:
            m = re.search(pattern, s)
            if m is not None:
                break
        else:
            return None
        account = "".join(m.groups())
        # First group is "CCdd" with a country code, or just "dd" without one.
        if len(m.group(1)) == 2:
            account = "PL" + account
        return account

    @staticmethod
    def _account_id(acct_elm, ns):
        """Return the raw account identifier text of an ``*Acct`` element.

        Returns ``None`` when the element is absent. Raises ``Exception`` when
        the element exists but holds neither an ``IBAN`` nor an ``Othr/Id``.
        """
        # Compare against None explicitly: an Element without children is
        # falsy, so a plain truth test would misclassify it.
        if acct_elm is None:
            return None
        idelm = acct_elm.find("ns:Id", ns)
        if idelm is not None:
            iban = idelm.find("ns:IBAN", ns)
            if iban is not None:
                return iban.text
            other = idelm.find("ns:Othr", ns)
            if other is not None:
                return other.find("ns:Id", ns).text
        raise Exception(
            "No remote party account found %r" % (ET.tostring(acct_elm).decode(),)
        )

    @staticmethod
    def _party_info(party, ns):
        """Return "<name> <first address line>" for a party element.

        A missing party yields an empty string, and the placeholder name
        ``NOTPROVIDED`` is treated as empty.
        """
        if party is None:
            return ""
        name = party.find("ns:Nm", ns)
        info = "" if name is None or name.text is None else name.text
        if info == "NOTPROVIDED":
            info = ""
        address = party.find("ns:PstlAdr", ns)
        line = None if address is None else address.find("ns:AdrLine", ns)
        if line is not None:
            info = (info + " " + (line.text or "")).strip()
        return info

    def parse(self):
        """Yield a ``RawTransfer`` for every ``Ntry`` element in the report.

        Raises ``Exception`` for an unknown ``CdtDbtInd`` value or for a
        counterparty account element without a usable identifier.
        """
        ns = self._NS
        report = self.xml.find("ns:BkToCstmrAcctRpt", ns).find("ns:Rpt", ns)
        on_account = self.parse_account_number(
            report.find("ns:Acct", ns).find("ns:Id", ns).find("ns:IBAN", ns).text
        )
        for entry in report.findall("ns:Ntry", ns):
            txdtls = entry.find("ns:NtryDtls", ns).find("ns:TxDtls", ns)
            tx_type = entry.find("ns:CdtDbtInd", ns).text
            # The counterparty is the creditor for debits, the debtor for credits.
            if tx_type == "DBIT":
                party_tag, acct_tag = "ns:Cdtr", "ns:CdtrAcct"
            elif tx_type == "CRDT":
                party_tag, acct_tag = "ns:Dbtr", "ns:DbtrAcct"
            else:
                raise Exception("Unknown transaction type %r" % tx_type)

            related = txdtls.find("ns:RltdPties", ns)
            remote_party = None if related is None else related.find(party_tag, ns)
            acct_elm = None if related is None else related.find(acct_tag, ns)
            remote_party_acct = self._account_id(acct_elm, ns)
            remote_party_info = self._party_info(remote_party, ns)
            remote_account = (
                None
                if remote_party_acct is None
                else self.parse_account_number(remote_party_acct)
            )

            amt = entry.find("ns:Amt", ns)
            transfer = RawTransfer()
            transfer.index = 1
            transfer.uid = txdtls.find("ns:Refs", ns).find("ns:TxId", ns).text
            transfer.on_account = on_account
            transfer.raw = ET.tostring(entry).decode()
            # Stored as an integer number of hundredths; Decimal keeps it exact.
            transfer.amount = int(Decimal(amt.text) * 100)
            transfer.currency = amt.attrib["Ccy"]
            transfer.date = datetime.datetime.strptime(
                entry.find("ns:BookgDt", ns).find("ns:DtTm", ns).text,
                "%Y-%m-%dT%H:%M:%S",
            ).date()
            transfer.title = txdtls.find("ns:RmtInf", ns).find("ns:Ustrd", ns).text

            if tx_type == "DBIT":
                transfer.to_account = remote_account
                transfer.to_name = remote_party_info
                transfer.from_account = on_account
                if remote_party_acct is None:
                    # A debit without any counterparty account is a bank fee.
                    transfer.type = "BANK_FEE"
                elif transfer.to_account in self.own_accounts:
                    transfer.type = "OUT_TO_OWN"
                else:
                    transfer.type = "OUT"
            else:
                transfer.to_account = on_account
                transfer.from_account = remote_account
                transfer.from_name = remote_party_info
                if transfer.from_account in self.own_accounts:
                    transfer.type = "IN_FROM_OWN"
                else:
                    transfer.type = "IN"
            yield transfer
class PekaoClient:
    """Scraping client for the Pekao Biznes24 web interface.

    Keeps a ``requests`` session (optionally cached and/or backed by a cookie
    file), the last response in ``self.resp`` and its parsed HTML in
    ``self.bs``. After :meth:`login` the Struts token required by the other
    requests is available as ``self.taglib_token``.
    """

    def __init__(self, config):
        """Build the HTTP session from a mapping-like ``config``.

        Keys read: ``use-cache``, ``cookies-file``, ``user-agent`` (all
        optional here) and, during login, ``tdid``.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        # NOTE(review): any non-empty value enables the cache, including the
        # string "false" when config comes from configparser.
        if config.get("use-cache"):
            import requests_cache

            self.session = requests_cache.CachedSession(
                "pekao_cache", allowable_methods=["GET", "POST"]
            )
        else:
            self.session = requests.Session()
        if config.get("cookies-file"):
            self.session.cookies = http.cookiejar.LWPCookieJar(
                filename=config.get("cookies-file")
            )
            try:
                self.session.cookies.load(ignore_discard=True, ignore_expires=True)
            except OSError as exc:
                # Best effort: a missing or unreadable cookie file (LoadError
                # is an OSError too) just means starting without cookies.
                self.logger.debug(
                    "Could not load cookies from %s: %s",
                    config.get("cookies-file"),
                    exc,
                )
            # Persist whatever cookies the session ends up with.
            atexit.register(
                lambda: self.session.cookies.save(
                    ignore_discard=True, ignore_expires=True
                )
            )
        self.session.headers["User-Agent"] = config.get(
            "user-agent",
            "Mozilla/5.0 (X11; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0",
        )

    def login(self, alias, password):
        """Log in with ``alias`` / ``password`` and store ``taglib_token``.

        Walks the alias form, the masked-password form, the optional
        "messages on login" confirmation and the frame redirects, then reads
        the Struts token from the final URL's query string.
        """
        self._go("https://www.pekaobiznes24.pl/do/login")
        self._submit_form(
            "LoginAliasForm",
            {
                "p_alias": alias,
                "deviceFingerprint": self.config["tdid"],
            },
        )
        self._go("https://www.pekaobiznes24.pl/do/Authorization")
        login_mask = re.findall(r"var loginMask = '([0-9a-f]*)';", self.resp.text)[0]
        self._submit_form(
            "MaskLoginForm",
            {
                "p_passmasked_bis": mask_password(
                    password, login_mask, alias,
                )
            },
        )
        redirect_url = urljoin(
            self.resp.url, re.findall(r"top.location='(.*)'", self.resp.text)[0]
        )
        self._go(redirect_url)
        if self.bs.find("form", {"name": "messagesOnLoginForm"}):
            self.logger.info("Confirming messages on login...")
            self._submit_form("messagesOnLoginForm", {"task": "SAVE_AS_READED"})
        jsredir = re.findall(r'this.location = "(.*)"', self.resp.text)
        if jsredir:
            self._go(urljoin(self.resp.url, jsredir[0]))
        self._go(urljoin(self.resp.url, self.bs.find("frame", {"name": "main"})["src"]))
        url = urlparse(self.resp.url)
        self.taglib_token = parse_qs(url.query)["org.apache.struts.taglib.html.TOKEN"][
            0
        ]
        self.logger.debug("taglib token: %r", self.taglib_token)

    def list_accounts(self):
        """Return a dict mapping ``p_acc_id`` to the account record.

        The records are decoded from the ``LB_ALL_PAGE_ACCOUNTS`` JavaScript
        array embedded in the account selection page.
        """
        resp = self.session.post(
            "https://www.pekaobiznes24.pl/webcorpo/do/allAccountsSelect?remChckdAcc=",
            data={"org.apache.struts.taglib.html.TOKEN": self.taglib_token},
        )
        resp.raise_for_status()
        accounts = json.loads(
            re.findall(r"LB_ALL_PAGE_ACCOUNTS = (\[.*\]);", resp.text)[0]
        )
        return {acc["p_acc_id"]: acc for acc in accounts}

    def fetch_transfers_camt052(self, account_id, date_from=None, date_to=None):
        """Export the transactions of ``account_id`` and return the XML text.

        ``date_from`` defaults to 60 days ago and is never allowed to precede
        the hard-coded "pekao epoch" (2021-11-21); ``date_to`` defaults to
        now. Returns ``None`` when the bank reports that there is nothing to
        export, and raises ``Exception`` for any other non-XML answer.
        """
        res = self.session.post(
            "https://www.pekaobiznes24.pl/webcorpo/do/desktop",
            data={
                "task": "NAV_REDIRECT#exportTransTemplatesList",
                "org.apache.struts.taglib.html.TOKEN": self.taglib_token,
            },
        )
        res.raise_for_status()

        pekao_epoch = datetime.datetime(2021, 11, 21)
        if date_from is None:
            date_from = datetime.datetime.now() - datetime.timedelta(days=60)
        if date_from < pekao_epoch:
            self.logger.warning(
                "Rolling back from %r to %r (pekao epoch)", date_from, pekao_epoch
            )
            date_from = pekao_epoch
        if date_to is None:
            date_to = datetime.datetime.now()

        res = self.session.post(
            "https://www.pekaobiznes24.pl/webcorpo/do/exportTransactions",
            data={
                "org.apache.struts.taglib.html.TOKEN": self.taglib_token,
                "clearP_text": "N",
                "createZip": "false",
                "exportType": "EO",
                "p_acc_id": account_id,
                # "p_acc_no": "...",
                "p_itt_code": "EO",
                "saveAsReport": "false",
                "showInform": "0",
                "synchConfirmed": "N",
                "task": "EXPORT",
                # NOTE(review): hard-coded export template id; presumably
                # specific to one bank profile - confirm before reuse.
                "templateId": "219697",
                "p_date_type": "1",
                # "p_last_cnt": last_days,
                "p_date_from": date_from.strftime("%d.%m.%Y"),
                "p_date_fromDAY": date_from.strftime("%d"),
                "p_date_fromMON": date_from.strftime("%m"),
                "p_date_fromYEAR": date_from.strftime("%Y"),
                "minDate": "01/01/1945",
                "p_date_to": date_to.strftime("%d.%m.%Y"),
                "p_date_toDAY": date_to.strftime("%d"),
                "p_date_toMON": date_to.strftime("%m"),
                "p_date_toYEAR": date_to.strftime("%Y"),
            },
        )
        res.raise_for_status()
        if "xml" not in res.headers.get("content-disposition", ""):
            # Not a file download: the page carries an error toast instead.
            errormsg = re.findall(
                r"""var toast = toastr\['error'\]\(\s*"(.*)",\s*""\);""",
                res.text,
                re.MULTILINE,
            )
            # "Brak danych do eksportu." = "No data to export."
            if errormsg != ["Brak danych do eksportu."]:
                raise Exception("Unknown error: %r" % errormsg)
            return None
        return res.text

    def _go(self, url, method="GET", **args):
        """Request ``url`` and store the response and its parsed HTML.

        Raises ``requests.HTTPError`` (via ``raise_for_status``) on an HTTP
        error status.
        """
        self.resp = self.session.request(method, url, **args)
        self.resp.raise_for_status()
        self.logger.debug("=> %s %s", method, self.resp.url)
        self.bs = BeautifulSoup(self.resp.text, features='html.parser')

    def _submit_form(self, name, values):
        """Submit the form called ``name`` from the current page.

        Named, non-disabled ``<input>`` values are sent, overridden by
        ``values``. Raises ``Exception`` when the form is not on the page.
        """
        form = self.bs.find("form", {"name": name})
        if form is None:
            raise Exception("Form %r not found at %s" % (name, self.resp.url))
        # NOTE(review): a bare ``disabled`` attribute parses to "" and is thus
        # treated as enabled here - confirm this matches the bank's markup.
        form_data = {
            inp.get("name"): inp.get("value")
            for inp in form.find_all("input")
            if inp.get("name") and not inp.get("disabled")
        }
        data = {**form_data, **values}
        target = urljoin(self.resp.url, form.get("action"))
        # HTML forms without a method attribute are submitted with GET.
        self._go(target, (form.get("method") or "GET").upper(), data=data)
def lock(fn):
    """Create the lock file ``fn`` or exit with status 3 if it already exists.

    The file is created with ``O_CREAT | O_EXCL`` so that checking for an
    existing lock and creating a new one is a single atomic step; two
    processes starting at the same time cannot both acquire the lock.
    """
    logging.debug("Setting up lock file %s", fn)
    try:
        # 0o666 (before umask) matches what the built-in open() would use.
        fd = os.open(fn, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
    except FileExistsError:
        logging.error("Lock file %s exists, aborting", fn)
        sys.exit(3)
    os.close(fd)
def release(fn):
    """Delete the lock file ``fn``.

    Exits with status 3 when the file is missing beforehand or is still
    present after removal.
    """
    logging.debug("Removing lock file %s", fn)
    if os.path.isfile(fn):
        os.remove(fn)
        if not os.path.isfile(fn):
            # Normal case: the lock is gone.
            return
        logging.error("Lock file %s somehow still exists, WTF?", fn)
    else:
        logging.error("Lock file %s somehow does not exist, WTF?", fn)
    sys.exit(3)
# Command-line interface of the fetcher script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--config",
    default="config.ini",
    help="Load configuration file",
)
parser.add_argument(
    "-n",
    "--no-action",
    action="store_true",
    help="do not commit any database changes",
)
parser.add_argument(
    "-c",
    "--cached",
    action="store_true",
    help="use cached data (test)",
)
# May be given several times; the values accumulate in a list.
parser.add_argument(
    "-l",
    "--load",
    action="append",
    help="process specified files (test)",
)
parser.add_argument(
    "-t",
    "--token",
    help="use authentication token",
)
parser.add_argument(
    "--no-lock",
    action="store_true",
    help="don't use lockfile (test)",
)
parser.add_argument(
    "--print-schema",
    action="store_true",
    help="print table schema and quit",
)
if __name__ == "__main__":
    # Script entry point: gather CAMT.052 statements (from files given with
    # --load, from the cache directory, or live from the bank), parse them and
    # store transfers whose uid is not in the database yet.
    args = parser.parse_args()
    config = configparser.ConfigParser()
    config.read(args.config)
    logging.basicConfig(
        level=config['logging']['level'],
        format=config['logging'].get(
            'format', '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
        ),
    )
    logging.getLogger('chardet').setLevel(logging.WARN)
    CACHE_DIR = config['general']['cache_dir']
    engine = create_engine(config['database']['uri'])
    session = sessionmaker(bind=engine)()

    if args.print_schema:
        logging.debug("Called with --print-schema, will print the create " +
                      "statement and quit.")
        print(get_schema(engine))
        sys.exit()

    # NOTE: the lock is released only on the regular exit paths below; an
    # unhandled exception leaves the lock file in place.
    if not args.no_lock:
        lock(config['general']['lockfile'])

    balances = {}
    history_logs = {}

    if args.load:
        logging.debug("Using manually supplied files")
        for fn in args.load:
            # Expected form is ACCOUNT:PATH; split on the first colon only so
            # that the path itself may contain colons.
            an, sep, f = fn.partition(':')
            if not sep:
                logging.error('Expected ACCOUNT:FILE, got "%s"', fn)
                continue
            account_number = CAMT052Parser.parse_account_number(an)
            if account_number is None:
                logging.error("File name number \"{}\" unparseable".format(an))
                continue
            logging.debug('Loading "%s" as "%s"', f, account_number)
            with open(f, 'r') as fd:
                # The parser consumes the raw XML text, same as the cache path.
                history_logs[account_number] = fd.read()
    elif args.cached:
        logging.debug("Loading cached files from {}".format(CACHE_DIR))
        for f in os.listdir(CACHE_DIR):
            # Balance snapshots live next to the statements; skip them.
            if f.startswith('balance-'):
                continue
            account_number = CAMT052Parser.parse_account_number(f)
            if account_number is None:
                logging.error("File name number \"{}\" unparseable".format(f))
                continue
            with open(os.path.join(CACHE_DIR, f), 'r') as fd:
                history_logs[account_number] = fd.read()
            logging.debug("Loading \"{}\" as \"{}\"".format(f, account_number))
    else:
        logging.debug("Normal run - will connect to the bank")
        fetcher = PekaoClient(config['scraper'])
        if args.token:
            # The client reads the token from ``taglib_token``.
            fetcher.taglib_token = args.token
            logging.debug("Using provided token")
        elif "alias" not in config['scraper'] or "password" not in config['scraper']:
            fetcher.login(input("[?] ID: "), input("[?] Password: "))
        else:
            logging.debug("Using saved credentials")
            fetcher.login(config["scraper"]['alias'], config["scraper"]['password'])

        accounts = fetcher.list_accounts()
        for account_id, account in accounts.items():
            account_number = CAMT052Parser.parse_account_number(account['p_acc_no'])
            if account_number is None:
                logging.error(
                    'Account number "%s" unparseable, skipping', account['p_acc_no']
                )
                continue
            logging.debug("Fetching history for account {} ({}) {}".format(
                account_number, account_id, account["p_acc_alias"],
            ))
            history = fetcher.fetch_transfers_camt052(account_id)
            history_logs[account_number] = history
            # Cache the statement (empty file when there was nothing to export).
            with open(os.path.join(CACHE_DIR, account_number), 'w') as fd:
                fd.write('' if history is None else history)
            balances[account_number] = (
                account["p_acc_avail_balance"], account["p_acc_currency"])
            with open(os.path.join(CACHE_DIR, "balance-" + account_number), 'w') as fd:
                fd.write("{} {}\n".format(*balances[account_number]))

    if not history_logs:
        logging.error('Nothing to process')
        # Clean exit: do not leave a stale lock behind for the next run.
        if not args.no_lock:
            release(config['general']['lockfile'])
        sys.exit()

    stats = {}
    own_accounts = list(history_logs)
    for account_number, history in history_logs.items():
        logging.debug("Parsing history for account {}".format(account_number))
        if not history:
            logging.debug('No transfers for that account, continuing...')
            continue
        # Named differently from the module-level argparse ``parser``.
        camt_parser = CAMT052Parser(history, own_accounts=own_accounts)
        account_stats = stats[account_number] = {"added": 0, "skipped": 0}
        for row in camt_parser.parse():
            # Transfers are de-duplicated on their uid.
            if not session.query(RawTransfer).filter_by(uid=row.uid).first():
                session.add(row)
                account_stats["added"] += 1
            else:
                account_stats["skipped"] += 1

    if args.no_action:
        logging.info('Running with --no-action, not commiting.')
    else:
        session.commit()

    # That is pretty ugly, but the only alternative would be to change handler
    # level in runtime, and that'd still need some rollback anyway.
    if any(v['added'] for v in stats.values()):
        log_summary = logging.info
    else:
        log_summary = logging.debug

    if balances:
        log_summary("Account balances:")
        for account_number, (balance, currency) in balances.items():
            log_summary("\t{}: {} {}".format(
                account_number, balance, currency))
    log_summary("Done: %r", stats)

    if not args.no_lock:
        release(config['general']['lockfile'])

15
fetch/config.ini.dist Normal file
View file

@ -0,0 +1,15 @@
# Sample configuration for banking-pekaobiznes.py. The script reads
# "config.ini" unless another path is given with --config.
[general]
# Directory holding the per-account statement files and "balance-<account>"
# files written on a normal run and read back with --cached.
cache_dir=cache/
# Lock file created at start-up and removed at the end (skipped with --no-lock).
lockfile=lockfile
[database]
# URI passed to SQLAlchemy's create_engine().
uri=sqlite:///./pekaobiznes.sqlite3
[scraper]
# Sent as the "deviceFingerprint" field of the login alias form.
tdid=...trusted_device_id...
# Credentials; when either key is missing the script prompts for both.
alias=ib12345678
password=SecretPassw0rd
# Optional User-Agent header override.
# NOTE: configparser does not strip quotes - they become part of the value.
user-agent="..."
# Other optional keys read by the client: use-cache, cookies-file.
[logging]
# Level passed to logging.basicConfig(); an optional "format" key is also read.
level=INFO

71
fetch/models.py Normal file
View file

@ -0,0 +1,71 @@
import time
from sqlalchemy import (
Column,
Integer,
String,
Date,
BigInteger,
create_engine,
MetaData,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import CreateTable, CreateIndex
Base = declarative_base()


class RawTransfer(Base):
    """ORM model of one bank transfer as scraped, stored in ``raw_transfer``."""

    __tablename__ = "raw_transfer"

    id = Column(Integer, primary_key=True)
    # Original source fragment the row was built from.
    raw = Column(String)
    # Identifier of the transfer (indexed).
    uid = Column(String(128), index=True)
    # Account the statement entry belongs to (indexed).
    on_account = Column(String(32), index=True)
    # Integer amount in hundredths of the currency unit (see __str__).
    amount = Column(Integer)
    currency = Column(String(8))
    date = Column(Date)
    type = Column(String(16))
    index = Column(Integer)
    title = Column(String(256))
    balance = Column(Integer)
    balance_currency = Column(String(8))
    from_account = Column(String(32))
    to_account = Column(String(32))
    from_name = Column(String(256))
    to_name = Column(String(256))
    # Microseconds since the epoch at the time the row object gets its default.
    scrape_timestamp = Column(BigInteger, default=lambda: round(time.time() * 1000000))

    def __str__(self):
        """Return a one-line human-readable summary of the transfer."""
        # Split sign and magnitude first: floor division on a negative amount
        # would render -150 as "-2.50" instead of "-1.50".
        sign = "-" if self.amount < 0 else ""
        whole, cents = divmod(abs(self.amount), 100)
        return '{} *{} #{} @{} -"{}" -#{} => +"{}" +#{} [{}{}.{:02d} {}] ~"{}"'.format(
            self.type,
            self.uid,
            self.on_account,
            self.date,
            self.from_name,
            self.from_account,
            self.to_name,
            self.to_account,
            sign,
            whole,
            cents,
            self.currency,
            self.title,
        )

    def __repr__(self):
        """Return ``<Transfer ...>`` wrapping the ``__str__`` summary."""
        return "<Transfer %s>" % (str(self),)
def get_schema(engine):
    """Return the DDL for the ``raw_transfer`` table and its indexes.

    Each statement is compiled for ``engine``'s dialect and terminated with
    ``";\\n"``. The CREATE TABLE statement comes first, followed by the
    CREATE INDEX statements in sorted order so the output is stable between
    runs (``Table.indexes`` is an unordered set).
    """
    table = RawTransfer.__table__
    statements = [str(CreateTable(table).compile(engine))]
    statements.extend(
        sorted(str(CreateIndex(index).compile(engine)) for index in table.indexes)
    )
    return "".join("%s;\n" % (statement,) for statement in statements)