# Script to attempt to clean up our owncloud database (b/32) after The Postgres
# Fuckup (b/30).
#
# Think of it as a one-shot fsck, documented in the form of the code that q3k@
# used to recover from this kerfuffle.
#
# SECURITY: It's full of manual SQL query crafting without parametrization.
# Don't attempt to use it for anything else other than this one-shot usecase.
#
# You will need to tunnel to the postgreses running on Boston:
#    $ ssh \
#        -L15432:127.0.0.1:5432 \
#        -L15433:127.0.0.1:5433 \
#        hackerspace.pl

from datetime import datetime
import os

import psycopg2

incident_start = 1611529200  # when pg12 started to run
incident_end = 1611788400  # when we rolled back to pg9

# BUGFIX: the original chained .strip() directly onto os.environ.get(...),
# which raises AttributeError (None has no .strip) when the variable is unset,
# masking the intended error message below. Default to "" so the explicit
# check fires instead.
OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD", "").strip()
if not OWNCLOUD_PASSWORD:
    # Get it from boston, /var/www/owncloud/config/config.php.
    raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password")

# conn9 is the rolled-back PG9 database (current production state);
# conn12 is the PG12 database that ran during the incident window.
conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")


def idset(conn, table, keyname="id"):
    """Return a set of IDs from a given table, one per row.

    `table` is the table name without the "oc_" prefix. Neither `table` nor
    `keyname` may come from untrusted input (interpolated into SQL).
    """
    cur = conn.cursor()
    cur.execute(f"SELECT {keyname} FROM oc_{table}")
    res = cur.fetchall()
    cur.close()
    return {r[0] for r in res}


def valset(conn, table, keys):
    """Return a set of concatenated values for the given keys in a table, one per row.

    Each row is flattened to a single ';;;'-joined string so rows can be
    compared across databases with set operations. `table` and `keys` must be
    trusted (interpolated into SQL).
    """
    keynames = ", ".join(keys)
    cur = conn.cursor()
    cur.execute(f"SELECT {keynames} FROM oc_{table}")
    res = cur.fetchall()
    cur.close()
    return {';;;'.join(str(elem) for elem in r) for r in res}


# Check accounts difference.
#
# RESULT: Thankfully, no accounts have been accidentally rolled back.
accounts12 = idset(conn12, "accounts", keyname="uid")
accounts9 = idset(conn9, "accounts", keyname="uid")
print("Accounts missing in 9:", accounts12 - accounts9)
assert (accounts12 - accounts9) == set()


def account_by_uid(conn, uid):
    """Return SSO UID for a given Owncloud UID.

    Looks up the LDAP DN mapped to the Owncloud user and extracts the leading
    uid= RDN. Raises AssertionError if the DN does not start with "uid=".
    """
    cur = conn.cursor()
    # Parameterized (was string interpolation): uid values come from the
    # database itself and must not be spliced into the SQL text.
    cur.execute("SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = %s", (uid,))
    dn, = cur.fetchone()
    cur.close()
    part = dn.split(',')[0]
    assert part.startswith('uid=')
    return part[4:]


def storage_owner_by_id(conn, id_):
    """Return SSO UID for a given storage numerical ID.

    Special-cases the shared S3 object store ("S3") and the gallery
    pseudo-user ("GALLERY"); otherwise resolves the owning user via
    account_by_uid.
    """
    cur = conn.cursor()
    # Parameterized (was string interpolation) for the same reason as above.
    cur.execute("SELECT id FROM oc_storages WHERE numeric_id = %s", (id_,))
    oid, = cur.fetchone()
    cur.close()
    if oid == 'object::store:amazon::nextcloud':
        return "S3"
    assert oid.startswith('object::user:')
    userid = oid[13:]  # strip the 'object::user:' prefix
    assert len(userid) > 0
    if userid == "gallery":
        return "GALLERY"
    return account_by_uid(conn, userid)


# Check shares table. This table contains the intent of sharing some file with someone else.
#
# RESULT: we only have things that have been removed after rollback to PG9,
# nothing was created in PG12 and lost.
shareids12 = idset(conn12, "share")
shareids9 = idset(conn9, "share")
print("Shares missing in 9:", len(shareids12 - shareids9))
cur12 = conn12.cursor()
for id_ in shareids12 - shareids9:
    # Parameterized (was string interpolation of the share id).
    cur12.execute("SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = %s", (id_,))
    uid_owner, file_target, stime, share_with = cur12.fetchone()
    account = account_by_uid(conn12, uid_owner)
    stime_human = datetime.utcfromtimestamp(stime).strftime('%Y-%m-%d %H:%M:%S')
    print(f"Missing share {id_} {file_target} owned by {account}..")
    # Shares created outside the incident window were deleted legitimately
    # after the rollback - nothing to recover.
    if stime < incident_start or stime > incident_end:
        print(f" Skipping, created at {stime_human}")
        continue
    raise Exception("Unhandled.")
cur12.close()

# Check mounts table. This contains root file storages for each user, but also
# incoming shares 'mounted' into a user's account.
# From what I can tell, storage_id/root_id are the source path that's being
# mounted (root_id being the fileid inside an oc_filecache, and storage_id
# being the storage in which that file is kept), while user_id/mount_point are
# the mount destination (ie. path into which this is mounted for a user's
# view).
#
# RESULT: we only have share-mounts missing for a handful of users. We choose
# to ignore it, as we assume next time these users log in they will get the
# mounts again.
# TODO(q3k): verify this
mount_columns = ["storage_id", "root_id", "user_id", "mount_point"]
mounts12 = valset(conn12, "mounts", mount_columns)
mounts9 = valset(conn9, "mounts", mount_columns)
missing_mounts = mounts12 - mounts9
print("Mounts missing in 9:", len(missing_mounts))

mount_names_ok = {
    # Mounts that appearify normally whenever you log into owncloud, as they
    # are the result of shares:
    "2020-03-26_covid_templar",
    "camera",
    "Public Shaming",
    "przylbice.md",
    "Test.txt",
    "covid",
    # Mounts that used to be from a share that existed, but has been since
    # deleted in PG9:
    "Covid-instrukcje",
    "Chaos_modele_covid",
    "Covid_proces_presspack",
}

# Split each flattened row back into its four columns and walk them in
# user_id order so the report groups mounts per user.
mounts_sorted = sorted(
    (tuple(row.split(';;;')) for row in missing_mounts),
    key=lambda rec: rec[2],
)
for storage_id, root_id, user_id, mount_point in mounts_sorted:
    assert mount_point.startswith("/" + user_id + "/")
    # Drop the leading "/<user_id>" so we're left with the user-relative path.
    relative_point = mount_point[len(user_id) + 1:]
    owner_account = account_by_uid(conn12, user_id)
    print(f"Missing mount {relative_point}, storage ID {storage_id}, owned by {owner_account}..")
    storage_owner = storage_owner_by_id(conn12, storage_id)
    print(f" Storage owner: {storage_owner}")
    segments = relative_point.split('/')
    is_known_ok = (
        len(segments) == 4
        and segments[0] == ''
        and segments[1] == 'files'
        and segments[2] in mount_names_ok
        and segments[3] == ''
    )
    if not is_known_ok:
        raise Exception("Unhandled")
    print(" Skipping, known okay")