# forked from hswaw/hscloud
# Script to attempt to clean up our owncloud database (b/32) after The Postgres
# Fuckup (b/30).
#
# Think of it as a one-shot fsck, documented in the form of the code that q3k@
# used to recover from this kerfuffle.
#
# SECURITY: It's full of manual SQL query crafting without parametrization.
# Don't attempt to use it for anything else other than this one-shot usecase.
#
# You will need to tunnel to the postgreses running on Boston:
#    $ ssh \
#        -L15432:127.0.0.1:5432 \
#        -L15433:127.0.0.1:5433 \
#        hackerspace.pl
from datetime import datetime
import os

import psycopg2


# Incident window: rows created between these UNIX timestamps lived only in
# the PG12 database and may have been lost on rollback.
incident_start = 1611529200  # when pg12 started to run
incident_end = 1611788400  # when we rolled back to pg9


# Get it from boston, /var/www/owncloud/config/config.php.
# NOTE: the "" default matters — os.environ.get(...) returns None when the
# variable is unset, and None.strip() would raise AttributeError instead of
# the intended, readable exception below.
OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD", "").strip()
if not OWNCLOUD_PASSWORD:
    raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password")


# Two databases, reached via the SSH tunnels described above:
# conn9 is the rolled-back (currently live) PG9, conn12 is the abandoned PG12
# that holds whatever was written during the incident window.
conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
def idset(conn, table, keyname="id"):
    """Fetch every value of a single key column from oc_<table> as a set."""
    cursor = conn.cursor()
    cursor.execute(f"SELECT {keyname} FROM oc_{table}")
    rows = cursor.fetchall()
    cursor.close()
    # One column per row; collapse duplicates via a set comprehension.
    return {row[0] for row in rows}
def valset(conn, table, keys):
    """Fetch rows of the given key columns from oc_<table>, each row encoded
    as a single ';;;'-joined string, returned as a set."""
    cursor = conn.cursor()
    cursor.execute("SELECT {} FROM oc_{}".format(", ".join(keys), table))
    rows = cursor.fetchall()
    cursor.close()
    # Stringify every column so rows become hashable, comparable keys.
    return {";;;".join(str(value) for value in row) for row in rows}
# Check accounts difference.
#
# RESULT: Thankfully, no accounts have been accidentally rolled back.
uids_pg12 = idset(conn12, "accounts", keyname="uid")
uids_pg9 = idset(conn9, "accounts", keyname="uid")
missing_accounts = uids_pg12 - uids_pg9
print("Accounts missing in 9:", missing_accounts)
assert missing_accounts == set()
def account_by_uid(conn, uid):
    """Return SSO UID for a given Owncloud UID.

    Looks up the LDAP DN mapped to the Owncloud user in oc_ldap_user_mapping
    and extracts the value of the leading uid= RDN.
    """
    cur = conn.cursor()
    # Parameterized query: the rest of this one-shot script builds SQL by
    # hand, but a uid containing a quote would break (or subvert) the query.
    cur.execute("SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = %s", (uid,))
    dn, = cur.fetchone()
    cur.close()
    # A DN looks like "uid=foo,ou=People,dc=..."; the first RDN carries the UID.
    part = dn.split(',')[0]
    assert part.startswith('uid=')
    return part[4:]
def storage_owner_by_id(conn, id_):
    """Return SSO UID for a given storage numerical ID.

    Special cases: the shared S3 object store maps to "S3", and the synthetic
    gallery user maps to "GALLERY"; everything else is resolved to an SSO UID
    via account_by_uid.
    """
    cur = conn.cursor()
    # Parameterized for consistency/safety, even though numeric IDs come from
    # our own database. (The original compared against a quoted string; the
    # driver-side parameter behaves the same for integer-valued columns.)
    cur.execute("SELECT id FROM oc_storages WHERE numeric_id = %s", (id_,))
    oid, = cur.fetchone()
    cur.close()
    if oid == 'object::store:amazon::nextcloud':
        return "S3"
    # All remaining storages are per-user: "object::user:<userid>".
    assert oid.startswith('object::user:')
    userid = oid[13:]
    assert len(userid) > 0
    if userid == "gallery":
        return "GALLERY"
    return account_by_uid(conn, userid)
# Check shares table. This table contains the intent of sharing some file with someone else.
#
# RESULT: we only have things that have been removed after rollback to PG9,
# nothing was created in PG12 and lost.
shareids12 = idset(conn12, "share")
shareids9 = idset(conn9, "share")
missing_shares = shareids12 - shareids9
print("Shares missing in 9:", len(missing_shares))
cur12 = conn12.cursor()
for share_id in missing_shares:
    cur12.execute(f"SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = {share_id}")
    uid_owner, file_target, stime, share_with = cur12.fetchone()
    owner_account = account_by_uid(conn12, uid_owner)
    # NOTE(review): datetime.utcfromtimestamp is deprecated since Python 3.12;
    # acceptable for this one-shot script.
    created_human = datetime.utcfromtimestamp(stime).strftime('%Y-%m-%d %H:%M:%S')
    print(f"Missing share {share_id} {file_target} owned by {owner_account}..")
    # Only shares created during the incident window could have been lost.
    if not (incident_start <= stime <= incident_end):
        print(f"  Skipping, created at {created_human}")
        continue
    raise Exception("Unhandled.")
cur12.close()
# Check mounts table. This contains root file storages for each user, but also
# incoming shares 'mounted' into a user's account.
# From what I can tell, storage_id/root_id are the source path that's being
# mounted (root_id being the fileid inside an oc_filecache, and storage_id
# being the storage in which that file is kept), while user_id/mount_point are
# the mount destination (ie. path into which this is mounted for a user's
# view).
#
# RESULT: we only have share-mounts missing for a handful of users. We choose
# to ignore it, as we assume next time these users log in they will get the
# mounts again.
# TODO(q3k): verify this
mount_fields = ["storage_id", "root_id", "user_id", "mount_point"]
mounts12 = valset(conn12, "mounts", mount_fields)
mounts9 = valset(conn9, "mounts", mount_fields)
print("Mounts missing in 9:", len(mounts12 - mounts9))
mount_names_ok = {
    # Mounts that appearify normally whenever you log into owncloud, as they
    # are the result of shares':
    "2020-03-26_covid_templar", "camera", "Public Shaming",
    "przylbice.md", "Test.txt", "covid",
    # Mounts that used to be from a share that existed, but has been since
    # deleted in PG9.
    "Covid-instrukcje", "Chaos_modele_covid", "Covid_proces_presspack",
}
# Split each ';;;'-encoded row back into fields, ordered by user for readable
# output.
mounts_sorted = sorted(
    (tuple(entry.split(';;;')) for entry in mounts12 - mounts9),
    key=lambda entry: entry[2],
)
for storage_id, root_id, user_id, mount_point in mounts_sorted:
    # Mount points are always rooted under the owning user's directory.
    assert mount_point.startswith("/" + user_id + "/")
    relative_point = mount_point[len(user_id) + 1:]
    account = account_by_uid(conn12, user_id)
    print(f"Missing mount {relative_point}, storage ID {storage_id}, owned by {account}..")
    storage_owner = storage_owner_by_id(conn12, storage_id)
    print(f"  Storage owner: {storage_owner}")
    # A known-okay mount looks like "/files/<name>/" with a whitelisted name.
    parts = relative_point.split('/')
    known_okay = (
        len(parts) == 4
        and parts[0] == ''
        and parts[1] == 'files'
        and parts[2] in mount_names_ok
        and parts[3] == ''
    )
    if known_okay:
        print("  Skipping, known okay")
        continue
    raise Exception("Unhandled")