#! venv/bin/python
"""Export CCCMS content from PostgreSQL into a git repository.

Originally added as bin/export.py in commit 959576ad ("Remove circular
dependency on makefile", Dirk Engling, 2020-05-27).

The script reads users, assets, nodes, pages and page translations from the
``cccms_dev`` database.  Every published page revision that has a titled
translation is written as a markdown file below ``dump/``; the mapping from
the upload paths to their location inside the repository is written to
``redirects.json``.  Finally all assets and page revisions are sorted by
(date, revision) and replayed as one commit each in the repository at
``git/``.
"""
import datetime
import json
import os.path
import pathlib
import shutil
import subprocess
import sys
# NOTE: this rebinds the name ``datetime`` to the class, as in the original.
from datetime import timedelta, datetime, tzinfo

import psycopg2
import psycopg2.extras
import pypandoc

GIT_DIR = "git/"
DUMP_DIR = "dump/"
UPLOAD_SOURCE = "/usr/local/www/cccms/public/system/uploads/{}/original/{}"
UPLOAD_URL = "/public/system/uploads/{}/original/{}"
DEFAULT_LOGIN = "admin"
DEFAULT_EMAIL = "admin@cccms.de"
HEADER_DATE_FORMAT = "%Y-%m-%d %H:%M:%S %z"


def run_git(*args, env=None):
    """Run ``git -C git/ <args>`` without a shell.

    Failures are deliberately not raised: the shell-based predecessor
    ignored exit codes too, and "nothing to commit" is an expected outcome
    when two revisions render to identical files.
    """
    return subprocess.run(["git", "-C", GIT_DIR, *args], env=env, check=False)


def asset_destdir(content_type):
    """Return ``images/`` for image uploads and ``files/`` otherwise.

    A missing (NULL) content type is treated as a plain file.
    """
    if (content_type or "").startswith("image"):
        return "images/"
    return "files/"


def login_for(user_id, users_by_id):
    """Return the login of ``user_id``, or ``admin`` if there is no such user."""
    row = users_by_id.get(user_id) if user_id else None
    if row is None:
        return DEFAULT_LOGIN
    return row["login"] or "NO-LOGIN"


def front_matter(title, published_at, updated_at, author, tags, previewimage):
    """Build the metadata header of an exported page, ending in a blank line."""
    lines = [
        # Collapse any run of whitespace (including newlines) in the title.
        "title: {}".format(" ".join(title.split())),
        "date: {}".format(published_at.strftime(HEADER_DATE_FORMAT)),
    ]
    if updated_at:
        lines.append("updated: {}".format(updated_at.strftime(HEADER_DATE_FORMAT)))
    lines.append("author: {}".format(author))
    lines.append("tags: {}".format(", ".join(tags).lower()))
    if previewimage:
        lines.append("previewimage: /images/{}".format(previewimage))
    return "\n".join(lines) + "\n\n"


def collect_assets(cursor):
    """Return ``(entries, redirects)`` for all assets that have an upload.

    ``entries`` are commit descriptions in the same shape used for pages;
    ``redirects`` maps the upload path to the path inside the repository.
    """
    cursor.execute("select id, name, upload_content_type, upload_file_name, upload_updated_at from assets order by id", [])
    entries = []
    redirects = {}
    for asset in cursor.fetchall():
        file_name = asset['upload_file_name']
        if not file_name:
            continue
        gname = asset_destdir(asset['upload_content_type']) + file_name
        entries.append({
            'date': asset['upload_updated_at'],
            # Running counter over the assets that were actually exported.
            'revision': len(entries),
            'comment': "asset {}".format(asset['name']),
            'fname': UPLOAD_SOURCE.format(asset['id'], file_name),
            'gname': gname,
            'editor': DEFAULT_LOGIN,
            'email': DEFAULT_EMAIL,
        })
        redirects[UPLOAD_URL.format(asset['id'], file_name)] = gname
    return entries, redirects


def export_page(cursor, node, page, users_by_id, email_by_login):
    """Write all titled translations of one published page below ``dump/``.

    Returns one commit description per file written.
    """
    cursor.execute("select name from tags where id in (select tag_id from taggings where taggable_id = %s)", [page['id']])
    tags = [row['name'] for row in cursor.fetchall() if row['name']]

    editor = login_for(page['editor_id'], users_by_id)
    creator = login_for(page['user_id'], users_by_id)
    # Fall back instead of crashing when the login has no user record
    # (e.g. the default "admin") or the record has no email.
    email = email_by_login.get(editor) or DEFAULT_EMAIL

    previewimage = ''
    cursor.execute("select upload_file_name from related_assets full join assets on related_assets.asset_id = assets.id where related_assets.page_id = %s and position = 1;", [page['id']])
    related_assets = cursor.fetchall()
    if related_assets:
        previewimage = related_assets[0]['upload_file_name']

    # Only published pages reach this point, so published_at is a safe
    # fallback for rows without updated_at.
    revised_at = page['updated_at'] or page['published_at']
    suffix = ':' + str(page['revision']) + '@' + str(int(revised_at.timestamp()))

    cursor.execute("select locale, title, abstract, body, created_at, updated_at from page_translations where page_id = %s", [page['id']])
    entries = []
    for translation in cursor.fetchall():
        if not translation['title']:
            continue

        # English gets an explicit language suffix, every other locale
        # shares the plain ``.md`` name.
        extension = '.en.md' if translation['locale'] == 'en' else '.md'
        fname = DUMP_DIR + node['unique_name'] + extension + suffix
        gitname = node['unique_name'] + extension
        if not gitname.startswith('updates/'):
            gitname = 'pages/' + gitname

        abstract = translation['abstract']
        body = translation['body']

        pathlib.Path(fname).parent.mkdir(parents=True, exist_ok=True)
        with open(fname, "w", encoding="utf-8") as f:
            f.write(front_matter(translation['title'], page['published_at'],
                                 page['updated_at'], creator, tags, previewimage))
            if abstract:
                f.write(str(abstract))
                f.write("\n\n")
            # NOTE(review): the indentation of the original was lost; this
            # separator is assumed to be independent of the abstract -- confirm.
            if body:
                f.write("\n\n")
                # A NULL body used to be converted as the literal text "None".
                f.write(pypandoc.convert_text(str(body), 'markdown-smart', format='html-native_divs-native_spans', extra_args=['--atx-headers']))

        entries.append({
            'date': revised_at,
            'revision': page['revision'],
            'comment': "page revision {}".format(page['revision']),
            'fname': fname,
            'gname': gitname,
            'editor': editor,
            'email': email,
        })
    return entries


def collect_pages(cursor, users_by_id, email_by_login):
    """Export every published page of every node and return the commit entries."""
    cursor.execute("select id, unique_name, created_at from nodes order by id", [])
    entries = []
    for node in cursor.fetchall():
        # The column is always present in the row; what can be missing is
        # its value.
        if not node['unique_name']:
            print("WARNING: NO unique_name in node " + str(node['id']))
            continue

        # Node 1 is skipped, as in the original export.
        if node['id'] == 1:
            continue

        cursor.execute("select id, revision, created_at, updated_at, published_at, user_id, editor_id from pages where node_id = %s", [node['id']])
        for page in cursor.fetchall():
            # ignoring unpublished pages
            if not page['published_at']:
                continue
            entries.extend(export_page(cursor, node, page, users_by_id, email_by_login))
    return entries


def commit_entries(entries):
    """Copy each entry into ``git/`` and commit it with its author and date."""
    for entry in entries:
        print(entry)
        destination = pathlib.Path(GIT_DIR + entry['gname'])
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(entry['fname'], str(destination))
        except OSError as err:
            # Best effort, like the former ``cp``: report and carry on.
            print("WARNING: cannot copy {}: {}".format(entry['fname'], err), file=sys.stderr)
            continue

        run_git("add", "--", entry['gname'])
        commit_env = dict(os.environ,
                          GIT_COMMITTER_NAME=entry['editor'],
                          GIT_COMMITTER_EMAIL=entry['email'])
        run_git("commit",
                "-m", "committing {}".format(entry['comment']),
                "--author={} <{}>".format(entry['editor'], entry['email']),
                "--date={}".format(entry['date']),
                env=commit_env)


def main():
    """Run the complete export: assets, redirects, pages, then the git replay."""
    conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1")
    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cursor.execute("select id, login, email, admin from users", [])
        users = cursor.fetchall()
        users_by_id = {row['id']: row for row in users}
        email_by_login = {}
        for row in users:
            # First match wins, mirroring the former linear search.
            email_by_login.setdefault(row['login'], row['email'])

        pathlib.Path('git').mkdir(parents=True, exist_ok=True)
        run_git("init")
        for directory in ('git/images', 'git/files', 'dump/images', 'dump/files'):
            pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

        page_list, redirect_list = collect_assets(cursor)
        with open('redirects.json', 'w') as outfile:
            json.dump(redirect_list, outfile)

        page_list.extend(collect_pages(cursor, users_by_id, email_by_login))
    finally:
        conn.close()

    page_list.sort(key=lambda entry: (entry['date'], entry['revision']))
    commit_entries(page_list)


if __name__ == "__main__":
    main()