summaryrefslogtreecommitdiff
path: root/bin/export.py
blob: 26460e8f040d6649631c5a70f75efdd785054c35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#! venv/bin/python
import datetime
import json
import os.path
import pathlib
import shutil
import subprocess
import sys
from datetime import timedelta, datetime, tzinfo

import psycopg2
import psycopg2.extras
import pypandoc
#from langdetect import detect

# Connection parameters for the database the export reads from.
_db_settings = {
    "database": "cccms_dev",
    "user": "postgres",
    "password": "",
    "host": "127.0.0.1",
}
conn = psycopg2.connect(**_db_settings)

# `cursor` yields DictCursor rows that can be indexed by column name;
# `lcursor` is a second cursor with the default row type.
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
lcursor = conn.cursor()

# Fetch all user rows once; they are used later to find an editor's email.
cursor.execute("select id, login, email, admin from users", [])
user = cursor.fetchall()

# Accumulators filled by the export steps that follow:
#   page_list     - one dict per file revision that will become a git commit
#   redirect_list - old asset URL -> new path inside the exported tree
page_list = []
redirect_list = {}

# The work tree has to exist before a repository can be initialised in it.
pathlib.Path('git/').mkdir(parents=True, exist_ok=True)
os.system("git -C git/ init")

# Output directories for assets, both in the git tree and in the dump area.
for _output_dir in ('git/images/', 'git/files/', 'dump/images/', 'dump/files/'):
    pathlib.Path(_output_dir).mkdir(parents=True, exist_ok=True)

# Queue every uploaded asset for the git replay and record a redirect from
# its old upload URL to its new location (images/ or files/).
# `rev` is a running counter used as the 'revision' of each asset entry so
# that assets sharing the same timestamp keep a stable order when sorted.
rev = 0
cursor.execute("select id, name, upload_content_type, upload_file_name, upload_updated_at from assets order by id", [])
assets = cursor.fetchall()
for asset in assets:
    file_name = asset['upload_file_name']
    if not file_name:
        # Asset row without an uploaded file: nothing to export.
        continue

    source = "/usr/local/www/cccms/public/system/uploads/{}/original/{}".format(asset['id'], file_name)

    # A NULL content type must not crash the export; such assets are
    # treated like any other non-image and go to files/.
    content_type = asset['upload_content_type'] or ''
    destdir = 'images/' if content_type.startswith('image') else 'files/'
    target = destdir + file_name

    page_list.append({
        'date': asset['upload_updated_at'],
        'revision': rev,
        'comment': "asset {}".format(asset['name']),
        'fname': source,
        'gname': target,
        'editor': 'admin',
        'email': 'admin@cccms.de',
    })
    rev += 1
    redirect_list['/public/system/uploads/{}/original/{}'.format(asset['id'], file_name)] = target

# Persist the old-URL -> new-path mapping.
with open('redirects.json', 'w') as outfile:
    json.dump(redirect_list, outfile)

# Export every published page revision of every node.
#
# For each (node, page revision, translation) a file with a metadata header
# and the pandoc-converted Markdown body is written below dump/, and an entry
# describing it is appended to page_list so it can be committed to git later.

# Lookup tables built from the user rows fetched at start-up. They replace
# two "select login from users" queries per page and the linear search for
# the editor's email. setdefault keeps the first row for a given key.
login_by_id = {}
email_by_login = {}
for person in user:
    login_by_id.setdefault(person['id'], person['login'])
    email_by_login.setdefault(person['login'], person['email'])

# Address used when the editor has no matching user row or no email; this is
# the same address the asset entries use for 'admin'.
fallback_email = 'admin@cccms.de'

cursor.execute("select id, unique_name, created_at from nodes order by id", [])
nodes = cursor.fetchall()

for node in nodes:
    # Node 1 is never exported.
    if node['id'] == 1:
        continue

    unique_name = node['unique_name']
    if not unique_name:
        # The column is always present in the row, so test its value: a
        # NULL/empty name cannot be turned into a file path.
        print("WARNING: NO unique_name in node " + str(node['id']))
        continue

    # Nodes under updates/ keep their path; everything else goes to pages/.
    if unique_name.startswith('updates/'):
        git_base = unique_name
    else:
        git_base = 'pages/' + unique_name

    # Always make sure both target directories exist. This also covers
    # names without a "/" (git/pages/ is not created anywhere else).
    pathlib.Path(os.path.dirname('dump/' + unique_name)).mkdir(parents=True, exist_ok=True)
    pathlib.Path(os.path.dirname('git/' + git_base)).mkdir(parents=True, exist_ok=True)

    cursor.execute("select id, revision, created_at, updated_at, published_at, user_id, editor_id from pages where node_id = %s", [node['id']])
    pages = cursor.fetchall()

    for page in pages:
        # Ignore unpublished pages.
        if not page['published_at']:
            continue

        # NOTE(review): the taggings lookup filters on taggable_id only;
        # confirm that no other taggable type shares ids with pages.
        cursor.execute("select name from tags where id in (select tag_id from taggings where taggable_id = %s)", [page['id']])
        tags = [row['name'] for row in cursor.fetchall() if row['name']]

        # Resolve editor and creator logins; unknown ids fall back to "admin",
        # known users with an empty login to "NO-LOGIN".
        editor = "admin"
        if page['editor_id'] and page['editor_id'] in login_by_id:
            editor = login_by_id[page['editor_id']] or "NO-LOGIN"

        creator = "admin"
        if page['user_id'] and page['user_id'] in login_by_id:
            creator = login_by_id[page['user_id']] or "NO-LOGIN"

        # The related asset at position 1, if any, becomes the preview image.
        previewimage = ''
        cursor.execute("select upload_file_name from related_assets full join assets on related_assets.asset_id = assets.id where related_assets.page_id = %s and position = 1;", [page['id']])
        related_asset = cursor.fetchall()
        if related_asset:
            previewimage = related_asset[0]['upload_file_name']

        # Timestamp of this revision; fall back to the publication date when
        # the page was never updated so file naming and sorting still work.
        revision_date = page['updated_at'] or page['published_at']
        stamp = str(page['revision']) + '@' + str(int(revision_date.timestamp()))

        email = email_by_login.get(editor) or fallback_email

        cursor.execute("select locale, title, abstract, body, created_at, updated_at from page_translations where page_id = %s", [page['id']])
        page_translations = cursor.fetchall()
        for translation in page_translations:
            # Translations without a title are skipped.
            if not translation['title']:
                continue

            # English gets an ".en.md" suffix; every other locale shares ".md".
            suffix = '.en.md' if translation['locale'] == 'en' else '.md'
            fname = "dump/" + unique_name + suffix + ':' + stamp
            gitname = git_base + suffix

            # A NULL body must become empty text, not the string "None".
            body_html = translation['body'] or ""

            # Write UTF-8 explicitly so the result does not depend on the
            # locale the script happens to run under.
            with open(fname, "w", encoding="utf-8") as f:
                # Metadata header; the title is collapsed to a single line.
                f.write("title: {}\n".format(' '.join(translation['title'].split())))
                f.write("date: {}\n".format(page['published_at'].strftime("%Y-%m-%d %H:%M:%S %z")))
                if page['updated_at']:
                    f.write("updated: {}\n".format(page['updated_at'].strftime("%Y-%m-%d %H:%M:%S %z")))
                f.write("author: {}\n".format(creator))
                f.write("tags: {}\n".format(', '.join(tags).lower()))
                if previewimage:
                    f.write("previewimage: /images/{}\n".format(previewimage))
                f.write("\n")

                # Add abstract, if one is there; the teaser marker separates
                # it from the body when both exist.
                if translation['abstract']:
                    f.write(str(translation['abstract']))
                    f.write("\n\n")
                    if body_html:
                        f.write("<!-- TEASER_END -->\n\n")

                f.write(pypandoc.convert_text(body_html, 'markdown-smart', format='html-native_divs-native_spans', extra_args=['--atx-headers']))

            page_list.append({
                'date': revision_date,
                'revision': page['revision'],
                'comment': "page revision {}".format(page['revision']),
                'fname': fname,
                'gname': gitname,
                'editor': editor,
                'email': email,
            })

# Replay all queued revisions into the git repository in chronological
# order (ties broken by revision number), one commit per entry.
page_list.sort(key=lambda tup: (tup['date'], tup['revision']))
for page in page_list:
    print(page)

    # Copy without going through a shell: file names and comments come from
    # uploaded data and may contain spaces, quotes or shell metacharacters.
    try:
        shutil.copy(page['fname'], 'git/' + page['gname'])
    except OSError as err:
        # Keep the export best-effort: report the missing/unreadable file
        # and carry on with the next revision.
        print("WARNING: could not copy {}: {}".format(page['fname'], err))
        continue

    # "--" keeps a file name starting with "-" from being read as an option.
    subprocess.run(['git', '-C', 'git/', 'add', '--', page['gname']], check=False)

    # The committer identity is passed through the environment of the git
    # process; the author identity and date are given on the command line.
    commit_env = dict(
        os.environ,
        GIT_COMMITTER_NAME=str(page['editor']),
        GIT_COMMITTER_EMAIL=str(page['email']),
    )
    # check=False: a failing commit (e.g. nothing changed) does not stop
    # the replay.
    subprocess.run(
        [
            'git', '-C', 'git/', 'commit',
            '-m', 'committing {}'.format(page['comment']),
            '--author={} <{}>'.format(page['editor'], page['email']),
            '--date={}'.format(page['date']),
        ],
        env=commit_env,
        check=False,
    )