summary refs log tree commit diff
path: root/bin/export.py
diff options
context:
space:
mode:
Diffstat (limited to 'bin/export.py')
-rwxr-xr-x bin/export.py 158
1 files changed, 158 insertions, 0 deletions
diff --git a/bin/export.py b/bin/export.py
new file mode 100755
index 00000000..26460e8f
--- /dev/null
+++ b/bin/export.py
@@ -0,0 +1,158 @@
1#! venv/bin/python
2import psycopg2
3import psycopg2.extras
4import pathlib
5import os.path
6import datetime
7import pypandoc
8import json
9from datetime import timedelta, datetime, tzinfo
10#from langdetect import detect
11import sys
12
# Open the connection to the local "cccms_dev" PostgreSQL database.
conn = psycopg2.connect(
    host="127.0.0.1",
    database="cccms_dev",
    user="postgres",
    password="",
)

# `cursor` returns rows that can be indexed by column name; `lcursor` is a
# plain cursor on the same connection.
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
lcursor = conn.cursor()

# Load all users once; `user` holds the complete result set.
cursor.execute("select id, login, email, admin from users", [])
user = cursor.fetchall()
18
# page_list collects one dict per file that is later replayed as a git commit;
# redirect_list maps old public upload URLs to their new repository paths.
page_list = []
redirect_list = {}

# The repository directory has to exist before `git init` can run inside it.
pathlib.Path('git').mkdir(parents=True, exist_ok=True)
os.system("git -C git/ init")

# Target directories inside the repository and the intermediate dump tree.
for directory in ('git/images', 'git/files', 'dump/images', 'dump/files'):
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
28
# Queue every uploaded asset for the git replay and remember where its old
# public URL has to be redirected to.  The mapping is written to
# redirects.json once all assets have been visited.
rev = 0  # running counter used as the "revision" of queued assets
cursor.execute("select id, name, upload_content_type, upload_file_name, upload_updated_at from assets order by id", [])
assets = cursor.fetchall()
for asset in assets:
    file_name = asset['upload_file_name']
    if not file_name:
        # Asset rows without an uploaded file have nothing to export.
        continue

    source = "/usr/local/www/cccms/public/system/uploads/{}/original/{}".format(asset['id'], file_name)

    # upload_content_type may be NULL; such assets are treated as plain files
    # instead of crashing on None.startswith().
    content_type = asset['upload_content_type'] or ''
    destdir = 'images/' if content_type.startswith('image') else 'files/'

    page_list.append({
        'date': asset['upload_updated_at'],
        'revision': rev,
        'comment': "asset {}".format(asset['name']),
        'fname': source,
        'gname': destdir + file_name,
        'editor': 'admin',
        'email': 'admin@cccms.de',
    })
    rev = rev + 1
    redirect_list['/public/system/uploads/{}/original/{}'.format(asset['id'], file_name)] = destdir + file_name

with open('redirects.json', 'w') as outfile:
    json.dump(redirect_list, outfile)
45
# Export every published page revision of every node as a Markdown file with a
# metadata header below dump/, and queue it in page_list so that it can be
# replayed as a git commit afterwards.

# The users table is already loaded into `user`; index it once instead of
# querying the database again for every page.
_users_by_id = {row['id']: row for row in user}
_email_by_login = {}
for _row in user:
    # Keep the first row per login, like a linear search over `user` would.
    _email_by_login.setdefault(_row['login'], _row['email'])

# Address used when no e-mail address is known for an editor (the same
# address the asset export uses for its commits).
_FALLBACK_EMAIL = 'admin@cccms.de'


def _login_for(user_id):
    """Return the login for *user_id*.

    Falls back to "admin" when the id is empty or unknown, and to "NO-LOGIN"
    when the user row exists but has no login.
    """
    row = _users_by_id.get(user_id) if user_id else None
    if row is None:
        return "admin"
    return row['login'] or "NO-LOGIN"


cursor.execute("select id, unique_name, created_at from nodes order by id", [])
nodes = cursor.fetchall()

for node in nodes:
    # Node 1 is never exported (presumably the root node -- TODO confirm).
    if node['id'] == 1:
        continue

    unique_name = node['unique_name']
    if not unique_name:
        # `'unique_name' in node` only tests whether the column exists in the
        # row; the value itself has to be checked to catch NULL names.
        print("WARNING: NO unique_name in node " + str(node['id']))
        continue

    # Make sure the parent directories exist, both for the dump file and for
    # its later location in the repository.  Everything outside updates/
    # lives below pages/.
    pathlib.Path(os.path.dirname('dump/' + unique_name)).mkdir(parents=True, exist_ok=True)
    if unique_name.startswith('updates/'):
        git_parent = os.path.dirname('git/' + unique_name)
    else:
        git_parent = os.path.dirname('git/pages/' + unique_name)
    pathlib.Path(git_parent).mkdir(parents=True, exist_ok=True)

    cursor.execute("select id, revision, created_at, updated_at, published_at, user_id, editor_id from pages where node_id = %s", [node['id']])
    pages = cursor.fetchall()

    for page in pages:
        # Unpublished revisions are not exported.
        if not page['published_at']:
            continue

        # NOTE(review): taggings are matched on taggable_id only; confirm that
        # no other taggable type shares ids with pages.
        cursor.execute("select name from tags where id in (select tag_id from taggings where taggable_id = %s)", [page['id']])
        tags = [row['name'] for row in cursor.fetchall() if row['name']]

        editor = _login_for(page['editor_id'])
        creator = _login_for(page['user_id'])

        previewimage = ''
        cursor.execute("select upload_file_name from related_assets full join assets on related_assets.asset_id = assets.id where related_assets.page_id = %s and position = 1;", [page['id']])
        related_asset = cursor.fetchall()
        if related_asset:
            previewimage = related_asset[0]['upload_file_name']

        # Timestamp that identifies this revision in the dump file name and
        # orders the later commits.  published_at is known to be set here and
        # serves as fallback when updated_at is NULL.
        revision_date = page['updated_at'] or page['published_at']

        cursor.execute("select locale, title, abstract, body, created_at, updated_at from page_translations where page_id = %s", [page['id']])
        page_translations = cursor.fetchall()
        for translation in page_translations:
            # Translations without a title are skipped.
            if not translation['title']:
                continue

            # English translations get an ".en.md" suffix, every other locale
            # is written as plain ".md".
            suffix = '.en.md' if translation['locale'] == 'en' else '.md'
            fname = "dump/{}{}:{}@{}".format(
                unique_name, suffix, page['revision'], int(revision_date.timestamp()))
            gitname = unique_name + suffix
            if not gitname.startswith('updates/'):
                gitname = 'pages/' + gitname

            with open(fname, "w", encoding="utf-8") as f:
                # Metadata header: one "key: value" pair per line, terminated
                # by an empty line.
                f.write("title: {}\n".format(' '.join(translation['title'].split())))
                f.write("date: {}\n".format(page['published_at'].strftime("%Y-%m-%d %H:%M:%S %z")))
                if page['updated_at']:
                    f.write("updated: {}\n".format(page['updated_at'].strftime("%Y-%m-%d %H:%M:%S %z")))
                f.write("author: {}\n".format(creator))
                f.write("tags: {}\n".format(', '.join(tags).lower()))
                if previewimage:
                    f.write("previewimage: /images/{}\n".format(previewimage))
                f.write("\n")

                body = translation['body']

                # The abstract, if present, is written in front of the body;
                # the teaser marker separates the two.
                # NOTE(review): the marker is assumed to be written only when
                # an abstract exists -- confirm against the original file.
                if translation['abstract']:
                    f.write(str(translation['abstract']))
                    f.write("\n\n")
                    if body:
                        f.write("<!-- TEASER_END -->\n\n")

                # Convert the HTML body to Markdown.  A NULL body is skipped
                # so that the literal text "None" never reaches the output.
                # NOTE(review): newer pandoc releases replace --atx-headers
                # with --markdown-headings=atx; check the installed version.
                if body:
                    f.write(pypandoc.convert_text(str(body), 'markdown-smart', format='html-native_divs-native_spans', extra_args=['--atx-headers']))

            page_list.append({
                'date': revision_date,
                'revision': page['revision'],
                'comment': "page revision {}".format(page['revision']),
                'fname': fname,
                'gname': gitname,
                'editor': editor,
                # Unknown editors or users without an address fall back to
                # the default address instead of aborting the export.
                'email': _email_by_login.get(editor) or _FALLBACK_EMAIL,
            })
149
# Replay all queued files as individual git commits, oldest first.
#
# Files are copied with shutil and git is invoked with argument lists, so
# file names, asset names and logins that contain spaces, quotes or shell
# metacharacters are passed through verbatim instead of being interpreted by
# a shell.
import shutil      # imported here because only this replay step needs them
import subprocess

page_list.sort(key=lambda entry: (entry['date'], entry['revision']))
for entry in page_list:
    print(entry)

    target = os.path.join('git', entry['gname'])
    try:
        shutil.copy(entry['fname'], target)
    except OSError as err:
        # Best effort: a missing or unreadable source file must not abort the
        # whole replay; without the file there is nothing to commit.
        print("WARNING: could not copy {} to {}: {}".format(entry['fname'], target, err))
        continue

    # The committer identity is taken from the environment by git.
    os.environ['GIT_COMMITTER_NAME'] = entry['editor']
    os.environ['GIT_COMMITTER_EMAIL'] = entry['email']

    # "--" keeps a path that starts with "-" from being parsed as an option.
    subprocess.run(['git', '-C', 'git/', 'add', '--', entry['gname']])
    subprocess.run([
        'git', '-C', 'git/', 'commit',
        '-m', 'committing {}'.format(entry['comment']),
        '--author={} <{}>'.format(entry['editor'], entry['email']),
        '--date={}'.format(entry['date']),
    ])