3 files changed, 238 insertions, 0 deletions
diff --git a/bin/Makefile b/bin/Makefile
new file mode 100644
index 00000000..10ac0d1e
--- /dev/null
+++ b/bin/Makefile
@@ -0,0 +1,18 @@
+# Full site rebuild: export the CMS into git/, prepare a nikola site there, build it.
+# Each step consumes the output of the previous one, so they must not run in parallel.
+.NOTPARALLEL:
+# None of the targets is a file; without this an existing file or directory
+# called "build" or "nikola" would silently disable the rule.
+.PHONY: all rebuild nikola build
+all: rebuild nikola build
+# Start from a clean export. rm -f: updates.md is only there when the export
+# produced a page named "updates".
+rebuild:
+        rm -rf git dump
+        ./export.py
+        rm -f git/pages/updates.md
+# Turn git/ into a nikola site and copy the local configuration, theme and pages in.
+# "cd git && ..." keeps nikola from running in the wrong directory when cd fails.
+nikola:
+        venv/bin/nikola init -q git
+        cp site/conf.py git
+        cd git && nikola plugin -i localsearch
+        cd git && nikola theme -n ccc --parent=bootstrap4
+        cp -r site/themes/ccc git/themes/
+        cp -r site/data site/shortcodes git/
+        cp -r site/index.rst site/index.en.rst site/search.html site/cpu site/themen git/pages/
+build:
+        cd git && nikola build --backend=sqlite3
diff --git a/bin/cleaner.py b/bin/cleaner.py
new file mode 100755
index 00000000..d8c999da
--- /dev/null
+++ b/bin/cleaner.py
@@ -0,0 +1,62 @@
+#! venv/bin/python
+import psycopg2
+import psycopg2.extras
+import pathlib
+import os.path
+import datetime
+import pypandoc
+from bs4 import BeautifulSoup
+from datetime import timedelta, datetime, tzinfo
+#from langdetect import detect
+import sys
+import re
+conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1")
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+cursor.execute("select * from page_translations")
+page_translations = cursor.fetchall()
+# <p> tags whose id contains "magic" get all their attributes stripped.
+MAGIC_ID = re.compile(r"magic")
+def is_empty_tag(tag):
+    """Return True for a tag without any text content; <br> is always kept."""
+    # NOTE(review): text-less tags such as <img> or <hr> match as well - confirm
+    # that dropping them is intended.
+    return tag.name != 'br' and (not tag.contents or not tag.get_text(strip=True))
+# orig.txt collects every body before the clean-up and modi.txt the cleaned
+# result, so both can be compared by hand. The with block closes them even when
+# a row fails half-way; utf-8 is explicit so the locale cannot break the write.
+with open("orig.txt", "w", encoding="utf-8") as fo, open("modi.txt", "w", encoding="utf-8") as fm:
+    for translation in page_translations:
+        body = translation.get('body')
+        # Only bodies containing a <p id=...> paragraph are rewritten.
+        if not body or '<p id=' not in body:
+            continue
+        fo.write(body)
+        # page_id is printed directly: the former per-row lookup in pages only
+        # served this message and crashed on translations without a page.
+        print('Needing fixup: {}'.format(translation['page_id']))
+        soup = BeautifulSoup(body, 'html5lib')
+        # https://stackoverflow.com/questions/33500888/how-to-remove-tags-that-have-no-content
+        for tag in soup.find_all(is_empty_tag):
+            tag.decompose()
+        for elm in soup.find_all('p', id=MAGIC_ID):
+            # Empty the dict instead of deleting the attribute, so the tag is
+            # still a complete object when it is serialised below.
+            elm.attrs.clear()
+        # html5lib wraps the fragment in <html><body>; soup.body can be gone
+        # when everything in it was removed as empty.
+        if soup.body:
+            cleaned = "".join(str(child) for child in soup.body.children)
+            fm.write(cleaned)
+            cursor.execute("update page_translations set body = %s where id = %s", [cleaned, translation['id']])
+            conn.commit()
+cursor.close()
+conn.close()
diff --git a/bin/export.py b/bin/export.py
new file mode 100755
index 00000000..26460e8f
--- /dev/null
+++ b/bin/export.py
@@ -0,0 +1,158 @@
+#! venv/bin/python
+import psycopg2
+import psycopg2.extras
+import pathlib
+import os.path
+import datetime
+import pypandoc
+import json
+from datetime import timedelta, datetime, tzinfo
+#from langdetect import detect
+import sys
+import shutil
+import subprocess
+# Asset commits and editors without a usable e-mail are attributed to this address.
+FALLBACK_EMAIL = 'admin@cccms.de'
+def run_git(*args, env=None):
+    """Run ``git -C git/ <args>`` without a shell and return git's exit status."""
+    # A non-zero status (for example "nothing to commit") is not raised, so one
+    # failing revision does not stop the export.
+    return subprocess.run(['git', '-C', 'git/'] + list(args), env=env).returncode
+conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1")
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+cursor.execute("select id, login, email, admin from users", [])
+user = cursor.fetchall()
+# In-memory lookups replace two users queries per page and the linear e-mail search.
+login_by_id = {person['id']: person['login'] for person in user}
+email_by_login = {}
+for person in user:
+    # setdefault keeps the first row per login, matching the former search order.
+    email_by_login.setdefault(person['login'], person['email'])
+page_list = []
+redirect_list = {}
+# git/pages is created up front: pages whose unique_name has no "/" are copied
+# straight into it and nothing else in this script would create the directory.
+for directory in ('git', 'git/images', 'git/files', 'git/pages', 'dump/images', 'dump/files'):
+    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
+run_git('init')
+# Every uploaded asset becomes one entry; rev is the tie-breaker when entries
+# with equal dates are sorted below.
+rev = 0
+cursor.execute("select id, name, upload_content_type, upload_file_name, upload_updated_at from assets order by id", [])
+assets = cursor.fetchall()
+for asset in assets:
+    file_name = asset['upload_file_name']
+    if not file_name:
+        continue
+    upload_path = "/public/system/uploads/{}/original/{}".format(asset['id'], file_name)
+    # A missing content type is treated like a non-image instead of raising.
+    is_image = (asset['upload_content_type'] or '').startswith('image')
+    gname = ('images/' if is_image else 'files/') + file_name
+    page_list.append({'date': asset['upload_updated_at'], 'revision': rev, 'comment': "asset {}".format(asset['name']), 'fname': "/usr/local/www/cccms" + upload_path, 'gname': gname, 'editor': 'admin', 'email': FALLBACK_EMAIL})
+    rev = rev + 1
+    redirect_list[upload_path] = gname
+with open('redirects.json', 'w') as outfile:
+    json.dump(redirect_list, outfile)
+cursor.execute("select id, unique_name, created_at from nodes order by id", [])
+nodes = cursor.fetchall()
+for node in nodes:
+    unique_name = node['unique_name']
+    # The column is always part of the row, so test its value, not its presence.
+    if not unique_name:
+        print("WARNING: NO unique_name in node " + str(node['id']))
+        continue
+    if node['id'] == 1:
+        continue
+    # Names below "updates/" live at the top of git/, everything else below git/pages/.
+    git_root = 'git/' if unique_name.startswith('updates/') else 'git/pages/'
+    pathlib.Path('dump/' + unique_name).parent.mkdir(parents=True, exist_ok=True)
+    pathlib.Path(git_root + unique_name).parent.mkdir(parents=True, exist_ok=True)
+    cursor.execute("select id, revision, created_at, updated_at, published_at, user_id, editor_id from pages where node_id = %s", [node['id']])
+    pages = cursor.fetchall()
+    for page in pages:
+        # ignoring unpublished pages
+        if not page['published_at']:
+            continue
+        cursor.execute("select name from tags where id in (select tag_id from taggings where taggable_id = %s)", [page['id']])
+        tags = [row['name'] for row in cursor.fetchall()]
+        # Unknown ids and empty logins fall back to "admin".
+        editor = login_by_id.get(page['editor_id']) or "admin"
+        creator = login_by_id.get(page['user_id']) or "admin"
+        cursor.execute("select upload_file_name from related_assets full join assets on related_assets.asset_id = assets.id where related_assets.page_id = %s and position = 1;", [page['id']])
+        related_asset = cursor.fetchone()
+        previewimage = related_asset['upload_file_name'] if related_asset else ''
+        cursor.execute("select locale, title, abstract, body, created_at, updated_at from page_translations where page_id = %s", [page['id']])
+        page_translations = cursor.fetchall()
+        for translation in page_translations:
+            if not translation.get("title"):
+                continue
+            suffix = '.en.md' if translation['locale'] == 'en' else '.md'
+            # The dump name carries revision and timestamp so that all revisions
+            # of one page can sit next to each other in dump/.
+            fname = "dump/{}{}:{}@{}".format(unique_name, suffix, page['revision'], int(page['updated_at'].timestamp()))
+            gitname = unique_name + suffix
+            if not gitname.startswith('updates/'):
+                gitname = 'pages/' + gitname
+            header = [
+                "title: {}".format(' '.join(translation['title'].split())),
+                "date: {}".format(page['published_at'].strftime("%Y-%m-%d %H:%M:%S %z")),
+                "updated: {}".format(page['updated_at'].strftime("%Y-%m-%d %H:%M:%S %z")),
+                "author: {}".format(creator),
+                "tags: {}".format(', '.join(tags).lower()),
+            ]
+            if previewimage:
+                header.append("previewimage: /images/{}".format(previewimage))
+            abstract = translation['abstract']
+            body = translation['body']
+            with open(fname, "w", encoding="utf-8") as f:
+                f.write("\n".join(header) + "\n\n")
+                # Add abstract, if one is there
+                if abstract:
+                    f.write(str(abstract) + "\n\n")
+                    if body:
+                        f.write("<!-- TEASER_END -->\n\n")
+                # A missing body is skipped; str(None) used to be converted and
+                # ended up as the literal text "None" in the page.
+                if body:
+                    f.write(pypandoc.convert_text(str(body), 'markdown-smart', format='html-native_divs-native_spans', extra_args=['--atx-headers']))
+            page_list.append({'date': page['updated_at'], 'revision': page['revision'], 'comment': "page revision {}".format(page['revision']), 'fname': fname, 'gname': gitname, 'editor': editor, 'email': email_by_login.get(editor) or FALLBACK_EMAIL})
+# Replay assets and page revisions in chronological order, one commit each.
+page_list.sort(key=lambda entry: (entry['date'], entry['revision']))
+for page in page_list:
+    print(page)
+    try:
+        shutil.copy(page['fname'], os.path.join('git', page['gname']))
+    except OSError as err:
+        # Best effort, like the former "cp": report the file and carry on.
+        print("WARNING: skipping {}: {}".format(page['gname'], err))
+        continue
+    # Argument lists instead of shell strings: file and asset names come from
+    # the database and may contain spaces, quotes or shell metacharacters.
+    run_git('add', '--', page['gname'])
+    committer = dict(os.environ, GIT_COMMITTER_NAME=page['editor'], GIT_COMMITTER_EMAIL=page['email'])
+    run_git('commit', '-m', "committing {}".format(page['comment']), '--author={} <{}>'.format(page['editor'], page['email']), '--date={}'.format(page['date']), env=committer)
+cursor.close()
+conn.close()

diff --git a/bin/Makefile b/bin/Makefile new file mode 100644 index 00000000..10ac0d1e --- /dev/null +++ b/bin/Makefile
@@ -0,0 +1,18 @@
	1	# Full site rebuild: export the CMS into git/, prepare a nikola site there, build it.
	2	# Each step consumes the output of the previous one, so they must not run in parallel.
	3	.NOTPARALLEL:
	4	# None of the targets is a file; without this an existing file or directory
	5	# called "build" or "nikola" would silently disable the rule.
	6	.PHONY: all rebuild nikola build
	7	all: rebuild nikola build
	8
	9	# Start from a clean export. rm -f: updates.md is only there when the export
	10	# produced a page named "updates".
	11	rebuild:
	12		rm -rf git dump
	13		./export.py
	14		rm -f git/pages/updates.md
	15
	16	# Turn git/ into a nikola site and copy the local configuration, theme and pages in.
	17	# "cd git && ..." keeps nikola from running in the wrong directory when cd fails.
	18	nikola:
	19		venv/bin/nikola init -q git
	20		cp site/conf.py git
	21		cd git && nikola plugin -i localsearch
	22		cd git && nikola theme -n ccc --parent=bootstrap4
	23		cp -r site/themes/ccc git/themes/
	24		cp -r site/data site/shortcodes git/
	25		cp -r site/index.rst site/index.en.rst site/search.html site/cpu site/themen git/pages/
	26
	27	build:
	28		cd git && nikola build --backend=sqlite3


diff --git a/bin/cleaner.py b/bin/cleaner.py new file mode 100755 index 00000000..d8c999da --- /dev/null +++ b/bin/cleaner.py
@@ -0,0 +1,62 @@
	1	#! venv/bin/python
	2	import psycopg2
	3	import psycopg2.extras
	4	import pathlib
	5	import os.path
	6	import datetime
	7	import pypandoc
	8	from bs4 import BeautifulSoup
	9	from datetime import timedelta, datetime, tzinfo
	10	#from langdetect import detect
	11	import sys
	12	import re
	13
	14	conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1")
	15	cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
	16
	17	cursor.execute("select * from page_translations")
	18	page_translations = cursor.fetchall()
	19
	20	# <p> tags whose id contains "magic" get all their attributes stripped.
	21	MAGIC_ID = re.compile(r"magic")
	22
	23
	24	def is_empty_tag(tag):
	25	    """Return True for a tag without any text content; <br> is always kept."""
	26	    # NOTE(review): text-less tags such as <img> or <hr> match as well - confirm
	27	    # that dropping them is intended.
	28	    return tag.name != 'br' and (not tag.contents or not tag.get_text(strip=True))
	29
	30
	31	# orig.txt collects every body before the clean-up and modi.txt the cleaned
	32	# result, so both can be compared by hand. The with block closes them even when
	33	# a row fails half-way; utf-8 is explicit so the locale cannot break the write.
	34	with open("orig.txt", "w", encoding="utf-8") as fo, open("modi.txt", "w", encoding="utf-8") as fm:
	35	    for translation in page_translations:
	36	        body = translation.get('body')
	37	        # Only bodies containing a <p id=...> paragraph are rewritten.
	38	        if not body or '<p id=' not in body:
	39	            continue
	40
	41	        fo.write(body)
	42	        # page_id is printed directly: the former per-row lookup in pages only
	43	        # served this message and crashed on translations without a page.
	44	        print('Needing fixup: {}'.format(translation['page_id']))
	45	        soup = BeautifulSoup(body, 'html5lib')
	46
	47	        # https://stackoverflow.com/questions/33500888/how-to-remove-tags-that-have-no-content
	48	        for tag in soup.find_all(is_empty_tag):
	49	            tag.decompose()
	50
	51	        for elm in soup.find_all('p', id=MAGIC_ID):
	52	            # Empty the dict instead of deleting the attribute, so the tag is
	53	            # still a complete object when it is serialised below.
	54	            elm.attrs.clear()
	55
	56	        # html5lib wraps the fragment in <html><body>; soup.body can be gone
	57	        # when everything in it was removed as empty.
	58	        if soup.body:
	59	            cleaned = "".join(str(child) for child in soup.body.children)
	60	            fm.write(cleaned)
	61	            cursor.execute("update page_translations set body = %s where id = %s", [cleaned, translation['id']])
	62	            conn.commit()
	63
	64	cursor.close()
	65	conn.close()


diff --git a/bin/export.py b/bin/export.py new file mode 100755 index 00000000..26460e8f --- /dev/null +++ b/bin/export.py
@@ -0,0 +1,158 @@
	1	#! venv/bin/python
	2	import psycopg2
	3	import psycopg2.extras
	4	import pathlib
	5	import os.path
	6	import datetime
	7	import pypandoc
	8	import json
	9	from datetime import timedelta, datetime, tzinfo
	10	#from langdetect import detect
	11	import sys
	12
	13	import shutil
	14	import subprocess
	15
	16	# Asset commits and editors without a usable e-mail are attributed to this address.
	17	FALLBACK_EMAIL = 'admin@cccms.de'
	18
	19
	20	def run_git(*args, env=None):
	21	    """Run ``git -C git/ <args>`` without a shell and return git's exit status."""
	22	    # A non-zero status (for example "nothing to commit") is not raised, so one
	23	    # failing revision does not stop the export.
	24	    return subprocess.run(['git', '-C', 'git/'] + list(args), env=env).returncode
	25
	26
	27	conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1")
	28	cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
	29	cursor.execute("select id, login, email, admin from users", [])
	30	user = cursor.fetchall()
	31	# In-memory lookups replace two users queries per page and the linear e-mail search.
	32	login_by_id = {person['id']: person['login'] for person in user}
	33	email_by_login = {}
	34	for person in user:
	35	    # setdefault keeps the first row per login, matching the former search order.
	36	    email_by_login.setdefault(person['login'], person['email'])
	37
	38	page_list = []
	39	redirect_list = {}
	40
	41	# git/pages is created up front: pages whose unique_name has no "/" are copied
	42	# straight into it and nothing else in this script would create the directory.
	43	for directory in ('git', 'git/images', 'git/files', 'git/pages', 'dump/images', 'dump/files'):
	44	    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
	45	run_git('init')
	46
	47	# Every uploaded asset becomes one entry; rev is the tie-breaker when entries
	48	# with equal dates are sorted below.
	49	rev = 0
	50	cursor.execute("select id, name, upload_content_type, upload_file_name, upload_updated_at from assets order by id", [])
	51	assets = cursor.fetchall()
	52	for asset in assets:
	53	    file_name = asset['upload_file_name']
	54	    if not file_name:
	55	        continue
	56	    upload_path = "/public/system/uploads/{}/original/{}".format(asset['id'], file_name)
	57	    # A missing content type is treated like a non-image instead of raising.
	58	    is_image = (asset['upload_content_type'] or '').startswith('image')
	59	    gname = ('images/' if is_image else 'files/') + file_name
	60	    page_list.append({'date': asset['upload_updated_at'], 'revision': rev, 'comment': "asset {}".format(asset['name']), 'fname': "/usr/local/www/cccms" + upload_path, 'gname': gname, 'editor': 'admin', 'email': FALLBACK_EMAIL})
	61	    rev = rev + 1
	62	    redirect_list[upload_path] = gname
	63
	64	with open('redirects.json', 'w') as outfile:
	65	    json.dump(redirect_list, outfile)
	66
	67	cursor.execute("select id, unique_name, created_at from nodes order by id", [])
	68	nodes = cursor.fetchall()
	69
	70	for node in nodes:
	71	    unique_name = node['unique_name']
	72	    # The column is always part of the row, so test its value, not its presence.
	73	    if not unique_name:
	74	        print("WARNING: NO unique_name in node " + str(node['id']))
	75	        continue
	76
	77	    if node['id'] == 1:
	78	        continue
	79
	80	    # Names below "updates/" live at the top of git/, everything else below git/pages/.
	81	    git_root = 'git/' if unique_name.startswith('updates/') else 'git/pages/'
	82	    pathlib.Path('dump/' + unique_name).parent.mkdir(parents=True, exist_ok=True)
	83	    pathlib.Path(git_root + unique_name).parent.mkdir(parents=True, exist_ok=True)
	84
	85	    cursor.execute("select id, revision, created_at, updated_at, published_at, user_id, editor_id from pages where node_id = %s", [node['id']])
	86	    pages = cursor.fetchall()
	87
	88	    for page in pages:
	89	        # ignoring unpublished pages
	90	        if not page['published_at']:
	91	            continue
	92
	93	        cursor.execute("select name from tags where id in (select tag_id from taggings where taggable_id = %s)", [page['id']])
	94	        tags = [row['name'] for row in cursor.fetchall()]
	95
	96	        # Unknown ids and empty logins fall back to "admin".
	97	        editor = login_by_id.get(page['editor_id']) or "admin"
	98	        creator = login_by_id.get(page['user_id']) or "admin"
	99
	100	        cursor.execute("select upload_file_name from related_assets full join assets on related_assets.asset_id = assets.id where related_assets.page_id = %s and position = 1;", [page['id']])
	101	        related_asset = cursor.fetchone()
	102	        previewimage = related_asset['upload_file_name'] if related_asset else ''
	103
	104	        cursor.execute("select locale, title, abstract, body, created_at, updated_at from page_translations where page_id = %s", [page['id']])
	105	        page_translations = cursor.fetchall()
	106	        for translation in page_translations:
	107	            if not translation.get("title"):
	108	                continue
	109
	110	            suffix = '.en.md' if translation['locale'] == 'en' else '.md'
	111	            # The dump name carries revision and timestamp so that all revisions
	112	            # of one page can sit next to each other in dump/.
	113	            fname = "dump/{}{}:{}@{}".format(unique_name, suffix, page['revision'], int(page['updated_at'].timestamp()))
	114	            gitname = unique_name + suffix
	115	            if not gitname.startswith('updates/'):
	116	                gitname = 'pages/' + gitname
	117
	118	            header = [
	119	                "title: {}".format(' '.join(translation['title'].split())),
	120	                "date: {}".format(page['published_at'].strftime("%Y-%m-%d %H:%M:%S %z")),
	121	                "updated: {}".format(page['updated_at'].strftime("%Y-%m-%d %H:%M:%S %z")),
	122	                "author: {}".format(creator),
	123	                "tags: {}".format(', '.join(tags).lower()),
	124	            ]
	125	            if previewimage:
	126	                header.append("previewimage: /images/{}".format(previewimage))
	127
	128	            abstract = translation['abstract']
	129	            body = translation['body']
	130	            with open(fname, "w", encoding="utf-8") as f:
	131	                f.write("\n".join(header) + "\n\n")
	132	                # Add abstract, if one is there
	133	                if abstract:
	134	                    f.write(str(abstract) + "\n\n")
	135	                    if body:
	136	                        f.write("<!-- TEASER_END -->\n\n")
	137	                # A missing body is skipped; str(None) used to be converted and
	138	                # ended up as the literal text "None" in the page.
	139	                if body:
	140	                    f.write(pypandoc.convert_text(str(body), 'markdown-smart', format='html-native_divs-native_spans', extra_args=['--atx-headers']))
	141
	142	            page_list.append({'date': page['updated_at'], 'revision': page['revision'], 'comment': "page revision {}".format(page['revision']), 'fname': fname, 'gname': gitname, 'editor': editor, 'email': email_by_login.get(editor) or FALLBACK_EMAIL})
	143
	144	# Replay assets and page revisions in chronological order, one commit each.
	145	page_list.sort(key=lambda entry: (entry['date'], entry['revision']))
	146	for page in page_list:
	147	    print(page)
	148	    try:
	149	        shutil.copy(page['fname'], os.path.join('git', page['gname']))
	150	    except OSError as err:
	151	        # Best effort, like the former "cp": report the file and carry on.
	152	        print("WARNING: skipping {}: {}".format(page['gname'], err))
	153	        continue
	154	    # Argument lists instead of shell strings: file and asset names come from
	155	    # the database and may contain spaces, quotes or shell metacharacters.
	156	    run_git('add', '--', page['gname'])
	157	    committer = dict(os.environ, GIT_COMMITTER_NAME=page['editor'], GIT_COMMITTER_EMAIL=page['email'])
	158	    run_git('commit', '-m', "committing {}".format(page['comment']), '--author={} <{}>'.format(page['editor'], page['email']), '--date={}'.format(page['date']), env=committer)
	159
	160	cursor.close()
	161	conn.close()
	158