Files
pycom-documentation/migration.py
Emmanuel Florent 48e3a22c9e initial commit
2019-06-17 14:24:53 +02:00

297 lines
8.9 KiB
Python

import os
import sys
# import urllib.request
import os.path
import mistune
import xml.etree.ElementTree as ET
from lxml import etree
from io import StringIO
from shutil import copyfile
import re
weights = { '': 10 } # per level weight in toc
prev_ident = ""
SUMMARY="content/SUMMARY.md"
KEY_SEP="@"
SRC_URL = '/' # 'https://development.pycom.io/'
def _list_content():
import glob
from pathlib import Path
return [f for f in Path('content').glob('**/*.md')] + [f for f in Path('content').glob('**/*.html')]
def _parse_file(filename):
text = ""
with open(filename) as fp:
line = fp.readline()
cnt = 1
while line:
line = _parse_line(filename, fp.readline())
cnt += 1
text += line
return text
def _make_index(src):
dst = './content/' + '/'.join(src.split('/')[:-1]) + '/_index.md'
src = './content/' + src
# print('# copy:', src , '-->' ,dst)
try:
copyfile(src, dst)
except Exception as e:
print('# ', e)
def _make_key(url):
if url.endswith('README.md') or url.endswith('introduction.md'):
_make_index(url)
return KEY_SEP.join(url.split('/')[:-1])
else:
return KEY_SEP.join(url.split("/")).replace(".md","")
def _make_parent(key, level):
return KEY_SEP.join(key.split(KEY_SEP)[-2 - level:-1])
w = 10
prev_level = 1
def _traverse(root, nodes, last_h2):
global weights
weight = 10
parent = root
for node in nodes:
# w = w + 10
if node.text:
if node.attrib.has_key('href'):
url = node.attrib['href']
key = _make_key(url)
name = root.text + '>' + node.text
name = node.text
if node.text == "Introduction":
name = last_h2
else:
name = node.text
# last_h2=node.text
parent = _make_parent(key, _traverse.level)
# print(etree.tostring(root, pretty_print=True).decode())
weight_key = parent
if weight_key in weights:
weights[weight_key] = weights[weight_key] + 10
else:
weights[weight_key] = 10
url =SRC_URL + url.replace('introduction.md', '/').replace('README.md', '/').replace('.md', '/')
# print('#', traverse.level * ' ',weights[weight_key] , '>',key, node.text)
# print('#', traverse.level * ' ',weights[traverse.level] , traverse.level, root.text, '>', node.text,'[', url,']')
print(_write_toc(key, name, url, parent, weights[weight_key]))
else:
if node.tag == 'h2':
w= 0
last_h2 = node.text
print('# ***', last_h2)
_traverse.level += 1
_traverse(parent, list(node), last_h2)
# weights[traverse.level]= 10
if node.text:
parent = node
_traverse.level -= 1
def _write_toc(key, name, url, parent, weight):
toc =""
toc += "[[menu.main]]\n"
toc += " name = \"%s\"\n" % name.replace("\\","") # remove escapes
toc += " url = \"%s\"\n" % url
toc += " identifier = \"%s\"\n" % key
if parent:
toc += " parent = \"%s\"\n" % parent
toc += " weight = %s\n" % weight
return toc
def _override_file(f, text):
file = open(f,"w")
file.write(text)
file.close()
def _parse_line(f, line):
if "{% page-ref " in line:
# {% page-ref page="development/wipy2.md" %}
# -->
# {{< refname "development/wipy2.md" >}}
line = line.replace("{% page-ref page=", "{{% refname ")
line = line.replace("%}", "%}}")
if "{% hint" in line:
line = line.replace("{%", "{{%")
line = line.replace("%}", "%}}")
if "endhint" in line:
# {% endhint %} --> {{% /hint %}}
line = line.replace("endhint", "/hint")
line = line.replace("{%", "{{<")
line = line.replace("%}", ">}}")
if ".md)" in line:
line = line.replace(".md)", ")")
return line
def _find_title(filename):
with open(SUMMARY) as fp:
line = fp.readline()
cnt = 1
print(filename)
while line:
# filename was a python3 POSIXFile object
filename = str(filename).replace("_index.md", "README.md")
if str(filename)[8:].replace('.md','') in line:
# result is beetween []
result = re.search('\[(.*)\]', line)
if result is not None:
return result.group(1).replace("\\","")
line = fp.readline()
print("title not found for", filename)
return ""
# def _find_title(filename):
# with open(SUMMARY) as fp:
# line = fp.readline()
# cnt = 1
# while line:
# if line.startswith('#'):
# print(filename, line)
# return line.replace('#', '').strip()
# line = fp.readline()
# print("title not found for", filename)
# # sys.exit(1)
# return ""
def _remove_front_matters(file_path):
file = open(file_path, "r")
line = file.readline()
# check if first line contains '---'
if not line.startswith("---"):
print("no front matter in file")
return
i = 0
line = ""
# skip until next '---'
while not line.startswith('---'):
line = file.readline()
i = i + 1
# recopy content
output_path = '%s.output' % file_path
with open(output_path, 'wb') as out_file:
out_file.write(line.encode())
while line:
line = file.readline()
out_file.write(line.encode())
# move to original filemame
os.rename(output_path, file_path)
def _make_front_matters(title, urls):
fm =""
fm+= "---\n"
fm += "title: \"%s\"\n" % title.replace("\\","") # remove escapes if any
#
fm += "aliases:\n"
for url in urls:
fm += " - %s\n" % url
fm += "---\n"
print(fm)
return fm
import json
def _write_front_matters(file_path, title, urls):
# remove possible existing front matters tags
_remove_front_matters(file_path)
# load the gitbook redirects.json
# open a tmp file
output_path = '%s.output' % file_path
with open(output_path, 'wb') as out_file:
# write the front matter tags in blank file
out_file.write(_make_front_matters(title, urls).encode())
# recopy the rest of the file
with open(file_path, 'r') as in_file:
out_file.write(in_file.read().encode())
# delete tmp file and place new one
os.rename(output_path, file_path)
# callable public methods
def load_sumary():
markdown = mistune.Markdown()
f = open(SUMMARY)
parser = etree.HTMLParser()
tree = etree.parse(StringIO(markdown(f.read())), parser)
uls = tree.xpath("//body/*")
_traverse.level = 1
_traverse(tree.xpath("//body/*")[0], uls,'')
return tree
def parse_files():
text = ""
for f in _list_content():
text = _parse_file(f)
_override_file(f, text)
def _make_urls(file_path, redirects):
urls = []
if not (str(file_path).endswith("_index.md") or str(file_path).endswith("README.md")):
urls.append(str(file_path)[8:].replace(".md", ".html"))
urls.append(str(file_path)[8:])
for r in redirects['redirects']:
if 'to' in r:
r['to'] = r['to'].replace(".html", "") #[2:]
r['to'] = r['to'].replace(".html", "")
file_path = str(file_path).replace(".md", "") #[8:]
if r['to'][2:] == file_path[8:]:
print("compare %s %s" % (r['to'], file_path))
urls.append(r["from"])
return urls
def write_all_front_matters():
with open("redirects.json", "r") as read_file:
redirects = json.load(read_file)
for f in _list_content():
urls = _make_urls(f, redirects)
title = _find_title(f)
_write_front_matters(f, title, urls)
load_sumary()
parse_files()
write_all_front_matters()
# ({{% ref page="../dashboard.md" %}})
# {% content "fifth" %}
# {% content "first" %}
# {% content "forth" %}
# {% content "second" %}
# {% content "third" %}
# {% endhint %}
# {% endtabs %}
# {% hint style="danger" %}
# {% hint style="info" %}
# {% hint style="warning" %}
# {% page-ref page="../../pymakr/installation/atom.md" %}
# {% tabs first="Exp Board 2.0", second="Exp Board 3.0" %}
# {% tabs first="Exp Board 2.0", second="Exp Board 3.0", third="Pytrack/Pysense/Pyscan", forth="USB UART Adapter", fifth="WiFi" %}
# {% tabs first="Exp Board 2.0", second="Exp Board 3.0", third="Pytrack/Pysense/Pyscan", fourth="USB UART Adapter", fifth="WiFi" %}
# {% tabs first="Exp Board 2.0", second="Exp Board 3.0", third="Pytrack/Pysense/Pyscan", fourth="USB UART Adapter", fifth="Wifi" %}
# {{% /hint %}}
# {{% hint style="info" %}}