88 γραμμές
4,4 KiB
Bash
Εκτελέσιμο αρχείο
88 γραμμές
4,4 KiB
Bash
Εκτελέσιμο αρχείο
#!/bin/bash
#
# Assemble one Markdown article per chapter from the "*.html.md_1" fragments
# produced by the initial HTML -> Markdown conversion shown below.
#
# Usage: run without arguments from the top of the working copy.  Every
# first-level sub-directory is treated as one issue; for each chapter found
# there a "<chapter>.md" file is written into the same directory.  It consists
# of a "+++"-delimited front matter header followed by the chapter intro and
# its numbered parts.
#
# Requires GNU sed (the author filter uses the "Q" command).

# Work on a copy of the site or an extract of the tar files.

# Delete any non-HTML file. It should be located in the static/ dir.
#find . -type f -not -name "*.html" -delete
#rm -r */img
#rm contents-full.html

# Initial conversion to Markdown
#find . -name "*.html" -exec sh -c "cat {} | sed -n '/Main content table/,/End of main content/p;/End of main content/q' | iconv -f iso8859-7 -t utf-8 | pandoc -s -r html -t markdown_github-raw_html | grep -vE '\[Περιεχόμενα\]|\[Επόμενο\]|\[Προηγούμενο\]' > {}.md_1" \;
# or
#find . -name "*.html" -exec sh -c "cat {} | sed -n '/Main content table/,/End of main content/p;/End of main content/q' | iconv -f iso8859-7 -t utf-8 | pandoc -s --columns=160 -r html -t markdown+pipe_tables-raw_html | grep -vE '\[Περιεχόμενα\]|\[Επόμενο\]|\[Προηγούμενο\]|^:::' > {}.md_1" \;

#######################################
# Print a diagnostic on stderr.
# Arguments: the message words
#######################################
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Map a Greek month name to its number.
# Both the nominative and the genitive form of every month are accepted.
# Arguments: $1 - text containing a Greek month name
# Outputs:   the month number (1-12) on stdout
# Returns:   0 when a month was recognised, 1 otherwise (nothing printed)
#######################################
month_number() {
  case "$1" in
    *Ιανουάριος* | *Ιανουαρίου*) echo 1 ;;
    *Φεβρουάριος* | *Φεβρουαρίου*) echo 2 ;;
    *Μάρτιος* | *Μαρτίου*) echo 3 ;;
    *Απρίλιος* | *Απριλίου*) echo 4 ;;
    *Μάϊος* | *Μαΐου*) echo 5 ;;
    *Ιούνιος* | *Ιουνίου*) echo 6 ;;
    *Ιούλιος* | *Ιουλίου*) echo 7 ;;
    *Αύγουστος* | *Αυγούστου*) echo 8 ;;
    *Σεπτέμβριος* | *Σεπτεμβρίου*) echo 9 ;;
    *Οκτώβριος* | *Οκτωβρίου*) echo 10 ;;
    *Νοέμβριος* | *Νοεμβρίου*) echo 11 ;;
    *Δεκέμβριος* | *Δεκεμβρίου*) echo 12 ;;
    *) return 1 ;;
  esac
}

#######################################
# Write "<chapter>.md" in the current directory.
# Arguments: $1 - chapter name (fragment file name without "-N" and ".html.md_1")
#            $2 - issue name (used for the series label and the image paths)
# Outputs:   warnings on stderr
# Returns:   0 (a chapter without an intro fragment is skipped with a warning)
#######################################
build_article() {
  local chapter=$1 issue=$2
  local intro="${chapter}.html.md_1"
  local out="${chapter}.md"
  local -r year_re='.*([[:digit:]]{4})'
  local title author adate amonth ayear adate_iso series prefix order
  local chapter_re issue_rep part

  if [[ ! -f "$intro" ]]; then
    warn "no intro fragment ${issue}/${intro}; chapter skipped"
    return 0
  fi

  # Title: the non-empty lines that precede the first "====" underline.
  # Single quotes are dropped because the value is written between quotes.
  title=$(sed -n '/^====/q; /^$/!p' <"$intro" | tr -d "'")

  # Author: lines starting with "[*", minus " <...>", "{...}" and " (...)"
  # and minus markup punctuation.
  # NOTE(review): in a basic regex the "+" of /.+\*\]/ is a literal plus sign,
  # so the Q only fires on a literal "+*]".  Kept as-is; confirm the intent.
  author=$(
    sed -n '/^\[\*.*/p;/.+\*\]/Q' <"$intro" \
      | sed -e "s/\ <.*>//" -e "s/{.*}//" -e "s/ (.*)//" \
      | tr -d "*,[]\\\'"
  )

  # Date text: the bracketed part of the lines starting with "*[".
  adate=$(sed -n '/^*\[/{s/\*\[\(.*\)\].*/\1/;p;}' <"$intro" | tr -d "*\'")

  # Month comes from the Greek month name, year from the last run of four
  # digits.  Both are recomputed for every chapter so a failure can never
  # reuse the month of the previous chapter.
  adate_iso=''
  if amonth=$(month_number "$adate") && [[ $adate =~ $year_re ]]; then
    ayear=${BASH_REMATCH[1]}
    # 10# forces base 10 so a leading zero is not read as octal.
    printf -v adate_iso '%04d-%02d-01T00:00:00Z' "$((10#$ayear))" "$amonth"
  else
    warn "failed date in ${issue}/${intro}: '${adate}'"
  fi

  series="Magaz ${issue}"

  # Weight: the numeric prefix before the first "_", without leading zeros.
  prefix=${chapter%%_*}
  if [[ $prefix =~ ^[0-9]+$ ]]; then
    order=$((10#$prefix))
  else
    warn "non-numeric chapter prefix '${prefix}' in ${issue}; weight set to 0"
    order=0
  fi

  # The names are spliced into sed programs below: escape what sed would
  # otherwise interpret (regex metacharacters / replacement specials).
  chapter_re=$(printf '%s' "$chapter" | sed -e 's/[]\/$*.^[]/\\&/g')
  issue_rep=$(printf '%s' "$issue" | sed -e 's/[\/&]/\\&/g')

  {
    # Start Front Matter
    printf '%s\n' '+++'
    printf "title = '%s'\n" "$title"
    printf "date = '%s'\n" "$adate_iso"
    printf "description = ''\n"
    printf "author = '%s'\n" "$author"
    printf "issue = ['%s']\n" "$series"
    printf 'issue_weight = %s\n' "$order"
    printf '%s\n' '+++'

    # Append content
    # Alternative (disabled):
    #cat ${intro} | sed -e "s/(${chapter}.*)//" | sed -e "s/^-.*\[\(.*\)\]$/- \1/" | sed -e "s/\([[:digit:]]\)\. \[\(.*\)\]$/\1. \2/" >> ${chapter}.md
    #
    # Strip the "<chapter>...html" link targets, turn the
    # "[a]{...} [b](..." pattern into "**a b**", and start at line 7 of the
    # intro (presumably past the title/author/date header - TODO confirm).
    sed -e "s/${chapter_re}.*\.html//" \
      -e 's/\[\(.*\)\]{.*} \[\(.*\)\](.*/**\1 \2**/' <"$intro" \
      | tail -n +7

    # Parts with a one-character suffix first, then two-character ones, so
    # that -1..-9 come before -10..-99.  An unmatched pattern stays literal
    # and is filtered out by the -f test.
    for part in "${chapter}"-?.html.md_1 "${chapter}"-??.html.md_1; do
      if [[ -f "$part" ]]; then
        cat -- "$part"
      fi
    done
  } | sed -e "s/(img\//(\/${issue_rep}\/img\//g" >"$out"
  # ^ Change image links to absolute paths

  return 0
}

#######################################
# Convert every chapter of one issue directory.
# Arguments: $1 - path of the issue directory
# Outputs:   "#Entering dir <path>" on stdout, warnings on stderr
# Returns:   0, or 1 when the directory cannot be entered
#######################################
process_issue() {
  local dir=$1
  local issue=${dir##*/}
  local fragment chapter
  local -a chapters=()

  printf '#Entering dir %s\n' "$dir"

  # The subshell confines the cd, so a failure cannot move the caller out of
  # the working tree.
  (
    cd -- "$dir" || {
      warn "cannot enter ${dir}"
      exit 1
    }

    # Chapter name = fragment name without the "-N" part suffix and without
    # anything from the first dot on.  sort -u removes duplicates even when
    # they are not adjacent.
    mapfile -t chapters < <(
      for fragment in [0-1]*html.md_1; do
        if [[ -e "$fragment" ]]; then
          printf '%s\n' "$fragment"
        fi
      done \
        | sed -e 's/\(.*\)-[0-9]*.html\.md_1/\1/' \
        | cut -f1 -d. \
        | LC_ALL=C sort -u
    )

    for chapter in "${chapters[@]}"; do
      build_article "$chapter" "$issue"
    done
  )
}

#######################################
# Process every first-level sub-directory of the current directory.
# Returns: 0 always; problems are reported as warnings on stderr
#######################################
main() {
  local dir

  # NUL-delimited names on fd 3: safe for any directory name and leaves
  # stdin untouched for the commands run inside the loop.
  while IFS= read -r -d '' -u 3 dir; do
    process_issue "$dir"
  done 3< <(find . -mindepth 1 -maxdepth 1 -type d -print0)

  return 0
}

main "$@"