#!/bin/bash
#
# Build one <chapter>.md article (TOML front matter delimited by '+++', followed
# by the chapter body) from the *.html.md_1 fragments found in every direct
# sub-directory of the current directory. Each sub-directory is one issue.
#
# Work on a copy of the site or an extract of the tar files.
#
# Requires: bash, find, sed (GNU: uses the Q command and -i), grep, tr, cut,
# uniq, tail, cat, bc.

# Delete any non-HTML file. The copy should be located in the static/ dir.
#find . -type f -not -name "*.html" -delete
#rm -r */img
#rm contents-full.html

# Initial conversion to Markdown
#find . -name "*.html" -exec sh -c "cat {} | sed -n '/Main content table/,/End of main content/p;/End of main content/q' | iconv -f iso8859-7 -t utf-8 | pandoc -s -r html -t markdown_github-raw_html | grep -vE '\[Περιεχόμενα\]|\[Επόμενο\]|\[Προηγούμενο\]' > {}.md_1" \;
# or
#find . -name "*.html" -exec sh -c "cat {} | sed -n '/Main content table/,/End of main content/p;/End of main content/q' | iconv -f iso8859-7 -t utf-8 | pandoc -s --columns=160 -r html -t markdown+pipe_tables-raw_html | grep -vE '\[Περιεχόμενα\]|\[Επόμενο\]|\[Προηγούμενο\]|^:::' > {}.md_1" \;

#######################################
# Map a Greek month name (nominative or genitive) contained in a string to
# its month number.
# Arguments: $1 - text that should contain a month name
# Outputs:   the month number (1-12, no zero padding) on stdout
# Returns:   0 when a month was recognised, 1 otherwise (nothing is printed)
#######################################
month_number() {
  local month
  case "$1" in
    *Ιανουάριος* | *Ιανουαρίου*) month=1 ;;
    *Φεβρουάριος* | *Φεβρουαρίου*) month=2 ;;
    *Μάρτιος* | *Μαρτίου*) month=3 ;;
    *Απρίλιος* | *Απριλίου*) month=4 ;;
    *Μάϊος* | *Μαΐου*) month=5 ;;
    *Ιούνιος* | *Ιουνίου*) month=6 ;;
    *Ιούλιος* | *Ιουλίου*) month=7 ;;
    *Αύγουστος* | *Αυγούστου*) month=8 ;;
    *Σεπτέμβριος* | *Σεπτεμβρίου*) month=9 ;;
    *Οκτώβριος* | *Οκτωβρίου*) month=10 ;;
    *Νοέμβριος* | *Νοεμβρίου*) month=11 ;;
    # The genitive form is accepted here like for every other month.
    *Δεκέμβριος* | *Δεκεμβρίου*) month=12 ;;
    *) return 1 ;;
  esac
  printf '%s\n' "$month"
}

#######################################
# Turn the free-text article date into the front matter timestamp.
# Arguments: $1 - date text (Greek month name plus a 4-digit year)
# Outputs:   YYYY-MM-01T00:00:00Z on stdout
# Returns:   0 on success, 1 when the month or the year cannot be determined
#            (nothing is printed in that case)
#######################################
front_matter_date() {
  local adate=$1
  local amonth ayear

  amonth=$(month_number "$adate") || return 1

  # Join multi-line input into one line first; the greedy leading .* makes
  # sed keep the last run of four digits on that line.
  ayear=$(sed -e "s/.*\([[:digit:]]\{4\}\).*/\1/" <<<"${adate//$'\n'/ }")
  # Without a 4-digit match sed returns its input unchanged: reject that.
  [[ "$ayear" =~ ^[0-9]{4}$ ]] || return 1

  printf '%s-%02d-01T00:00:00Z\n' "$ayear" "$amonth"
}

#######################################
# List the chapter base names present in the current directory.
# A chapter consists of <chapter>.html.md_1 plus optional continuation pages
# <chapter>-<N>.html.md_1; both map to the same base name.
# Outputs: one chapter name per line on stdout (adjacent duplicates removed)
#######################################
list_chapters() {
  local fragment
  for fragment in [0-1]*html.md_1; do
    # An unmatched glob stays literal; skip it instead of listing it.
    [[ -e "$fragment" ]] || continue
    printf '%s\n' "$fragment"
  done \
    | sed -e 's/\(.*\)-[0-9]*.html\.md_1/\1/' \
    | cut -f1 -d. \
    | uniq
}

#######################################
# Write <chapter>.md in the current directory: front matter, the intro
# fragment from its 7th line on, then the continuation pages.
# Arguments: $1 - issue name (used for the series and for image paths)
#            $2 - chapter base name
# Outputs:   a diagnostic on stderr when the article date cannot be parsed
#######################################
build_article() {
  local issue=$1
  local chapter=$2
  local intro="${chapter}.html.md_1"
  local out="${chapter}.md"
  local title author adate stamp series order

  # construct article header
  # Title: the non-empty lines that precede the first line starting with ====.
  title=$(sed '/^====.*/Q' "$intro" | grep -v "^$" | tr -d "\'")
  # Author: lines starting with "[*", minus <...>, {...} and " (...)" parts
  # and minus markup characters.
  author=$(sed -n '/^\[\*.*/p;/.+\*\]/Q' "$intro" \
    | sed -e "s/\ <.*>//" -e "s/{.*}//" -e "s/ (.*)//" \
    | tr -d "*,[]\\\'")
  # Date: the text inside the brackets of lines starting with "*[".
  adate=$(sed -n '/^*\[/p' "$intro" | sed -e 's/\*\[\(.*\)\].*/\1/' | tr -d "*\'")

  # Never reuse the previous chapter's month or year: an unparsable date
  # yields an empty date field and a diagnostic.
  if ! stamp=$(front_matter_date "$adate"); then
    printf 'failed date: %s/%s\n' "$issue" "$intro" >&2
    stamp=''
  fi

  series="Magaz ${issue}"
  # Text before the first underscore of the chapter name; bc normalises the
  # number (e.g. drops leading zeros).
  order=$(cut -d_ -f1 <<<"$chapter" | bc)

  # Start Template
  # Start Front Matter
  {
    printf '%s\n' '+++'
    printf "title = '%s'\n" "$title"
    printf "date = '%s'\n" "$stamp"
    printf "description = ''\n"
    printf "author = '%s'\n" "$author"
    printf "issue = ['%s']\n" "$series"
    printf 'issue_weight = %s\n' "$order"
    printf '%s\n' '+++'
  } > "$out"

  # Append content
  #cat ${intro} | sed -e "s/(${chapter}.*)//" | sed -e "s/^-.*\[\(.*\)\]$/- \1/" | sed -e "s/\([[:digit:]]\)\. \[\(.*\)\]$/\1. \2/" >> ${chapter}.md
  # NOTE(review): the chapter name is interpolated into a sed pattern, so
  # regex metacharacters in a file name would be interpreted by sed.
  sed -e "s/${chapter}.*\.html//" \
    -e 's/\[\(.*\)\]{.*} \[\(.*\)\](.*/**\1 \2**/' "$intro" \
    | tail -n +7 >> "$out"

  # Continuation pages, single-digit numbers first, then two-digit ones.
  # Errors are silenced on purpose: a chapter may have no such pages.
  cat "${chapter}"-?.html.md_1 >> "$out" 2>/dev/null
  cat "${chapter}"-??.html.md_1 >> "$out" 2>/dev/null

  # Change image links to absolute paths
  sed -i "s/(img\//(\/${issue}\/img\//g" "$out"
}

#######################################
# Build the articles of every chapter found in the current directory.
# Arguments: $1 - issue name
#######################################
process_issue() {
  local issue=$1
  local chapter
  local -a chapters=()

  mapfile -t chapters < <(list_chapters)
  for chapter in "${chapters[@]}"; do
    build_article "$issue" "$chapter"
  done
}

#######################################
# Process every direct sub-directory of the current directory as one issue.
#######################################
main() {
  local d
  # fd 3 keeps the directory list away from the stdin of the commands run
  # inside the loop.
  while IFS= read -r -d '' d <&3; do
    printf '#Entering dir %s\n' "$d"
    # The subshell confines the directory change, so a failing cd can
    # neither run the chapter loop in the wrong place nor shift the
    # working directory for the following issues.
    (
      cd -- "$d" || exit 1
      process_issue "$(basename -- "$d")"
      exit 0
    ) || printf 'cannot enter directory: %s\n' "$d" >&2
  done 3< <(find . -mindepth 1 -maxdepth 1 -type d -print0)
}

main "$@"