#!/bin/bash
##
# Generate the blog overview page (blog.html), an RSS feed (feed.xml) and one
# html page per post from the post files listed in ./publish.txt.
#
# Post file names should be added to ./publish.txt (one per line, relative to
# this script's directory) in the exact order that they are supposed to appear
# on the blog page.
#
# Per post directory the script maintains three bookkeeping files:
#   publish_date.txt    date of first publication
#   last_checksum.txt   cksum of the post file at the last run
#   last_edit_date.txt  date at which a checksum change was last detected
#
# Requires GNU coreutils (readlink -f, date --rfc-email, head -n -1).
#
# NOTE(review): the html wrapper markup in the print-*-html-* functions and in
# the blog entry section was reconstructed around the visible text (titles,
# navigation labels, dates); confirm the tags against the deployed site.

# Check if required executables can be found
# NOTE(review): pup is required here but not called anywhere in this script.
if ! type readlink dirname basename html2text mv cp cat cksum base64 sed \
  head tail tr date mkdir pup; then
  echo 'One or more required executables are not present. Generation cancelled' >&2
  echo 'Note: You can install pup with "go get github.com/ericchiang/pup"' >&2
  exit 1
fi

# Determine script directory (requires GNU readlink)
here="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"

printf 'Changing directory: '
pushd "$here" || exit $?

posts_file="$here/publish.txt"

if ! [[ -f "$posts_file" ]]; then
  printf 'Posts file "%s" not found. Generation cancelled.\n' "$posts_file" >&2
  exit 1
fi

# Escape the five html special characters in stdin and write the result to
# stdout. The ampersand is replaced first so that the entities produced by the
# later substitutions are not escaped a second time.
escape-html() {
  sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g; s/"/\&quot;/g; s/'"'"'/\&#39;/g'
}

# Convert the html file(s) given as arguments to plain text on stdout.
html-to-text() {
  html2text -nobs -style compact "$@"
}

# Print everything of blog.html that precedes the list of posts.
print-blog-html-top() {
  echo '<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Blog</title>
<link rel="stylesheet" type="text/css" href="style.css">
</head>
<body>
<a href="index.html">Home</a> | <a href="feed.xml">RSS Feed</a>
<hr>
<h1>Blog</h1>
<hr>
'
}

# Print everything of blog.html that follows the list of posts.
print-blog-html-bottom() {
  echo '</body>
</html>'
}

# Print a date in RFC 822 / RFC 5322 format. Arguments are passed on to date(1),
# so `--date=...` can be used to convert an existing date. LC_ALL=C keeps day
# and month names in English regardless of the user's locale.
rfc-822-date-time() {
  LC_ALL=C date "$@" --rfc-email
}

# Print the part of a post page that precedes the post's own html.
#   $1 - post title (already html-escaped by the caller)
# Post pages are written to publish/posts/<name>/index.html, hence the ../../
# prefix on links to files in the publish directory.
print-post-html-top() {
  declare title="$1"

  cat <<EOF
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>${title}</title>
<link rel="stylesheet" type="text/css" href="../../style.css">
</head>
<body>
<a href="../../blog.html">Blog</a> | <a href="../../feed.xml">RSS Feed</a>
<hr>
EOF
}

# Print the part of a post page that follows the post's own html.
#   $1 - publishing date
#   $2 - last edit date
print-post-html-bottom() {
  declare publish_date="$1" last_edit_date="$2"

  cat <<EOF
<hr>
<p>
<i>First published: ${publish_date}</i><br>
<i>Last edited: ${last_edit_date}</i>
</p>
</body>
</html>
EOF
}

# Print the RSS 2.0 channel header.
# Note: pubDate and lastBuildDate are both set to the current time.
print-blog-rss-top() {
  cat <<EOF
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Hugot Blog</title>
<link>https://hugot.nl/blog.html</link>
<description>Hugo's personal blog</description>
<language>en-us</language>
<pubDate>$(rfc-822-date-time)</pubDate>
<lastBuildDate>$(rfc-822-date-time)</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Hugo's Custom Bash Script</generator>
<managingEditor>social@hugot.nl (Hugot)</managingEditor>
<webMaster>infra@hugot.nl (Hugot Infra)</webMaster>
EOF
}

# Close the elements opened by print-blog-rss-top.
print-blog-rss-bottom() {
  echo '</channel>
</rss>'
}

# Print an opening tag without a trailing newline.
#   $1   - tag name plus optional attributes; used as a printf format string
#   $2.. - optional values for format specifiers in $1
el() {
  local format_string="$1"
  shift

  # The first argument is a format string by design.
  # shellcheck disable=SC2059
  printf "<$format_string>" "$@"
}

# Print the closing tag for element $1, followed by a newline.
el-close() {
  echo "</$1>"
}

# Print the remaining arguments wrapped in an element named $1, without a
# trailing newline.
el-enclose() {
  local element_name="$1"
  shift

  printf '%s' "<$element_name>"
  printf '%s' "$@"
  printf '%s' "</$element_name>"
}

publish_dir="$here/publish"
site_url="https://hugot.nl"
blog_html="$publish_dir/blog.html"
new_html="$blog_html.new"
blog_rss="$publish_dir/feed.xml"
new_rss="$blog_rss.new"

mkdir -p "$publish_dir" || exit $?

# Both outputs are built in *.new files and moved into place at the very end,
# so a failed run does not leave a truncated blog.html or feed.xml behind.
print-blog-html-top > "$new_html"
print-blog-rss-top > "$new_rss"

# `|| [[ -n ... ]]` also processes a last line without a trailing newline.
while IFS= read -r post_html || [[ -n "$post_html" ]]; do
  # Ignore empty lines in the posts file.
  [[ -n "$post_html" ]] || continue

  # Convert the post's html to text to make it easier to use the blog's text
  text="$(html-to-text "$post_html" | escape-html)" || exit $?

  # The title is taken from the first line of the converted text, with any
  # asterisks removed. This is a bit inflexible but it will do for now.
  title="$(head -n 1 <<<"$text" | tr -d '*')" || exit $?

  # Use the first 5 lines after the title as post excerpt.
  excerpt="$(tail -n +2 <<<"$text" | head -n 5)" || exit $?

  # Escape the post html without its first and last line (presumably the
  # enclosing article tags) for use in the RSS feed article description.
  # This way the entire article can be read from an RSS reader.
  article_html="$({ head -n -1 | tail -n +2 | escape-html; } < "$post_html")" \
    || exit $?

  # Escape the post html file name to safely use it in the generated html.
  href="$(escape-html <<<"$post_html")" || exit $?

  post_dir="$(dirname "$post_html")" || exit $?
  post_publish_dir="$publish_dir/posts/$(basename "$post_dir")" || exit $?

  pubdate_file="$post_dir/publish_date.txt"
  checksum_file="$post_dir/last_checksum.txt"
  last_edit_file="$post_dir/last_edit_date.txt"

  current_checksum="$(cksum < "$post_html")" || exit $?

  # Determine a publishing date for the post. Dates are written in the C
  # locale so that `date --date` can parse them back in any environment.
  if [[ -f "$pubdate_file" ]]; then
    read -r pubdate < "$pubdate_file"
  else
    pubdate="$(LC_ALL=C date)"
    printf '%s\n' "$pubdate" > "$pubdate_file"
  fi

  # Load the checksum of the previous run; a post seen for the first time
  # counts as unchanged.
  if [[ -f "$checksum_file" ]]; then
    read -r checksum < "$checksum_file"
  else
    checksum="$current_checksum"
    printf '%s\n' "$current_checksum" > "$checksum_file"
  fi

  # Default to the publishing date so that a post that was never edited does
  # not inherit the last edit date of the previously processed post.
  last_edit_date="$pubdate"
  if [[ -f "$last_edit_file" ]]; then
    read -r last_edit_date < "$last_edit_file"
  fi

  changed=0
  if [[ "$checksum" != "$current_checksum" ]]; then
    changed=1
    last_edit_date="$(LC_ALL=C date)"
    printf '%s\n' "$last_edit_date" > "$last_edit_file"
    printf '%s\n' "$current_checksum" > "$checksum_file"
  fi

  # Convert both dates to conform to RFC 822
  pubdate="$(rfc-822-date-time --date="$pubdate")" || exit $?
  last_edit_date="$(rfc-822-date-time --date="$last_edit_date")" || exit $?

  post_index_file="$post_publish_dir/index.html"

  # Only (re)generate the post page when the post changed or was never published.
  if ((changed)) || ! [[ -f "$post_index_file" ]]; then
    printf 'Publishing %s\n' "$post_html" >&2
    mkdir -p "$post_publish_dir" || exit $?

    print-post-html-top "$title" > "$post_index_file"
    cat "$post_html" >> "$post_index_file"
    print-post-html-bottom "$pubdate" "$last_edit_date" >> "$post_index_file"
  fi

  # Blog page entry: linked title, publishing date and excerpt.
  {
    el div
    printf '
<a href="%s">
<h2>%s</h2>
</a>
' "$href" "$title"
    printf '<i>%s</i>' "$pubdate"

    el 'p style="margin-top: 0.5em;"'
    printf '%s ... <a href="%s">Continue reading</a>' "$excerpt" "$href"
    el-close p
    el-close div
    el hr
  } >> "$new_html"

  # RSS item. The guid combines the title with the checksum of the current
  # post contents, so it changes whenever the post is edited. It is not a URL,
  # hence isPermaLink="false".
  {
    el item
    el-enclose title "$title"
    el-enclose link "$site_url/$href"
    el-enclose description "$article_html"
    el-enclose pubDate "$pubdate"
    echo "<guid isPermaLink=\"false\">$title$(base64 <<<"$current_checksum")</guid>"
    el-close item
  } >> "$new_rss"
done < "$posts_file"

print-blog-html-bottom >> "$new_html"
print-blog-rss-bottom >> "$new_rss"

mv -v "$new_html" "$blog_html" || exit $?
mv -v "$new_rss" "$blog_rss" || exit $?

cp -v "$here/style.css" "$publish_dir/style.css" || exit $?
cp -v "$here/index.html" "$publish_dir/index.html" || exit $?

# Copy into the publish directory rather than onto publish/assets: with an
# existing destination directory `cp -r src/assets publish/assets` would create
# publish/assets/assets on every run after the first.
cp -rv "$here/assets" "$publish_dir/" || exit $?

echo 'SUCCESS!'