#!/bin/bash
##
# Generate html page with blog article excerpts from ./posts.txt. Post file names should
# be added to ./posts.txt in the exact order that they are supposed to appear on the blog
# page.
# Bail out early unless every external program listed below can be resolved.
# `type` prints the result of each lookup itself; the two messages that follow
# only add context on stderr before the script stops with status 1.
type readlink dirname html2text mv cat cksum base64 pup || {
  printf '%s\n' \
    'One or more required executables are not present. Generation cancelled' \
    'Note: You can install pup with "go get github.com/ericchiang/pup"' >&2
  exit 1
}
# Resolve the directory that holds this script. `readlink -f` is a GNU
# extension, so the canonical path is computed first and then reduced to its
# parent directory.
here="$(readlink -f "${BASH_SOURCE[0]}")"
here="$(dirname "$here")"

# pushd prints the new directory stack, which completes the line started here.
printf 'Changing directory: '
pushd "$here" || exit

# The list of posts is mandatory; stop with status 1 when it is missing.
posts_file="$here/posts.txt"
[[ -f "$posts_file" ]] || {
  printf 'Posts file "%s" not found. Generation cancelled.\n' "$posts_file" >&2
  exit 1
}
# Escape the characters that are significant in HTML/XML.
# Input:   text on stdin
# Output:  the same text on stdout with & < > " ' replaced by entities
#
# The ampersand rule must run first; otherwise the ampersands introduced by
# the later rules would themselves be escaped again.
escape-html() {
  sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\&#39;/g"
}
# Render HTML as plain text with html2text, using a fixed set of options.
# Arguments: forwarded verbatim to html2text after the fixed options.
# Output:    whatever html2text writes to stdout.
html-to-text() {
  local -a fixed_opts=(-nobs -style compact)
  html2text "${fixed_opts[@]}" "$@"
}
# Print the leading part of the blog page to stdout.
# NOTE(review): the string literal below contains only a line break. Given the
# function name, the page's opening HTML markup may have been lost from this
# literal -- confirm against the original file before relying on it.
print-blog-html-top() {
echo '
'
}
# Print the trailing part of the blog page to stdout.
# NOTE(review): the string literal below contains only a line break. Given the
# function name, the page's closing HTML markup may have been lost from this
# literal -- confirm against the original file before relying on it.
print-blog-html-bottom() {
echo '
'
}
# Print a date/time in RFC 822 format, e.g. "Thu, 02 Jan 2020 03:04:05 +0000".
# Arguments: passed to date(1) ahead of the format, e.g. --date="<timestamp>";
#            with no arguments the current time is printed.
# Output:    the formatted date on stdout.
#
# LC_ALL=C keeps the day and month names in English. The zone is printed as a
# numeric offset (%z) rather than an abbreviation (%Z): RFC 822 accepts only a
# handful of zone names, so abbreviations such as CET or CEST are not valid.
rfc-822-date-time() {
  LC_ALL=C date "$@" +'%a, %d %b %Y %H:%M:%S %z'
}
# Print the opening of the RSS 2.0 document: the XML declaration, the <rss> and
# <channel> start tags and the channel metadata. The document is left open so
# that items can be appended after it.
# Note: pubDate and lastBuildDate are both set to the current time. The
# timestamp is taken once so that the two values are always identical.
# NOTE(review): the element names were reconstructed from the RSS 2.0 channel
# element order; verify them against the published feed.
# Returns: non-zero when the timestamp cannot be produced.
print-blog-rss-top() {
  local now
  now="$(rfc-822-date-time)" || return

  cat <<EOF
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Hugot Blog</title>
<link>https://hugot.nl/blog.html</link>
<description>Hugo's personal blog</description>
<language>en-us</language>
<pubDate>${now}</pubDate>
<lastBuildDate>${now}</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Hugo's Custom Bash Script</generator>
<managingEditor>social@hugot.nl (Hugot)</managingEditor>
<webMaster>infra@hugot.nl (Hugot Infra)</webMaster>
EOF
}
# Print the end of the RSS document: the closing tags for the <channel> and
# <rss> elements that the top of the feed leaves open.
# NOTE(review): the closing tags were reconstructed from the RSS 2.0 document
# structure; verify them against the published feed.
print-blog-rss-bottom() {
  printf '%s\n' '</channel></rss>'
}
# Print an opening tag without a trailing newline.
# Arguments: $1   - tag spec, used as a printf format between "<" and ">"
#                   (e.g. 'div' or 'a href="%s"')
#            $2.. - values substituted into the format
# Output:    the opening tag on stdout.
el() {
  # Kept local so that the helper does not leak state into the caller's scope.
  local format_string="$1"
  shift
  # shellcheck disable=SC2059 # the tag spec is deliberately a printf format
  printf "<${format_string}>" "$@"
}
# Print the closing tag for an element, followed by a newline.
# Arguments: $1 - element name
# Output:    "</name>" on stdout.
el-close() {
  printf '</%s>\n' "$1"
}
# Print content wrapped in an element, without a trailing newline.
# Arguments: $1   - element name
#            $2.. - content pieces, concatenated without a separator
# Output:    "<name>content</name>" on stdout.
el-enclose() {
  # Kept local so that the helper does not leak state into the caller's scope.
  local element_name="$1"
  shift
  printf '<%s>' "$element_name"
  printf '%s' "$@"
  printf '</%s>' "$element_name"
}
# Base URL of the site.
site_url="https://hugot.nl"

# Output locations. Each output is first written to a sibling ".new" file.
# NOTE(review): presumably the .new files are moved over the final names once
# generation has finished -- that step is not visible from here.
blog_html="$here/blog.html"
new_html="$blog_html.new"
blog_rss="$here/feed.xml"
new_rss="$blog_rss.new"

# Start both outputs with their fixed leading part. Stop right away when a
# file cannot be written, instead of carrying on with a partial output.
print-blog-html-top > "$new_html" || exit $?
print-blog-rss-top > "$new_rss" || exit $?
while read -r post_html; do
# Convert the post's html to text to make it easier to use the blog's text
text="$(html-to-text "$post_html" | escape-html)" || exit $?
# The title should be on the 2nd line of text, right after the link to the
# homepage. This is a bit inflexible but it will do for now.
title="$(tail -n +3 <<<"$text" | head -n 1 | tr -d '*')" || exit $?
# Use the first 5 lines after the title as post excerpt.
# excerpt="$(tail -n +4 <<<"$text" | head -n 5)" || exit $?
# Include full post content
excerpt="$(pup article < "$post_html" | escape-html)"
# Escape the post html file name to safely use it in the generated html.
href="$(escape-html <<<"$post_html")" || exit $?
post_dir="$(dirname "$post_html")" || exit $?
pubdate_file="$post_dir/publish_date.txt"
# Determine a publishing date for the post
if [[ -f "$pubdate_file" ]]; then
read -r pubdate < "$pubdate_file"
else
pubdate="$(date)"
echo "$pubdate" > "$pubdate_file"
fi
# Convert the publishing date to RFC 822 format
pubdate="$(rfc-822-date-time --date="$pubdate")"
{
el div
printf '