| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Spell checker.
 | 
| 4 | #
 | 
| 5 | # Usage:
 | 
| 6 | #   doctools/spelling.sh <function name>
 | 
| 7 | #
 | 
| 8 | # Examples:
 | 
| 9 | #   doctools/spelling.sh check-oils-docs
 | 
| 10 | #   doctools/spelling.sh check-blog
 | 
| 11 | 
 | 
set -o nounset
set -o pipefail
set -o errexit

# Make this symlink work:
#   ~/git/oilshell/oilshell.org -> ../oil/doctools/spelling.sh

# This file is doctools/spelling.sh, so the repo root is two directory levels
# above its symlink-resolved path.  Every expansion is quoted so that a
# checkout path containing whitespace is not word-split.
OIL_ROOT=$(dirname "$(dirname "$(readlink -f "$0")")")
# Kept separate from the assignment so a failure in the command substitution
# is not masked by the exit status of 'readonly'.
readonly OIL_ROOT
# Printed to stdout on every invocation, before the requested function runs.
echo "$OIL_ROOT"

readonly SPELLING_PY="$OIL_ROOT/doctools/spelling.py"
readonly BASE_DIR=_tmp/spelling  # relative path
 | 
| 26 | 
 | 
spelling() {
  ### Run $SPELLING_PY with PYTHONPATH set to $OIL_ROOT
  #
  # Args: forwarded verbatim to $SPELLING_PY
  # Globals read: OIL_ROOT, SPELLING_PY

  # The program path is quoted so that a checkout location containing
  # whitespace is still executed as a single command word.
  PYTHONPATH=$OIL_ROOT "$SPELLING_PY" "$@"
}
 | 
| 30 | 
 | 
to-ninja() {
  ### Read HTML paths on stdin, one per line; write a Ninja file to stdout
  #
  # Globals read: OIL_ROOT, SPELLING_PY, BASE_DIR
  #
  # For each input path foo/bar.html, two build statements are emitted:
  #   $BASE_DIR/foo/bar.txt    built by text-dump  from foo/bar.html
  #   $BASE_DIR/foo/bar.words  built by word-split from $BASE_DIR/foo/bar.txt

  # $in and $out are Ninja variables, so they must stay inside single quotes.
  # The quoting is interrupted once to splice in the shell values, giving
  #   PYTHONPATH=<OIL_ROOT> <SPELLING_PY> word-split
  echo '
rule text-dump
  command = lynx -dump $in > $out
  description = text-dump $in $out

rule word-split
  command = cat $in | PYTHONPATH='"$OIL_ROOT $SPELLING_PY"' word-split > $out
  description = word-split $in $out

'

  local html stem txt words
  # IFS= and -r keep each path exactly as given (no whitespace trimming, no
  # backslash interpretation).
  while IFS= read -r html; do
    # Strip only the trailing .html extension.  A global substitution would
    # also rewrite '.html' appearing inside a directory name, and would give
    # the .txt and .words targets the same name when the suffix is absent.
    stem=${html%.html}
    txt=$BASE_DIR/$stem.txt
    words=$BASE_DIR/$stem.words

    echo "build $txt: text-dump $html"
    echo
    echo "build $words: word-split $txt"
    echo
  done
}
 | 
| 54 | 
 | 
lines() {
  ### Print each argument on its own line; print nothing when given no args

  # printf instead of echo: echo would treat arguments such as -n or -e as
  # options rather than printing them.  The guard is needed because printf
  # with no arguments would still emit one empty line.
  if (( $# > 0 )); then
    printf '%s\n' "$@"
  fi
}
 | 
| 60 | 
 | 
doc-to-text() {
  ### Convert the given HTML files by generating and running a Ninja file
  #
  # Args: paths of HTML files.  Callers decide which files to pass; e.g.
  # check-blog leaves out drafts whose names start with _.

  # The redirect below fails if _tmp is missing, so create it first.
  mkdir -p _tmp

  lines "$@" | to-ninja > _tmp/doc.ninja

  ninja -f _tmp/doc.ninja
}
 | 
| 69 | 
 | 
clean() {
  ### Recursively remove $BASE_DIR, listing each removed path
  #
  # Globals read: BASE_DIR

  # Quoted so a value containing whitespace names exactly one path; '--' stops
  # option parsing; ':?' aborts instead of running rm when the value is empty.
  rm -r -f -v -- "${BASE_DIR:?}"
}
 | 
| 73 | 
 | 
check-tree() {
  ### Convert HTML files, then report word counts and spell check the result
  #
  # Args:
  #   $1      subdirectory of $BASE_DIR to search for *.words files
  #   $2...   HTML files, passed on to doc-to-text
  # Globals read: BASE_DIR
  local subdir=$1
  shift

  # Depends on build/doc.sh all-markdown
  doc-to-text "$@"

  echo
  echo 'Word Counts'
  echo

  # File names travel NUL-delimited through the pipelines below so that paths
  # containing whitespace reach wc and the spell checker intact.

  # For curiosity: word count by file
  find "$BASE_DIR/$subdir" -name '*.words' -print0 | xargs -0 wc -l | sort -n

  # Use alphabetical order.  xargs cannot call a shell function, so it
  # re-invokes this script ($0) with 'spelling' as the function to run.
  find "$BASE_DIR/$subdir" -name '*.words' -print0 | sort -z | xargs -0 \
    "$0" spelling check --known-words /usr/share/dict/words
}
 | 
| 92 | 
 | 
check-one() {
  ### Spell check a single .words file
  #
  # Args:
  #   $1   path of the .words file; defaults to the eggex doc under
  #        _tmp/spelling when omitted or empty
  local words=${1:-_tmp/spelling/_release/VERSION/doc/eggex.words}

  # Quoted so a path containing whitespace or glob characters is passed as
  # exactly one argument.
  spelling check --known-words /usr/share/dict/words "$words"
}
 | 
| 98 | 
 | 
check-oils-docs() {
  ### Spell check the HTML pages directly inside _release/VERSION/doc
  local doc_root='_release/VERSION/doc'

  # The glob stays outside the quotes so the shell expands it to the page list.
  check-tree "$doc_root" "$doc_root"/*.html
}
 | 
| 103 | 
 | 
check-doc-ref() {
  ### Spell check the HTML pages directly inside _release/VERSION/doc/ref
  local ref_root='_release/VERSION/doc/ref'

  # The glob stays outside the quotes so the shell expands it to the page list.
  check-tree "$ref_root" "$ref_root"/*.html
}
 | 
| 108 | 
 | 
check-blog() {
  ### Spell check blog posts under _site/blog/20??/*/
  local blog_root='_site/blog'

  # [!_] leaves out drafts, i.e. files whose names start with _
  check-tree "$blog_root" "$blog_root"/20??/*/[!_]*.html
}
 | 
| 113 | 
 | 
# Dispatch: run the first command-line argument as a command (normally one of
# the functions defined above), passing the remaining arguments to it.
"$@"
 |