#!/usr/bin/env bash
# bash is required: the script relies on [[ ]], (( )) and other syntax that a
# plain POSIX /bin/sh (dash, ash, ...) does not understand.
# #############################################################################
# Script metadata; the usage and manual texts are built from these values.
NAME_="htmloptim"
HTML_="optimize html file"
PURPOSE_="optimize html file by reducing its size"
SYNOPSIS_="$NAME_ [-vhlrdospbtm] [-c <n>] <file> [file...]"
REQUIRES_="standard GNU commands, file"
VERSION_="1.2"
DATE_="2005-07-20; last update: 2006-01-29"
AUTHOR_="Dawid Michalczyk <dm@eonworks.com>"
URL_="www.comp.eonworks.com"
CATEGORY_="www"
PLATFORM_="Linux"
SHELL_="bash"
DISTRIBUTE_="yes"
# #############################################################################
# This program is distributed under the terms of the GNU General Public License
# NOTE:
# Do not edit in an editor which automatically translates tabs to spaces! This
# script needs embedded tabs for pattern matching.
# HISTORY:
# 2005-07-20 v1.0
# 2005-07-22 v1.1 - added -t option
# - added skipping files with <pre> tags
# - better handling of stdout and stderr messages
# 2006-01-29 v1.2 - added the -p option (remove trailing spaces and tabs)
# - added skipping of more <pre> like tags: <listing>
# <plaintext> <xmp>
# - added script description
# - renamed the script from htmoptim to htmloptim
# TODO:
# - more testing of option -t
# - optimize around <pre> like tags
# - remove html comments
# - remove script tags
# Print the one-screen option summary on stderr and terminate the script
# with exit status 1. Reads NAME_, VERSION_, PURPOSE_, SYNOPSIS_, REQUIRES_.
usage() {
  # one printf argument per output line
  printf >&2 '%s\n' \
    "$NAME_ $VERSION_ - $PURPOSE_" \
    "Usage: $SYNOPSIS_" \
    "Requires: $REQUIRES_" \
    'Options:' \
    '-r, replace the input file, default output is stdo' \
    '-d, convert DOS to Unix endline (remove CR)' \
    '-t, remove empty space between > and < (end and begin tag indicators)' \
    '-o, put all html content on one line (remove LF)' \
    '-s, remove leading spaces and tabs' \
    '-p, remove trailing spaces and tabs' \
    '-b, remove blank lines and lines containing only spaces or tabs' \
    '-c <n>, compact text, fold text at line <n> width' \
    '-v, verbose' \
    '-m, manual' \
    '-h, usage and options (help)' \
    '-l, see this script'
  exit 1
}
# Print the manual page on stderr: a leading blank line, the NAME, SYNOPSIS,
# DESCRIPTION and AUTHOR sections, and a trailing blank line.
# Reads NAME_, VERSION_, PURPOSE_, SYNOPSIS_, AUTHOR_ and URL_.
manual() {
  # one printf argument per output line; the empty arguments are the blank
  # lines that frame the text
  printf >&2 '%s\n' \
    '' \
    'NAME' \
    "$NAME_ $VERSION_ - $PURPOSE_" \
    'SYNOPSIS' \
    "$SYNOPSIS_" \
    'DESCRIPTION' \
    "$NAME_ reduces the size of html file by optimizing its content. This is" \
    'done by removing unnecessary characters like spaces, tabs, line feeds or' \
    'blank lines.' \
    'If the -r option is used, the optimized version will replace the original' \
    'only if it is smaller.' \
    'Files with <pre> <listing> <plaintext> and <xmp> tags are skipped.' \
    'To unoptimize a file, or to make it easily readable, use the tidy tool:' \
    'tidy.sourceforge.net' \
    'AUTHOR' \
    "$AUTHOR_ Suggestions and bug reports are welcome." \
    "For updates and more scripts visit $URL_" \
    ''
}
# check if required command is in $PATH variable
# (command -v is a shell builtin; the external `which` is not needed)
command -v file > /dev/null 2>&1 || {
  echo >&2 "${NAME_}: the \"file\" command needed to run this script is not in your \$PATH"
  exit 1
}

# arg check
[ $# -eq 0 ] && { echo >&2 "missing argument, type $NAME_ -h for help"; exit 1; }

# var init
verbose=    # verbose output
replace=    # replace the input file, default output is stdout
rmblanks=   # remove blank lines
rmtrail=    # remove trailing spaces and tabs
rmleads=    # remove leading spaces and tabs
dtu=        # convert DOS to Unix endline; remove CR
oneline=    # remove LF; put all html content on one line
compact=    # compact text; fold text at specified line width
tagspace=   # remove empty space between > and < tags
text=       # boolean, is input file an ascii file
tmp_dir=    # private directory holding the two work files
tmp_1=      # work file: current state of the file being optimized
tmp_2=      # work file: output of the filter currently running

# option and arg handling
while getopts vhlc:otdspmbr options; do
  case $options in
    c) compact=$OPTARG ;;
    t) tagspace=on ;;
    o) oneline=on ;;
    d) dtu=on ;;
    s) rmleads=on ;;
    p) rmtrail=on ;;
    b) rmblanks=on ;;
    r) replace=on ;;
    v) verbose=on ;;
    # manual writes to stderr; merge it into the pipe so that more can page it
    m) manual 2>&1 | more; exit ;;
    h) usage ;;
    l) more "$0"; exit ;;
    # diagnostics belong on stderr, like every other error message here
    \?) echo >&2 "invalid or missing argument, type $NAME_ -h for help"; exit 1 ;;
  esac
done
shift $(( OPTIND - 1 ))

# input file check
[ $# -eq 0 ] && { echo >&2 "specify file(s) to work on, type $NAME_ -h for help"; exit 1; }

# arg int check
[[ $compact == *[!0-9]* ]] && { echo >&2 "${NAME_}: the argument to option c must be an integer"; exit 1; }

# signal trapping and tmp file removal
# (installed before the directory is created so nothing can be left behind)
trap '[ -n "$tmp_dir" ] && rm -rf -- "$tmp_dir" >/dev/null 2>&1' 0
trap "exit 1" 1 2 3 15

# tmp file set up
# The work files live in a private directory created by mktemp -d, so their
# names cannot be predicted or pre-created by another user of /tmp. The files
# themselves are deliberately not created here.
tmp_dir=$(mktemp -d "/tmp/${NAME_}.XXXXXX") || {
  echo >&2 "${NAME_}: cannot create a temporary directory"
  exit 1
}
tmp_1=$tmp_dir/tmp1
tmp_2=$tmp_dir/tmp2
# local funcs
# Print the size of a file in bytes.
# Arguments: $1 - path of the file to measure
# Outputs:   the byte count on stdout (nothing if the file cannot be read)
# Returns:   non-zero if the file cannot be read
file_GetSize() {
  local bytes
  # Counting bytes through a redirection avoids parsing `ls -l`, whose column
  # layout shifts when owner/group names contain spaces, and it is immune to
  # file names that begin with a dash.
  bytes=$(wc -c < "$1") || return
  # arithmetic expansion strips the padding some wc implementations print
  echo $(( bytes ))
}
# Format an integer with a dot between every group of three digits,
# counting from the right (1234567 -> 1.234.567).
# Arguments: $1 - string of digits
# Outputs:   the grouped number on stdout, followed by a newline
str_DelimitInt() {
  local rest=$1 grouped=
  # Peel three characters at a time off the right-hand end; whatever is left
  # (one to three characters) becomes the leading group, so no separator is
  # ever placed in front of the number.
  while (( ${#rest} > 3 )); do
    grouped=".${rest: -3}${grouped}"
    rest=${rest:0:${#rest}-3}
  done
  printf '%s\n' "${rest}${grouped}"
}
# main
# For every file named on the command line: skip it unless `file` reports it
# as text and it has no <pre>-like tags, run the selected filters over a work
# copy, and keep the result only if it is smaller than the original.
# Blanks are matched with the POSIX class [[:blank:]] (space or tab) instead
# of literal characters, so the patterns survive editors that convert tabs.
for a in "$@"; do

  # does file exist
  [ -f "$a" ] || { echo >&2 "${NAME_}: file \"$a\" does not exist"; exit 1; }

  # is input an ascii file
  # -b leaves the file name out of the report, so a name that happens to
  # contain the word "text" cannot make a binary file pass this check
  if file -b -- "$a" | grep -q text; then
    text=0
  else
    text=1
  fi

  if [[ $text == 1 ]]; then
    echo >&2 "${NAME_}: skipping: $a not an ascii file"
    continue
  fi

  # all filters work on a copy; the original is only touched at the very end
  cp -- "$a" "$tmp_1" || exit 1

  # files with <pre> <listing> <plaintext> and <xmp> tags are skipped
  if grep -qi \
      -e "<[[:blank:]]*pre" \
      -e "<[[:blank:]]*listing[[:blank:]]*>" \
      -e "<[[:blank:]]*plaintext[[:blank:]]*>" \
      -e "<[[:blank:]]*xmp[[:blank:]]*>" \
      "$tmp_1"; then
    echo >&2 "${NAME_}: skipping \"$a\" - it contains one of the following tags <pre> <listing> <plaintext> <xmp>"
    continue
  fi

  # original file size
  osize=$(file_GetSize "$tmp_1")

  # html optimizing
  # each filter reads tmp_1, writes tmp_2, and the result is copied back
  [[ $dtu ]]      && { tr -d '\015' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  [[ $rmblanks ]] && { sed '/^[[:blank:]]*$/d' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  [[ $rmleads ]]  && { sed 's/^[[:blank:]]*//' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  [[ $rmtrail ]]  && { sed 's/[[:blank:]]*$//' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  [[ $compact ]]  && { fmt -w "$compact" "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  # joining lines comes before the tag filter so that gaps which spanned a
  # line break are removed as well
  [[ $oneline ]]  && { tr '\012' ' ' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }
  [[ $tagspace ]] && { sed 's/>[[:blank:]]*</></g' < "$tmp_1" > "$tmp_2"; cp "$tmp_2" "$tmp_1"; }

  # replacing only files that are smaller than the original
  nsize=$(file_GetSize "$tmp_1") # new, optimized file size

  if (( nsize < osize )); then
    if [[ $replace ]]; then
      # a failed replacement must not fall through to printing the file
      mv -- "$tmp_1" "$a" || exit 1
      # verbose output only in replace mode, so it never gets appended to
      # the optimized html written to stdout
      if [[ $verbose ]]; then
        printf '%s\n' "$a $(str_DelimitInt "$osize") -> $(str_DelimitInt "$nsize") bytes"
      fi
    else
      cat "$tmp_1"
    fi
  else
    # reported on stderr so it cannot mix with html written to stdout
    echo >&2 "${NAME_}: $a optimization did not decrease the size"
  fi

done