aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
author Peter Schiffer <pschiffe@redhat.com> 2014-02-16 08:16:17 +0100
committer Michael Kerrisk <mtk.manpages@gmail.com> 2014-02-16 08:32:07 +0100
commit 2bde1fa364b91f23625df7fc1519f30b2cdc5a81 (patch)
tree 44aa7b2e3188d95c9a9407fe665b07b105046e5c /scripts
parent b44821fb539cea58e59ec9e4a46df78280e50bb7 (diff)
download man-pages-2bde1fa364b91f23625df7fc1519f30b2cdc5a81.tar.gz
convert_to_utf_8.sh: Script to convert pages to UTF-8
See https://bugzilla.kernel.org/show_bug.cgi?id=60807
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/convert_to_utf_8.sh 65
1 files changed, 65 insertions, 0 deletions
diff --git a/scripts/convert_to_utf_8.sh b/scripts/convert_to_utf_8.sh
new file mode 100644
index 0000000000..787d9b4821
--- /dev/null
+++ b/scripts/convert_to_utf_8.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+#
+# convert_to_utf_8.sh
+#
+# Find man pages with encoding other than us-ascii, and convert them
+# to the utf-8 encoding.
+#
+# Example usage:
+#
+# cd man-pages-x.yy
+# sh convert_to_utf_8.sh <output_dir> man?/*
+#
+######################################################################
+#
+# (C) Copyright 2013, Peter Schiffer <pschiffe@redhat.com>
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details
+# (http://www.gnu.org/licenses/gpl-2.0.html).
+#
+
+# Require the output directory plus at least one page to examine.
+# Plain [ ] tests are used throughout: the script runs under /bin/sh,
+# which need not support the [[ ]] extension.
+if [ "$#" -lt 2 ]; then
+    echo "Usage: ${0} <output_dir> man?/*" 1>&2
+    exit 1
+fi
+
+out_dir="$1"
+shift
+
+# Header line written at the top of every converted page; its value is
+#     '\" t -*- coding: UTF-8 -*-
+enc_line="'\\\" t -*- coding: UTF-8 -*-"
+
+# Becomes 1 as soon as any page fails to convert; used as the exit status.
+status=0
+
+for f in "$@"; do
+    # "file -bi" prints "<mime-type>; charset=<name>"; keep only <name>.
+    enc=$(file -bi "$f" | cut -d = -f 2)
+    if [ "$enc" = "us-ascii" ]; then
+        # Pure ASCII pages need no conversion.
+        continue
+    fi
+
+    dirn=$(dirname "$f")
+    basen=$(basename "$f")
+    new_dir="${out_dir}/${dirn}"
+    out_file="${new_dir}/${basen}"
+    tmp_file="${out_file}.tmp"
+
+    # mkdir -p succeeds when the directory already exists, so no
+    # separate existence test is needed; it reports its own errors.
+    if ! mkdir -p "$new_dir"; then
+        status=1
+        continue
+    fi
+
+    # Pick the source encoding to hand to iconv.
+    case "$basen" in
+        iso_8859-11.7 | iso_8859-13.7)
+            # These two would also match the iso_8859-* pattern below,
+            # but are converted from the detected charset instead.
+            # NOTE(review): presumably these pages are not stored in
+            # the charset they describe -- confirm.
+            from_enc=$enc
+            ;;
+        armscii-8.7 | cp1251.7 | iso_8859-*.7 | koi8-?.7)
+            # Pages named after a charset: use the page name without
+            # its ".7" suffix as the source encoding.
+            from_enc="${basen%.7}"
+            ;;
+        *)
+            from_enc=$enc
+            ;;
+    esac
+
+    printf "Converting %-23s from %s\n" "$f" "$from_enc"
+
+    # Convert into a temporary file first: in a pipeline the exit
+    # status of iconv would be hidden behind that of sed, and a
+    # truncated page would be left behind unnoticed.
+    if iconv -f "$from_enc" -t utf-8 "$f" > "$tmp_file"; then
+        # Emit the new header, then the page with any previous coding
+        # tag line and any previous '\" t line removed.
+        {
+            printf '%s\n' "$enc_line"
+            sed "/.*-\*- coding:.*/d;/.\\\" t$/d" "$tmp_file"
+        } > "$out_file" || status=1
+    else
+        echo "${0}: cannot convert $f from $from_enc" 1>&2
+        status=1
+    fi
+    rm -f "$tmp_file"
+done
+
+exit "$status"