summaryrefslogtreecommitdiffstats
path: root/contrib/tesseract-langs.sh
diff options
context:
space:
mode:
author Mario Domenech Goulart <mario@ossystems.com.br> 2014-05-26 09:59:00 -0300
committer Martin Jansa <Martin.Jansa@gmail.com> 2014-06-10 12:16:23 +0200
commitcb41796a5e0573bf3676b5c54fcc12c6dd42f9fb (patch)
tree5c7665d5744fea0f2ee55efd620ab06736ee2956 /contrib/tesseract-langs.sh
parentfccc8f1514c5bc84a343a757f60c9dc793db2643 (diff)
downloadmeta-openembedded-cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb.tar.gz
contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages
This script writes language recipes for tesseract. It downloads the list of available languages and the language tarballs from the official site, then writes a recipe named tesseract-lang-<lang>_<version>.bb for each language. Signed-off-by: Mario Domenech Goulart <mario@ossystems.com.br> Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
Diffstat (limited to 'contrib/tesseract-langs.sh')
-rwxr-xr-xcontrib/tesseract-langs.sh92
1 files changed, 92 insertions, 0 deletions
diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh
new file mode 100755
index 000000000..50873c139
--- /dev/null
+++ b/contrib/tesseract-langs.sh
@@ -0,0 +1,92 @@
1#! /bin/sh
2
3# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
4# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
5
# Version shared by all language tarballs (tesseract-ocr-${PV}.<lang>.tar.gz).
PV="3.02"

# The main software tarball may carry a minor version that the language
# tarballs do not have. Example:
#   software package: tesseract-ocr-3.02.02.tar.gz
#   language package: tesseract-ocr-3.02.por.tar.gz
MINOR_PV="02"

# First positional argument: directory that receives the generated recipes.
recipes_dir="$1"
15
# Print a one-line synopsis of the command line to stdout.
# "$0" is quoted so a script path containing whitespace is not word-split
# before being handed to basename.
usage() {
    echo "Usage: $(basename -- "$0") <recipes dir> [ <download dir> ]"
}
19
# The recipes directory is mandatory: without it, show the synopsis on
# stderr and abort.
if [ -z "$recipes_dir" ]; then
    usage >&2
    exit 1
fi
mkdir -p "$recipes_dir" || exit 1

# Page listing every downloadable file, and the temp file it is saved to.
file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
file_list=$(mktemp) || exit 1

# Use the caller-supplied download directory when given (second argument);
# otherwise work in a throw-away temp directory that is flagged for removal
# through remove_dl_dir.
remove_dl_dir=
if [ -z "$2" ]; then
    remove_dl_dir=1
    dl_dir=$(mktemp -d) || { rm -f "$file_list"; exit 1; }
else
    dl_dir="$2"
fi

# Quoted so that a download directory containing whitespace works.
mkdir -p "$dl_dir" || { rm -f "$file_list"; exit 1; }
38
# Print the available language codes, one per line, sorted and de-duplicated.
#
# Globals:  file_list_uri (read) - URL of the downloads listing page
#           file_list (read)     - path the listing page is saved to
#           PV, MINOR_PV (read)  - version strings used to match tarball names
# Outputs:  language codes on stdout; a diagnostic on stderr on failure
# Returns:  non-zero when the listing page cannot be fetched
tesseract_langs() {
    local pv_re

    if ! wget -q -O "$file_list" "$file_list_uri"; then
        echo "Failed to fetch $file_list_uri" >&2
        return 1
    fi

    # PV contains a dot; escape it so it only matches a literal dot.
    pv_re=$(printf '%s' "$PV" | sed 's/\./\\./g')

    # 1. keep the anchor lines that name a tesseract-ocr-<PV>.<x>.tar.gz file
    # 2. reduce each line to <x>
    # 3. drop the entries that are the software/doc tarballs rather than a
    #    language (<MINOR_PV> and <MINOR_PV>-doc-html); -x anchors the match
    #    so a language code merely containing MINOR_PV is not discarded
    grep -E 'a href="detail\?name=tesseract-ocr-'"${pv_re}"'\.[^.]+\.tar\.gz&amp;can=2&amp;q=">' "$file_list" \
        | sed -r -e 's/.*tesseract-ocr-'"${pv_re}"'\.([^.]+)\.tar\.gz.*/\1/' \
        | grep -Evx "${MINOR_PV}(-doc-html)?" \
        | sort -u
}
47
# Download the tarball of every listed language that is not already present
# in the download directory.
#
# Arguments: $1 - whitespace-separated list of language codes
# Globals:   dl_dir (read) - directory the tarballs are stored in
#            PV (read)     - version used in the tarball names
# Outputs:   one "Downloading <uri>" line per fetched file on stdout;
#            a diagnostic on stderr for each failed download
# Returns:   0 when every needed download succeeded, 1 otherwise
download_lang_files() {
    local langs="$1"
    # lang is declared local so the loop does not clobber a caller's variable.
    local lang tarball uri status=0

    # $langs is intentionally unquoted: it is split into one word per language.
    for lang in $langs; do
        tarball="tesseract-ocr-${PV}.${lang}.tar.gz"
        if [ ! -e "$dl_dir/$tarball" ]; then
            uri="https://tesseract-ocr.googlecode.com/files/$tarball"
            echo "Downloading $uri"
            if ! wget -q -P "$dl_dir" "$uri"; then
                echo "Failed to download $uri" >&2
                # Remove any partial file so a later run retries the download
                # instead of treating the fragment as a complete tarball.
                rm -f "$dl_dir/$tarball"
                status=1
            fi
        fi
    done

    return $status
}
59
# Write the recipe file for one language into the recipes directory.
#
# The file is named tesseract-lang-<lang>_${PV}.bb, with every "_" in the
# language code replaced by "-", and records the md5 and sha256 checksums of
# the language tarball found in the download directory.
#
# Arguments: $1 - language code (e.g. por, chi_sim)
# Globals:   dl_dir (read)      - directory holding the downloaded tarballs
#            recipes_dir (read) - directory the recipe is written to
#            PV (read)          - version used in tarball and recipe names
# Returns:   1 (and writes nothing) when the tarball is missing
create_recipe() {
    local lang="$1"
    local tarball recipe md5 sha256

    tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"

    # Without the tarball the checksums would be empty and the recipe useless.
    if [ ! -f "$tarball" ]; then
        echo "Missing tarball: $tarball" >&2
        return 1
    fi

    md5=$(md5sum "$tarball" | awk '{print $1}')
    sha256=$(sha256sum "$tarball" | awk '{print $1}')

    recipe="$recipes_dir/tesseract-lang-$(echo "$lang" | sed 's/_/-/g')_${PV}.bb"

    cat > "$recipe" <<EOF
# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)

TESSERACT_LANG = "$lang"

require tesseract-lang.inc

SRC_URI[md5sum] = "${md5}"
SRC_URI[sha256sum] = "${sha256}"
EOF
}
81
82
# Driver: list the languages, fetch their tarballs, write one recipe per
# language, then clean up the temporary files.
LANGS=$(tesseract_langs)

status=0
if [ -n "$LANGS" ]; then
    download_lang_files "$LANGS" || status=1

    # $LANGS is intentionally unquoted: it is split into one word per language.
    for lang in $LANGS; do
        create_recipe "$lang" || status=1
    done
else
    # An empty list means the listing could not be fetched or parsed.
    echo "No languages found at $file_list_uri" >&2
    status=1
fi

# Only remove the download directory when this script created it.
if [ -n "$remove_dl_dir" ]; then
    rm -rf "$dl_dir"
fi
rm -f "$file_list"

exit $status