#!/bin/bash
# $Id: tesseract.SlackBuild,v 1.7 2023/09/26 19:06:23 root Exp root $
# Copyright 2011, 2023  Eric Hameleers, Eindhoven, NL
# All rights reserved.
#
#   Permission to use, copy, modify, and distribute this software for
#   any purpose with or without fee is hereby granted, provided that
#   the above copyright notice and this permission notice appear in all
#   copies.
#
#   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
#   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#   IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# Slackware SlackBuild script
# ===========================
# By:          Eric Hameleers
# For:         tesseract
# Descr:       an OCR system
# URL:         https://github.com/tesseract-ocr
# Needs:       leptonica
# Changelog:
# 3.00-1:      25/sep/2011 by Eric Hameleers
#              * Initial build. One language is contained in the main package
#                ('eng' by default), the other languages are split off into
#                data packages.
# 5.3.2-1:     26/sep/2023 by Eric Hameleers
#              * Update.
#
# Run 'sh tesseract.SlackBuild' to build a Slackware package.
# The package (.txz) and .txt file as well as build logs are created in /tmp .
# Install it using 'installpkg'.
#
# -----------------------------------------------------------------------------
#
# Usage notes:
#   * Passing '--download' as the first argument only fetches the missing
#     source files and then exits without building.
#   * These environment variables override the defaults set below:
#     VERSION, DATAVER, BUILD, NUMJOBS, TAG, DEFLANG, LANGPACKS, TMP, OUTPUT,
#     ARCH and PKGTYPE.
#   * The script uses bash features (arrays, C-style for loops, brace
#     expansion, ERR traps, pipefail), so the interpreter line names bash;
#     running it through 'sh' only works where sh is bash.
#
# -----------------------------------------------------------------------------

# Set initial variables:

PRGNAM=tesseract
VERSION=${VERSION:-5.3.2}
DATAVER=${DATAVER:-4.1.0}
BUILD=${BUILD:-1}
NUMJOBS=${NUMJOBS:-" -j$(nproc) "}
TAG=${TAG:-alien}

# Deliberately a plain string: it is expanded unquoted later so that the
# 'INSTALL*' glob is matched inside the extracted source directory.
DOCS="AUTHORS CONTRIBUTING.md ChangeLog INSTALL* LICENSE README.md VERSION"

# Where do we look for sources?
SRCDIR=$(cd "$(dirname "$0")"; pwd)

# Default language which is going to be built into the main tesseract package.
DEFLANG=${DEFLANG:-"eng"}

# If you want only a few language packs, define them like this instead:
# LANGPACKS="eng nld fra"
# By default the list is every non-comment key (text before '=') found in
# the 'languages' file next to this script; a missing file yields an empty list.
LANGPACKS=${LANGPACKS:-"$(cat "$SRCDIR/languages" 2>/dev/null |grep -v "^#" |cut -f1 -d= |tr '\n' ' ' |tr -s ' ' |sed -e 's/ *$//')"}
# The list of languages is found at: https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016

# We need to have at least one language pack or tesseract will not be useable:
if [ -z "$LANGPACKS" ]; then
  LANGPACKS="${DEFLANG}"
elif ! echo "$LANGPACKS" | grep -qw -- "$DEFLANG" ; then
  # The default language must also be listed in LANGPACKS:
  LANGPACKS="$DEFLANG $LANGPACKS"
fi

# Place to build (TMP) package (PKG) and output (OUTPUT) the program:
TMP=${TMP:-/tmp/build}
PKG=$TMP/package-$PRGNAM
OUTPUT=${OUTPUT:-/tmp}

# Index 0 is the program source; indexes 1..N are the language data files.
SOURCE[0]="$SRCDIR/${PRGNAM}-${VERSION}.tar.gz"
SRCURL[0]="https://github.com/tesseract-ocr/${PRGNAM}/archive/${VERSION}/${PRGNAM}-${VERSION}.tar.gz"

# We have an arbitrary amount of language packs to download:
III=1
for TB in $LANGPACKS ; do
  SOURCE[$III]="$SRCDIR/${TB}.traineddata"
  SRCURL[$III]="https://github.com/tesseract-ocr/tessdata/raw/${DATAVER}/${TB}.traineddata"
  III=$((III + 1))
done

##
## --- with a little luck, you won't have to edit below this point --- ##
##

# Automatically determine the architecture we're building on:
if [ -z "${ARCH:-}" ]; then
  case "$(uname -m)" in
    i?86) ARCH=i586 ;;
    arm*)
      if readelf /usr/bin/file -A | grep -E -q "Tag_CPU.*[4,5]" ; then
        ARCH=arm
      else
        ARCH=armv7hl
      fi
      ;;
    # Unless $ARCH is already set, use uname -m for all other archs:
    *) ARCH=$(uname -m) ;;
  esac
  export ARCH
fi

# Set CFLAGS/CXXFLAGS and LIBDIRSUFFIX:
case "$ARCH" in
  i?86)      SLKCFLAGS="-O2 -march=${ARCH} -mtune=i686"
             SLKLDFLAGS=""; LIBDIRSUFFIX=""
             ;;
  x86_64)    SLKCFLAGS="-O2 -fPIC"
             SLKLDFLAGS="-L/usr/lib64"; LIBDIRSUFFIX="64"
             ;;
  armv7hl)   SLKCFLAGS="-O2 -march=armv7-a -mfpu=vfpv3-d16"
             SLKLDFLAGS=""; LIBDIRSUFFIX=""
             ;;
  *)         SLKCFLAGS=${SLKCFLAGS:-"-O2"}
             SLKLDFLAGS=${SLKLDFLAGS:-""}; LIBDIRSUFFIX=${LIBDIRSUFFIX:-""}
             ;;
esac

case "$ARCH" in
  arm*)    TARGET=$ARCH-slackware-linux-gnueabi ;;
  *)       TARGET=$ARCH-slackware-linux ;;
esac

# Exit the script on errors:
set -e
# Every build step below is piped into 'tee'; without pipefail the pipeline
# status would be tee's, and a failed configure/make would go unnoticed.
set -o pipefail
trap 'echo "$0 FAILED at line ${LINENO}" | tee "$OUTPUT/error-${PRGNAM}.log"' ERR
# Catch unitialized variables:
set -u
P1=${1:-1}

# Save old umask and set to 0022:
_UMASK_=$(umask)
umask 0022

# Create working directories:
mkdir -p "$OUTPUT"              # place for the package to be saved
mkdir -p "$TMP/tmp-$PRGNAM"     # location to build the source
mkdir -p "$PKG"                 # place for the package to be built
# The ':?' guards abort instead of expanding to '/*' if a variable is empty.
rm -rf "${PKG:?}"/*             # always erase old package's contents
rm -rf "${TMP:?}/tmp-$PRGNAM"/* # remove the remnants of previous build
rm -rf "$OUTPUT"/{autogen,configure,make,install,error,makepkg}-$PRGNAM.log
                                # remove old log files

# Source file availability:
for (( i = 0; i < ${#SOURCE[*]}; i++ )) ; do
  if ! [ -f "${SOURCE[$i]}" ]; then
    echo "Source '$(basename "${SOURCE[$i]}")' not available yet..."
    # Check if the $SRCDIR is writable at all - if not, download to $OUTPUT
    [ -w "$SRCDIR" ] || SOURCE[$i]="$OUTPUT/$(basename "${SOURCE[$i]}")"
    if [ -f "${SOURCE[$i]}" ]; then echo "Ah, found it!"; continue; fi
    if ! [ "x${SRCURL[$i]}" = "x" ]; then
      echo "Will download file to $(dirname "${SOURCE[$i]}")"
      # NOTE(review): --no-check-certificate disables TLS verification and
      # no checksum is verified afterwards - confirm this is acceptable.
      # Capture wget's status explicitly; 'set -e' must not abort here
      # because the failure is reported (and the file renamed) below.
      DLSTATUS=0
      wget --no-check-certificate -nv -T 20 -O "${SOURCE[$i]}" "${SRCURL[$i]}" || DLSTATUS=$?
      if [ "$DLSTATUS" -ne 0 ] || [ ! -s "${SOURCE[$i]}" ]; then
        echo "Fail to download '$(basename "${SOURCE[$i]}")'. Aborting the build."
        # Keep the partial/empty file out of the way of the next run:
        if [ -e "${SOURCE[$i]}" ]; then
          mv -f "${SOURCE[$i]}" "${SOURCE[$i]}".FAIL
        fi
        exit 1
      fi
    else
      echo "File '$(basename "${SOURCE[$i]}")' not available. Aborting the build."
      exit 1
    fi
  fi
done

if [ "$P1" = "--download" ]; then
  echo "Download complete."
  exit 0
fi

# --- PACKAGE BUILDING ---

echo "++"
echo "|| $PRGNAM-$VERSION"
echo "++"

cd "$TMP/tmp-$PRGNAM"
echo "Extracting the source archive(s) for $PRGNAM..."
tar -xvf "${SOURCE[0]}"
cd "${PRGNAM}-${VERSION}"
chown -R root:root .
chmod -R u+w,go+r-w,a+rX-st .

echo Building ...
./autogen.sh \
  2>&1 | tee "$OUTPUT/autogen-${PRGNAM}.log"
LDFLAGS="$SLKLDFLAGS" \
CXXFLAGS="$SLKCFLAGS" \
CFLAGS="$SLKCFLAGS" \
./configure \
  --prefix=/usr \
  --libdir=/usr/lib${LIBDIRSUFFIX} \
  --mandir=/usr/man \
  --docdir=/usr/doc/$PRGNAM-$VERSION \
  --localstatedir=/var \
  --sysconfdir=/etc \
  --disable-static \
  --program-prefix= \
  --program-suffix= \
  --build=$TARGET \
  2>&1 | tee "$OUTPUT/configure-${PRGNAM}.log"
# $NUMJOBS stays unquoted on purpose: it carries the '-jN' option word.
make $NUMJOBS 2>&1 | tee "$OUTPUT/make-${PRGNAM}.log"
make training 2>&1 | tee -a "$OUTPUT/make-${PRGNAM}.log"

# Now we can install the lot (and then split off the language packs):
make DESTDIR="$PKG" install 2>&1 | tee "$OUTPUT/install-${PRGNAM}.log"
make training-install DESTDIR="$PKG" 2>&1 | tee -a "$OUTPUT/install-${PRGNAM}.log"

# The language files are not automatically installed:
for (( i = 1; i < ${#SOURCE[*]}; i++ )) ; do
  install -D -m0644 "${SOURCE[$i]}" -t "$PKG/usr/share/tessdata/"
done

# Don't ship .la files:
rm -f "$PKG"/{,usr/}lib${LIBDIRSUFFIX}/*.la

# Add documentation:
mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
# Best effort: not every file named in $DOCS has to exist in the source tree.
cp -a $DOCS "$PKG/usr/doc/$PRGNAM-$VERSION" || true
cat "$SRCDIR/$(basename "$0")" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
chown -R root:root "$PKG/usr/doc/$PRGNAM-$VERSION"
find "$PKG/usr/doc" -type f -exec chmod 644 {} \;

# Compress the man page(s):
if [ -d "$PKG/usr/man" ]; then
  find "$PKG/usr/man" -type f -name "*.?" -exec gzip -9f {} \;
  # Re-point symlinked man pages at the now-compressed targets:
  for i in $(find "$PKG/usr/man" -type l -name "*.?") ; do
    ln -s "$( readlink "$i" ).gz" "$i.gz"
    rm "$i"
  done
fi

# Strip binaries (if any):
# The trailing '|| true' keeps this best effort, also under pipefail.
find "$PKG" | xargs file | grep -e "executable" -e "shared object" | grep ELF \
  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true

# Add a package description:
mkdir -p "$PKG/install"
# The description is optional: a missing 'languages' file or entry leaves it
# empty instead of aborting the build (hence the '|| true' under pipefail).
# NOTE(review): the description is inserted into a sed replacement unescaped;
# a '/' or '&' in the 'languages' file would break or alter the substitution.
DEFDESC="$(grep "^${DEFLANG}=" "$SRCDIR/languages" |cut -f2 -d=)" || true
sed -e "s/@LANG@/$DEFLANG/g" -e "s/@LANGDESC@/$DEFDESC/g" \
  "$SRCDIR/slack-desc" > "${PKG}/install/slack-desc"
cat "$SRCDIR/slack-required" > "$PKG/install/slack-required"

# Split out language packs if we have built additional languages:
cd "$PKG"
for lang in ${LANGPACKS} ; do
  # Do not split off the "default" language data:
  if [ "$lang" = "$DEFLANG" ]; then
    continue
  fi
  langdesc="$(grep "^${lang}=" "$SRCDIR/languages" |cut -f2 -d=)" || true
  DATAPKGNAM="${PRGNAM}-data-${lang}-${VERSION}-noarch-${BUILD}${TAG}"
  echo "Splitting out '$lang' ($langdesc) datafile to separate package..."
  rm -rf "${PKG}-data-$lang"
  mkdir -p "${PKG}-data-$lang/usr/share/tessdata"
  mv "${PKG}/usr/share/tessdata/${lang}.traineddata" \
    "${PKG}-data-$lang/usr/share/tessdata/"
  # Create slack-desc :
  mkdir -p "${PKG}-data-$lang/install"
  sed -e "s/@LANG@/$lang/g" -e "s/@LANGDESC@/$langdesc/g" \
    "$SRCDIR/slack-desc.data" > "${PKG}-data-$lang/install/slack-desc"
  # Create the package:
  cd "${PKG}-data-$lang"
  /sbin/makepkg -p -l y -c n "$OUTPUT/${DATAPKGNAM}.${PKGTYPE:-txz}"
  cd -
  # Run md5sum inside $OUTPUT so the .md5 file records a bare file name:
  cd "$OUTPUT"
  md5sum "${DATAPKGNAM}.${PKGTYPE:-txz}" > "${DATAPKGNAM}.${PKGTYPE:-txz}.md5"
  cd -
  grep "^${PRGNAM}-data-$lang" "$PKG-data-$lang/install/slack-desc" \
    > "$OUTPUT/${DATAPKGNAM}.txt"
done
# End of splitting out language packs

# Build the package:
MAINPKGNAM="${PRGNAM}-${VERSION}-${ARCH}-${BUILD}${TAG}"
cd "$PKG"
/sbin/makepkg --linkadd y --chown n "$OUTPUT/${MAINPKGNAM}.${PKGTYPE:-txz}" 2>&1 | tee "$OUTPUT/makepkg-${PRGNAM}.log"
cd "$OUTPUT"
md5sum "${MAINPKGNAM}.${PKGTYPE:-txz}" > "${MAINPKGNAM}.${PKGTYPE:-txz}.md5"
cd -
grep "^${PRGNAM}" "$PKG/install/slack-desc" > "$OUTPUT/${MAINPKGNAM}.txt"
cat "$PKG/install/slack-required" > "$OUTPUT/${MAINPKGNAM}.dep"

# Restore the original umask:
umask ${_UMASK_}