diff --git a/common/shlibs b/common/shlibs index e8b08a85de5..3d5db5f1c58 100644 --- a/common/shlibs +++ b/common/shlibs @@ -2273,7 +2273,7 @@ libhttp_parser.so.2.9 http-parser-2.9.0_1 libmaa.so.4 libmaa-1.4.2_1 libcodeblocks.so.0 codeblocks-13.12_1 libleptonica.so.6 leptonica-1.84.0_1 -libtesseract.so.4 tesseract-ocr-4.0.0_1 +libtesseract.so.5 tesseract-ocr-5.3.3_1 libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1 libopenraw.so.7 libopenraw-0.1.0_1 libopenrawgnome.so.7 libopenraw-0.1.0_1 diff --git a/srcpkgs/tesseract-ocr-kur b/srcpkgs/tesseract-ocr-kmr similarity index 100% rename from srcpkgs/tesseract-ocr-kur rename to srcpkgs/tesseract-ocr-kmr diff --git a/srcpkgs/tesseract-ocr-kur_ara b/srcpkgs/tesseract-ocr-kur_ara deleted file mode 120000 index 79bcf15f05b..00000000000 --- a/srcpkgs/tesseract-ocr-kur_ara +++ /dev/null @@ -1 +0,0 @@ -tesseract-ocr \ No newline at end of file diff --git a/srcpkgs/tesseract-ocr/files/COPYING b/srcpkgs/tesseract-ocr/files/COPYING deleted file mode 100644 index 11e05af425f..00000000000 --- a/srcpkgs/tesseract-ocr/files/COPYING +++ /dev/null @@ -1,14 +0,0 @@ -This repository contains language data for Tesseract Open Source -OCR Engine. All data in the repository are licensed under the Apache -License: - -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
- diff --git a/srcpkgs/tesseract-ocr/patches/disable-neon.patch b/srcpkgs/tesseract-ocr/patches/disable-neon.patch new file mode 100644 index 00000000000..d491ef1e47b --- /dev/null +++ b/srcpkgs/tesseract-ocr/patches/disable-neon.patch @@ -0,0 +1,14 @@ +--- a/configure.ac ++++ b/configure.ac +@@ -177,6 +177,11 @@ + AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions]) + ;; + ++ arm|armv7l) ++ ++ AC_MSG_WARN([No compiler options for $host_cpu]) ++ ;; ++ + arm*) + + AX_CHECK_COMPILE_FLAG([-mfpu=neon], [neon=true], [neon=false], [$WERROR]) diff --git a/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch b/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch index 9c6337d1886..5c75864248f 100644 --- a/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch +++ b/srcpkgs/tesseract-ocr/patches/musl-sys-time.patch @@ -1,12 +1,13 @@ ---- a/src/ccutil/ocrclass.h 2019-07-07 14:34:08.000000000 +0200 -+++ b/src/ccutil/ocrclass.h 2019-07-08 10:47:15.347415888 +0200 -@@ -31,6 +31,9 @@ - #ifdef _WIN32 - #include <winsock2.h> // for timeval - #endif +--- a/include/tesseract/ocrclass.h ++++ b/include/tesseract/ocrclass.h +@@ -29,6 +29,10 @@ + + #include <chrono> + #include <ctime> +#ifndef __GLIBC__ +#include <sys/time.h> +#endif ++ + + namespace tesseract { - /********************************************************************** - * EANYCODE_CHAR diff --git a/srcpkgs/tesseract-ocr/template b/srcpkgs/tesseract-ocr/template index de6df3a768d..49b40458883 100644 --- a/srcpkgs/tesseract-ocr/template +++ b/srcpkgs/tesseract-ocr/template @@ -1,14 +1,15 @@ # Template file for 'tesseract-ocr' pkgname=tesseract-ocr -version=4.1.1 -revision=9 -_tessdataver=4.0.0 +version=5.3.3 +revision=1 +_tessdataver=4.1.0 create_wrksrc=yes build_style=gnu-configure configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)" make_build_args="all training" hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc" -makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel" 
+makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel + libarchive-devel libcurl-devel" short_desc="Tesseract Open Source OCR engine" maintainer="Orphaned <orphan@voidlinux.org>" license="Apache-2.0" @@ -16,13 +17,15 @@ homepage="https://github.com/tesseract-ocr/tesseract" distfiles=" https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz" -checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb - 38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08" +checksum="dc4329f85f41191b2d813b71b528ba6047745813474e583ccce8795ff2ff5681 + 990fffb9b7a9b52dc9a2d053a9ef6852ca2b72bd8dfb22988b0b990a700fd3c7" build_options="openmp" build_options_default="openmp" desc_option_openmp="Enable Open MP (gomp)" +disable_parallel_build=yes # fails to build otherwise + # Create a package for one specific language $1 pkg_lang() { local f script lang=$1 @@ -46,8 +49,8 @@ pkg_lang() { post_extract() { mv tesseract-${version}/* . 
+ rm -rf tessdata-${_tessdataver}/{tessconfigs,configs,pdf.ttf} mv tessdata-${_tessdataver}/* ${wrksrc}/tessdata - rmdir tessdata-${_tessdataver} } pre_configure() { NOCONFIGURE=1 ./autogen.sh @@ -62,7 +65,6 @@ post_install() { mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1 vdoc ChangeLog vdoc README.md - vlicense ${FILESDIR}/COPYING LICENSE-tessdata # Move the pseudo languges "equ" (math / equation detection) and # "osd" (orientation and script detection) to the main package for lang in equ osd; do @@ -79,13 +81,6 @@ tesseract-ocr-tools_package() { vmkdir usr/share/tesseract vmkdir usr/share/man/man1 vmkdir usr/share/man/man5 - # Copy shell scripts - for f in language-specific.sh tesstrain.sh tesstrain_utils.sh; do - if [ -e ${wrksrc}/training/${f} ]; then - cp -a ${wrksrc}/training/${f} \ - ${PKGDESTDIR}/usr/share/tesseract - fi - done # Move tool manual pages for f in ambiguous_words cntraining combine_tessdata \ dawg2wordlist mftraining shapeclustering unicharambigs \ @@ -99,7 +94,8 @@ tesseract-ocr-tools_package() { } } tesseract-ocr-devel_package() { - depends="${sourcepkg}>=${version}_${revision}" + depends="${sourcepkg}>=${version}_${revision} leptonica-devel + libarchive-devel libcurl-devel" short_desc+=" - development files" pkg_install() { vmove usr/include/tesseract @@ -129,7 +125,7 @@ tesseract-ocr-all_package() { for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \ ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \ fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \ - ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \ + ita_old jav jpn kan kat kat_old kaz khm kir kmr kor lao lat lav lit ltz mal mar \ mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \ snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \ uig ukr urd uzb uzb_cyrl vie yid yor \ @@ -576,6 
+572,13 @@ tesseract-ocr-kir_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } +tesseract-ocr-kmr_package() { + depends="${sourcepkg}>=${version}_${revision}" + short_desc+=" - Kurmanji (Kurdish - Latin Script) language data" + pkg_install() { + $(pkg_lang ${pkgname#tesseract-ocr-}) + } +} tesseract-ocr-kor_package() { depends="${sourcepkg}>=${version}_${revision}" short_desc+=" - Korean language data" @@ -583,20 +586,6 @@ tesseract-ocr-kor_package() { $(pkg_lang ${pkgname#tesseract-ocr-}) } } -tesseract-ocr-kur_package() { - depends="${sourcepkg}>=${version}_${revision}" - short_desc+=" - Kurdish language data" - pkg_install() { - $(pkg_lang ${pkgname#tesseract-ocr-}) - } -} -tesseract-ocr-kur_ara_package() { - depends="${sourcepkg}>=${version}_${revision}" - short_desc+=" - Kurdish (Arabic) language data" - pkg_install() { - $(pkg_lang ${pkgname#tesseract-ocr-}) - } -} tesseract-ocr-lao_package() { depends="${sourcepkg}>=${version}_${revision}" short_desc+=" - Lao language data"