diff --git a/5.3.0.tar.gz b/5.3.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdd10e12e9471b1d6633e1f12bc25ce3292a0e9f Binary files /dev/null and b/5.3.0.tar.gz differ diff --git a/tesseract.spec b/tesseract.spec new file mode 100644 index 0000000000000000000000000000000000000000..9bf609c3aeb566d9b01964186ad5222340126593 --- /dev/null +++ b/tesseract.spec @@ -0,0 +1,268 @@ +%define anolis_release 1 + +%bcond_with mingw + +Name: tesseract +Version: 5.3.0 +Release: %{anolis_release}%{?dist} +Summary: Raw OCR Engine + +License: Apache-2.0 +URL: https://github.com/tesseract-ocr/%{name} +Source0: https://github.com/tesseract-ocr/tesseract/archive/refs/tags/%{version}.tar.gz + +# Fix library name case +# Fix shared library version suffix +# Honour TESSDATA_PREFIX +# Build training libs statically +Patch0: tesseract_cmake.patch +# Generate correct libdir path in /usr/lib64/pkgconfig/tesseract.pc +# Already merged upstream, can be dropped at next release +# https://github.com/tesseract-ocr/tesseract/commit/5e116fa5cad249b8a08d22af652cf52f44fbb8cd +Patch1: tesseract_libdir.patch + +BuildRequires: cmake +BuildRequires: gcc-c++ +BuildRequires: giflib-devel +BuildRequires: leptonica-devel +BuildRequires: libicu-devel +BuildRequires: libjpeg-turbo-devel +BuildRequires: libtool +BuildRequires: libtiff-devel +BuildRequires: libwebp-devel +BuildRequires: pango-devel +BuildRequires: /usr/bin/asciidoc +BuildRequires: /usr/bin/xsltproc +BuildRequires: openjpeg2-devel + +%if %{with mingw} +BuildRequires: mingw32-filesystem >= 95 +BuildRequires: mingw32-gcc +BuildRequires: mingw32-giflib +BuildRequires: mingw32-binutils +BuildRequires: mingw32-icu +BuildRequires: mingw32-leptonica +BuildRequires: mingw32-libgomp +BuildRequires: mingw32-libjpeg-turbo +BuildRequires: mingw32-libtiff +BuildRequires: mingw32-libwebp +BuildRequires: mingw32-pango + +BuildRequires: mingw64-filesystem >= 95 +BuildRequires: mingw64-gcc +BuildRequires: mingw64-giflib 
+BuildRequires: mingw64-binutils +BuildRequires: mingw64-icu +BuildRequires: mingw64-leptonica +BuildRequires: mingw64-libgomp +BuildRequires: mingw64-libjpeg-turbo +BuildRequires: mingw64-libtiff +BuildRequires: mingw64-libwebp +BuildRequires: mingw64-pango +%endif + +Requires: tesseract-langpack-eng + + +%description +A commercial quality OCR engine originally developed at HP between 1985 and +1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was +open-sourced by HP and UNLV in 2005. + + +%package devel +Summary: Development files for %{name} +Requires: %{name} = %{version}-%{release} + +%description devel +The %{name}-devel package contains header files for +developing applications that use %{name}. + + +%package tools +Summary: Training tools for %{name} +Requires: %{name} = %{version}-%{release} + +%description tools +The %{name}-tools package contains tools for training %{name}. + +%package doc +Summary: Documentation files for %{name} +Requires: %{name} = %{version}-%{release} +BuildArch: noarch + +%description doc +The %{name}-doc package contains documentation files for %{name}. + +%if %{with mingw} +%package -n mingw32-%{name} +Summary: MinGW Windows tesseract-ocr library +BuildArch: noarch + +%description -n mingw32-%{name} +MinGW Windows tesseract-ocr library. + + +%package -n mingw32-%{name}-tools +Summary: MinGW Windows tesseract-ocr library tools +Requires: mingw32-%{name} = %{version}-%{release} +BuildArch: noarch + +%description -n mingw32-%{name}-tools +MinGW Windows tesseract-ocr library tools. + + +%package -n mingw64-%{name} +Summary: MinGW Windows tesseract-ocr library +BuildArch: noarch + +%description -n mingw64-%{name} +MinGW Windows tesseract-ocr library. + + +%package -n mingw64-%{name}-tools +Summary: MinGW Windows tesseract-ocr library tools +Requires: mingw64-%{name} = %{version}-%{release} +BuildArch: noarch + +%description -n mingw64-%{name}-tools +MinGW Windows tesseract-ocr library tools. 
+ +%{?mingw_debug_package} +%endif + +%prep +%autosetup -p1 -n %{name}-%{version} + +%build +# Native build +%cmake -DCMAKE_INSTALL_LIBDIR=%{_lib} -DTESSDATA_PREFIX=%{_datadir}/%{name} +%cmake_build + +# Manually build manfiles, cmake does not build them +man_xslt=http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl +for file in doc/*.asc; do + asciidoc -b docbook -d manpage -o - $file | XML_CATALOG_FILES=%{_sysconfdir}/xml/catalog xsltproc --nonet -o ${file/.asc/} $man_xslt - +done + +%if %{with mingw} +# MinGW build +MINGW32_CMAKE_ARGS=-DTESSDATA_PREFIX=%{mingw32_datadir}/%{name} \ +MINGW64_CMAKE_ARGS=-DTESSDATA_PREFIX=%{mingw64_datadir}/%{name} +%mingw_cmake -DSW_BUILD=OFF +%mingw_make_build +%endif + + +%install +%cmake_install +mkdir -p %{buildroot}%{_mandir}/{man1,man5}/ +cp -a doc/*.1 %{buildroot}%{_mandir}/man1/ +cp -a doc/*.5 %{buildroot}%{_mandir}/man5/ + +%if %{with mingw} +%mingw_make_install +%mingw_debug_install_post +%endif + +%generate_compatibility_deps + +%files +%license LICENSE +%{_bindir}/%{name} +%{_libdir}/lib%{name}.so.5.3.0 +%{_datadir}/%{name}/ +%{_mandir}/man1/tesseract.1* +%dir %{abidir} +%{abidir}/libtesseract.dump +%{abidir}/tesseract-option.list + +%files devel +%{_includedir}/%{name} +%{_libdir}/lib%{name}.so +%{_libdir}/libcommon_training.a +%{_libdir}/libunicharset_training.a +%{_libdir}/cmake/%{name}/ +%{_libdir}/pkgconfig/%{name}.pc + +%files tools +%{_bindir}/ambiguous_words +%{_bindir}/classifier_tester +%{_bindir}/cntraining +%{_bindir}/combine_lang_model +%{_bindir}/combine_tessdata +%{_bindir}/dawg2wordlist +%{_bindir}/lstmeval +%{_bindir}/lstmtraining +%{_bindir}/merge_unicharsets +%{_bindir}/mftraining +%{_bindir}/set_unicharset_properties +%{_bindir}/shapeclustering +%{_bindir}/text2image +%{_bindir}/unicharset_extractor +%{_bindir}/wordlist2dawg +%{_mandir}/man1/ambiguous_words.1* +%{_mandir}/man1/classifier_tester.1* +%{_mandir}/man1/cntraining.1* +%{_mandir}/man1/combine_lang_model.1* 
+%{_mandir}/man1/combine_tessdata.1* +%{_mandir}/man1/dawg2wordlist.1* +%{_mandir}/man1/lstmeval.1* +%{_mandir}/man1/lstmtraining.1* +%{_mandir}/man1/merge_unicharsets.1* +%{_mandir}/man1/mftraining.1* +%{_mandir}/man1/set_unicharset_properties.1* +%{_mandir}/man1/shapeclustering.1* +%{_mandir}/man1/text2image.1* +%{_mandir}/man1/unicharset_extractor.1* +%{_mandir}/man1/wordlist2dawg.1* +%{_mandir}/man5/unicharambigs.5* +%{_mandir}/man5/unicharset.5* +%{abidir}/classifier_tester-option.list +%{abidir}/cntraining-option.list +%{abidir}/combine_lang_model-option.list +%{abidir}/combine_tessdata-option.list +%{abidir}/lstmeval-option.list +%{abidir}/lstmtraining-option.list +%{abidir}/mftraining-option.list +%{abidir}/set_unicharset_properties-option.list +%{abidir}/shapeclustering-option.list +%{abidir}/text2image-option.list +%{abidir}/unicharset_extractor-option.list + +%if %{with mingw} +%files -n mingw32-%{name} +%license LICENSE +%{mingw32_bindir}/libtesseract-53.dll +%{mingw32_includedir}/tesseract/ +%{mingw32_libdir}/libtesseract.dll.a +%{mingw32_libdir}/libcommon_training.a +%{mingw32_libdir}/libunicharset_training.a +%{mingw32_libdir}/pkgconfig/tesseract.pc +%{mingw32_libdir}/cmake/%{name}/ +%{mingw32_datadir}/%{name}/ + +%files -n mingw32-%{name}-tools +%{mingw32_bindir}/*.exe + +%files -n mingw64-%{name} +%license LICENSE +%{mingw64_bindir}/libtesseract-53.dll +%{mingw64_includedir}/tesseract/ +%{mingw64_libdir}/libtesseract.dll.a +%{mingw64_libdir}/libcommon_training.a +%{mingw64_libdir}/libunicharset_training.a +%{mingw64_libdir}/pkgconfig/tesseract.pc +%{mingw64_libdir}/cmake/%{name}/ +%{mingw64_datadir}/%{name}/ + +%files -n mingw64-%{name}-tools +%{mingw64_bindir}/*.exe +%endif + +%files doc +%doc README.md INSTALL ChangeLog CONTRIBUTING.md AUTHORS + +%changelog +* Fri Mar 17 2023 happy_orange - 5.3.0-1 +- package init from upstream diff --git a/tesseract_cmake.patch b/tesseract_cmake.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8cacfd648b1cd747ba2ebdef09ec0a10fc63204b --- /dev/null +++ b/tesseract_cmake.patch @@ -0,0 +1,70 @@ +diff -rupN --no-dereference tesseract-5.3.0/CMakeLists.txt tesseract-5.3.0-new/CMakeLists.txt +--- tesseract-5.3.0/CMakeLists.txt 2022-12-22 14:57:57.000000000 +0100 ++++ tesseract-5.3.0-new/CMakeLists.txt 2022-12-23 10:33:09.303707322 +0100 +@@ -345,7 +345,7 @@ elseif(UNIX) + set(LIB_pthread pthread) + endif() + elseif(WIN32) +- set(LIB_Ws2_32 Ws2_32) ++ set(LIB_Ws2_32 ws2_32) + endif() + + add_definitions("-DCMAKE_BUILD") +@@ -812,11 +812,14 @@ set_target_properties(libtesseract + set_target_properties(libtesseract + PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) + +-if(WIN32) ++if(MSVC) + set_target_properties(libtesseract + PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}) + set_target_properties(libtesseract + PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d) ++elseif(MINGW) ++ set_target_properties(libtesseract PROPERTIES SUFFIX "-${VERSION_MAJOR}${VERSION_MINOR}${CMAKE_SHARED_LIBRARY_SUFFIX}") ++ set_target_properties(libtesseract PROPERTIES OUTPUT_NAME tesseract) + else() + set_target_properties(libtesseract PROPERTIES OUTPUT_NAME tesseract) + endif() +@@ -931,9 +934,9 @@ install( + + if(INSTALL_CONFIGS) + install(FILES ${TESSERACT_CONFIGS} +- DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs) ++ DESTINATION ${TESSDATA_PREFIX}/tessdata/configs) + install(FILES ${TESSERACT_TESSCONFIGS} +- DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs) ++ DESTINATION ${TESSDATA_PREFIX}/tessdata/tessconfigs) + endif() + + # ############################################################################## +diff -rupN --no-dereference tesseract-5.3.0/src/training/CMakeLists.txt tesseract-5.3.0-new/src/training/CMakeLists.txt +--- tesseract-5.3.0/src/training/CMakeLists.txt 2022-12-22 14:57:57.000000000 +0100 ++++ 
tesseract-5.3.0-new/src/training/CMakeLists.txt 2022-12-23 10:33:09.303707322 +0100 +@@ -115,7 +115,7 @@ if(NOT DISABLED_LEGACY_ENGINE) + common/trainingsampleset.h) + endif() + +-add_library(common_training ${COMMON_TRAINING_SRC}) ++add_library(common_training STATIC ${COMMON_TRAINING_SRC}) + target_include_directories(common_training PUBLIC common + ${CMAKE_CURRENT_BINARY_DIR}) + target_link_libraries(common_training PUBLIC libtesseract) +@@ -254,7 +254,7 @@ if(ICU_FOUND) + + file(GLOB unicharset_training_src unicharset/*) + +- add_library(unicharset_training ${unicharset_training_src}) ++ add_library(unicharset_training STATIC ${unicharset_training_src}) + if(SW_BUILD) + target_link_libraries(unicharset_training + PUBLIC common_training org.sw.demo.unicode.icu.i18n) +@@ -378,7 +378,7 @@ if(ICU_FOUND) + + file(GLOB pango_training_src pango/*) + +- add_library(pango_training ${pango_training_src}) ++ add_library(pango_training STATIC ${pango_training_src}) + target_link_libraries(pango_training PUBLIC unicharset_training) + if(SW_BUILD) + target_link_libraries(pango_training diff --git a/tesseract_libdir.patch b/tesseract_libdir.patch new file mode 100644 index 0000000000000000000000000000000000000000..b3565d4a2b991c61dcc4a12bc760e0c14cba7da2 --- /dev/null +++ b/tesseract_libdir.patch @@ -0,0 +1,29 @@ +From 5e116fa5cad249b8a08d22af652cf52f44fbb8cd Mon Sep 17 00:00:00 2001 +From: Frank Dana +Date: Thu, 2 Feb 2023 19:57:59 -0500 +Subject: [PATCH] Fix libdir in tesseract.pc from CMake + +tesseract.pc.cmake was hardcoding libdir to +`{prefix}/lib`, which is wrong for systems that use +`/usr/lib64/` on 64-bit. `CMAKE_INSTALL_LIBDIR` +is already expected to contain the libdir path +relative to the install prefix. 
+--- + tesseract.pc.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tesseract.pc.cmake b/tesseract.pc.cmake +index 7f36ce19..5469a398 100644 +--- a/tesseract.pc.cmake ++++ b/tesseract.pc.cmake +@@ -1,6 +1,6 @@ + prefix=@CMAKE_INSTALL_PREFIX@ + exec_prefix=${prefix}/bin +-libdir=${prefix}/lib ++libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ + includedir=${prefix}/include + + Name: @tesseract_NAME@ +-- +2.39.1 +