guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

06/08: gnu: Add tesseract-ocr.


From: Taylan Ulrich B.
Subject: 06/08: gnu: Add tesseract-ocr.
Date: Sat, 28 Mar 2015 12:26:44 +0000

taylanub pushed a commit to branch master
in repository guix.

commit d814be32d5205fc946ba4dce724b6202718df268
Author: Taylan Ulrich Bayırlı/Kammer <address@hidden>
Date:   Sat Mar 21 20:11:56 2015 +0100

    gnu: Add tesseract-ocr.
    
    * gnu/packages/ocr.scm (tesseract-ocr): New variable.
---
 gnu/packages/ocr.scm |   55 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm
index 8408b43..32da42b 100644
--- a/gnu/packages/ocr.scm
+++ b/gnu/packages/ocr.scm
@@ -17,12 +17,14 @@
 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
 (define-module (gnu packages ocr)
-  #:use-module (guix licenses)
+  #:use-module ((guix licenses) #:prefix license:)
   #:use-module (guix packages)
   #:use-module (guix download)
   #:use-module (guix build-system gnu)
-  #:use-module ((gnu packages compression)
-                #:select (lzip)))
+  #:use-module (gnu packages autotools)
+  #:use-module (gnu packages compression)
+  #:use-module (gnu packages image)
+  #:use-module (gnu packages pkg-config))
 
 (define-public ocrad
   (package
@@ -43,4 +45,49 @@
      "GNU Ocrad is an optical character recognition program based on a
 feature extraction method.  It can read images in PBM, PGM or PPM formats and
 it produces text in 8-bit or UTF-8 formats.")
-    (license gpl3+)))
+    (license license:gpl3+)))
+
+(define-public tesseract-ocr           ; package definition for Tesseract 3.02.02
+  (package
+    (name "tesseract-ocr")
+    (version "3.02.02")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append
+             "https://tesseract-ocr.googlecode.com/files/tesseract-ocr-"
+             version ".tar.gz"))
+       (sha256
+        (base32 "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"))
+       (modules '((guix build utils)))
+       ;; Leptonica now ships a pkg-config file: enable the "Requires: lept" line.
+       (snippet
+        '(substitute* "tesseract.pc.in"
+           (("^# Requires: lept  ## .*")
+            "Requires: lept\n")))))
+    (build-system gnu-build-system)
+    (native-inputs                      ; presumably what autogen.sh needs (see phase below)
+     `(("autoconf" ,autoconf)
+       ("automake" ,automake)
+       ("libtool" ,libtool)))
+    (propagated-inputs                  ; tesseract.pc is patched above to require lept
+     `(("leptonica" ,leptonica)))
+    (arguments
+     '(#:phases
+       (modify-phases %standard-phases
+         (add-after
+          unpack autogen                ; extra phase: run autogen.sh right after unpack
+          (lambda _                     ; phase succeeds only on exit status 0
+            (zero? (system* "sh" "autogen.sh")))))
+       #:configure-flags
+       (let ((leptonica (assoc-ref %build-inputs "leptonica")))
+         ;; Pass the include directory of the leptonica input to configure.
+         (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))))
+    (home-page "https://code.google.com/p/tesseract-ocr/")
+    (synopsis "Optical character recognition engine")
+    (description
+     "Tesseract is an optical character recognition (OCR) engine with very
+high accuracy.  It supports many languages, output text formatting, hOCR
+positional information and page layout analysis.  Several image formats are
+supported through the Leptonica library.  It can also detect whether text is
+monospaced or proportional.")
+    (license license:asl2.0)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]