LCOV - code coverage report
Current view: top level - lisp/language - indian.el (source / functions) Hit Total Coverage
Test: tramp-tests-after.info Lines: 4 4 100.0 %
Date: 2017-08-30 10:12:24 Functions: 1 1 100.0 %

          Line data    Source code
       1             : ;;; indian.el --- Indian languages support -*- coding: utf-8; -*-
       2             : 
       3             : ;; Copyright (C) 1997, 1999, 2001-2017 Free Software Foundation, Inc.
       4             : ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
       5             : ;;   National Institute of Advanced Industrial Science and Technology (AIST)
       6             : ;;   Registration Number H14PRO021
       7             : 
       8             : ;; Maintainer:  Kenichi Handa <handa@m17n.org>
       9             : ;;              KAWABATA, Taichi <kawabata@m17n.org>
      10             : ;; Keywords:    multilingual, i18n, Indian
      11             : 
      12             : ;; This file is part of GNU Emacs.
      13             : 
      14             : ;; GNU Emacs is free software: you can redistribute it and/or modify
      15             : ;; it under the terms of the GNU General Public License as published by
      16             : ;; the Free Software Foundation, either version 3 of the License, or
      17             : ;; (at your option) any later version.
      18             : 
      19             : ;; GNU Emacs is distributed in the hope that it will be useful,
      20             : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      22             : ;; GNU General Public License for more details.
      23             : 
      24             : ;; You should have received a copy of the GNU General Public License
      25             : ;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
      26             : 
      27             : ;;; Commentary:
      28             : 
      29             : ;; This file contains definitions of Indian language environments, and
      30             : ;; setups for displaying the scripts used there.
      31             : 
      32             : ;;; Code:
      33             : 
      34             : (define-coding-system 'in-is13194-devanagari
      35             :   "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
      36             :   :coding-type 'iso-2022
      37             :   :mnemonic ?D
      38             :   :designation [ascii indian-is13194 nil nil] ; presumably one slot per graphic register, last two unused -- TODO confirm
      39             :   :charset-list '(ascii indian-is13194)
      40             :   :post-read-conversion 'in-is13194-post-read-conversion  ; function defined elsewhere, not in this file
      41             :   :pre-write-conversion 'in-is13194-pre-write-conversion) ; function defined elsewhere, not in this file
      42             : 
      43             : (define-coding-system-alias 'devanagari 'in-is13194-devanagari) ; second name for the coding system above
      44             : 
      45             : (set-language-info-alist                ; define the Devanagari language environment
      46             :  "Devanagari" '((charset unicode)
      47             :                 (coding-system utf-8)
      48             :                 (coding-priority utf-8)
      49             :                 (input-method . "devanagari-aiba") ; every other environment in this file names an -itrans method
      50             :                 (documentation . "\
      51             : Such languages using Devanagari script as Hindi and Marathi
      52             : are supported in this language environment."))
      53             :  '("Indian"))                           ; parents argument: grouped under Indian
      54             : 
      55             : (set-language-info-alist                ; define the Bengali language environment
      56             :  "Bengali" '((charset unicode)
      57             :              (coding-system utf-8)
      58             :              (coding-priority utf-8)
      59             :              (input-method . "bengali-itrans") ; input method associated with this environment
      60             :              (documentation . "\
      61             : Such languages using Bengali script as Bengali and Assamese
      62             : are supported in this language environment."))
      63             :  '("Indian"))                           ; parents argument: grouped under Indian
      64             : 
      65             : (set-language-info-alist                ; define the Punjabi language environment
      66             :  "Punjabi" '((charset unicode)
      67             :               (coding-system utf-8)
      68             :               (coding-priority utf-8)
      69             :               (input-method . "punjabi-itrans") ; input method associated with this environment
      70             :               (documentation . "\
      71             : North Indian language Punjabi is supported in this language environment."))
      72             :  '("Indian"))                           ; parents argument: grouped under Indian
      73             : 
      74             : (set-language-info-alist                ; define the Gujarati language environment
      75             :  "Gujarati" '((charset unicode)
      76             :               (coding-system utf-8)
      77             :               (coding-priority utf-8)
      78             :               (input-method . "gujarati-itrans") ; input method associated with this environment
      79             :               (documentation . "\
      80             : North Indian language Gujarati is supported in this language environment."))
      81             :  '("Indian"))                           ; parents argument: grouped under Indian
      82             : 
      83             : (set-language-info-alist                ; define the Oriya language environment
      84             :  "Oriya" '((charset unicode)
      85             :               (coding-system utf-8)
      86             :               (coding-priority utf-8)
      87             :               (input-method . "oriya-itrans") ; input method associated with this environment
      88             :               (documentation . "\
      89             : Such languages using Oriya script as Oriya, Khonti, and Santali
      90             : are supported in this language environment."))
      91             :  '("Indian"))                           ; parents argument: grouped under Indian
      92             : 
      93             : (set-language-info-alist                ; define the Tamil language environment
      94             :  "Tamil" '((charset unicode)
      95             :            (coding-system utf-8)
      96             :            (coding-priority utf-8)
      97             :            (input-method . "tamil-itrans") ; input method associated with this environment
      98             :            (documentation . "\
      99             : South Indian Language Tamil is supported in this language environment."))
     100             :  '("Indian"))                           ; parents argument: grouped under Indian
     101             : 
     102             : (set-language-info-alist                ; define the Telugu language environment
     103             :  "Telugu" '((charset unicode)
     104             :             (coding-system utf-8)
     105             :             (coding-priority utf-8)
     106             :             (input-method . "telugu-itrans") ; input method associated with this environment
     107             :             (documentation . "\
     108             : South Indian Language Telugu is supported in this language environment."))
     109             :  '("Indian"))                           ; parents argument: grouped under Indian
     110             : 
     111             : (set-language-info-alist                ; define the Kannada language environment
     112             :  "Kannada" '((charset unicode)
     113             :              (coding-system utf-8)      ; same coding system as every other environment in this file
     114             :              (coding-priority utf-8)
     115             :              (input-method . "kannada-itrans") ; input method associated with this environment
     116             :              (sample-text . "Kannada (ಕನ್ನಡ)     ನಮಸ್ಕಾರ")
     117             :              (documentation . "\
     118             : Kannada language and script is supported in this language
     119             : environment."))
     120             :  '("Indian"))                           ; parents argument: grouped under Indian
     121             : 
     122             : (set-language-info-alist                ; define the Malayalam language environment
     123             :  "Malayalam" '((charset unicode)
     124             :                (coding-system utf-8)
     125             :                (coding-priority utf-8)
     126             :                (input-method . "malayalam-itrans") ; input method associated with this environment
     127             :                (documentation . "\
     128             : South Indian language Malayalam is supported in this language environment."))
     129             :  '("Indian"))                           ; parents argument: grouped under Indian
     130             : 
     131             : ;; Expand a pattern template: each mnemonic found in REGEXP is swapped
     132             : ;; for its replacement from TABLE, an alist of (MNEMONIC-STRING .
     133             : ;; REPLACEMENT-STRING).  Entries are applied one after another, in list order.
     134             : (defun indian-compose-regexp (regexp table)
     135           9 :   (let ((case-fold-search nil) (expanded regexp)) ; case matters: tables use both "a" and "A"
     136           9 :     (dolist (pair table)
     137          95 :       (setq expanded (replace-regexp-in-string (car pair) (cdr pair) expanded t t))) ; fixed case, literal replacement
     138           9 :     expanded))
     139             : 
     140             : (defconst devanagari-composable-pattern
     141             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     142             :          '(("a" . "[\u0900-\u0902]")        ; vowel modifier (above)
     143             :            ("A" . "\u0903")         ; vowel modifier (post)
     144             :            ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
     145             :            ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
     146             :            ("R" . "\u0930")         ; RA
     147             :            ("n" . "\u093C")         ; NUKTA
     148             :            ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign
     149             :            ("H" . "\u094D")         ; HALANT
     150             :            ("s" . "[\u0951-\u0952]")        ; stress sign
     151             :            ("t" . "[\u0953-\u0954]")        ; accent
     152             :            ("N" . "\u200C")         ; ZWNJ
     153             :            ("J" . "\u200D")         ; ZWJ
     154             :            ("X" . "[\u0900-\u097F]"))))     ; any character in U+0900..U+097F
     155             :     (indian-compose-regexp
     156             :      (concat                        ; template written with the mnemonics above
     157             :       ;; syllables with an independent vowel, or
     158             :       "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
     159             :       ;; consonant-based syllables, or
     160             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
     161             :       ;; special consonant form, or
     162             :       "JHR\\|"
     163             :       ;; any other single character of the range
     164             :       "X")
     165             :      table))
     166             :   "Regexp matching a composable sequence of Devanagari characters.")
     167             : 
     168             : (defconst bengali-composable-pattern
     169             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     170             :          '(("a" . "\u0981")         ; SIGN CANDRABINDU
     171             :            ("A" . "[\u0982-\u0983]")        ; SIGN ANUSVARA .. VISARGA
     172             :            ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel
     173             :            ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
     174             :            ("B" . "[\u09AC\u09AF-\u09B0\u09F0]")            ; BA, YA, RA
     175             :            ("R" . "[\u09B0\u09F0]")         ; RA
     176             :            ("n" . "\u09BC")         ; NUKTA
     177             :            ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign
     178             :            ("H" . "\u09CD")         ; HALANT
     179             :            ("T" . "\u09CE")         ; KHANDA TA
     180             :            ("N" . "\u200C")         ; ZWNJ
     181             :            ("J" . "\u200D")         ; ZWJ
     182             :            ("X" . "[\u0980-\u09FF]"))))     ; any character in U+0980..U+09FF
     183             :     (indian-compose-regexp
     184             :      (concat                        ; template written with the mnemonics above
     185             :       ;; syllables with an independent vowel, or
     186             :       "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
     187             :       ;; consonant-based syllables, or
     188             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
     189             :       ;; KHANDA TA, optionally preceded by RA + HALANT, or
     190             :       "\\(?:RH\\)?T\\|"
     191             :       ;; special consonant form, or
     192             :       "JHB\\|"
     193             :       ;; any other single character of the range
     194             :       "X")
     195             :      table))
     196             :   "Regexp matching a composable sequence of Bengali characters.")
     197             : 
     198             : (defconst gurmukhi-composable-pattern
     199             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     200             :          '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
     201             :            ("A" . "\u0A03")         ; SIGN VISARGA
     202             :            ("V" . "[\u0A05-\u0A14]")        ; independent vowel
     203             :            ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]")   ; consonant
     204             :            ("Y" . "[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA (both range ends must be \u escapes)
     205             :            ("n" . "\u0A3C")         ; NUKTA
     206             :            ("v" . "[\u0A3E-\u0A4C]")        ; vowel sign
     207             :            ("H" . "\u0A4D")         ; VIRAMA
     208             :            ("N" . "\u200C")         ; ZWNJ
     209             :            ("J" . "\u200D")         ; ZWJ
     210             :            ("X" . "[\u0A00-\u0A7F]"))))     ; any character in U+0A00..U+0A7F
     211             :     (indian-compose-regexp
     212             :      (concat                        ; template written with the mnemonics above
     213             :       ;; consonant-based syllables, or
     214             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
     215             :       ;; syllables with an independent vowel, or
     216             :       "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
     217             :       ;; special consonant form, or
     218             :       "JHY\\|"
     219             :       ;; any other single character of the range
     220             :       "X")
     221             :      table))
     222             :   "Regexp matching a composable sequence of Gurmukhi characters.")
     223             : 
     224             : (defconst gujarati-composable-pattern
     225             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     226             :          '(("a" . "[\u0A81-\u0A82]")        ; SIGN CANDRABINDU .. ANUSVARA
     227             :            ("A" . "\u0A83")         ; SIGN VISARGA
     228             :            ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel
     229             :            ("C" . "[\u0A95-\u0AB9]")        ; consonant
     230             :            ("R" . "\u0AB0")         ; RA
     231             :            ("n" . "\u0ABC")         ; NUKTA
     232             :            ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign
     233             :            ("H" . "\u0ACD")         ; VIRAMA
     234             :            ("N" . "\u200C")         ; ZWNJ
     235             :            ("J" . "\u200D")         ; ZWJ
     236             :            ("X" . "[\u0A80-\u0AFF]"))))     ; any character in U+0A80..U+0AFF
     237             :     (indian-compose-regexp
     238             :      (concat                        ; template written with the mnemonics above
     239             :       ;; syllables with an independent vowel, or
     240             :       "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
     241             :       ;; consonant-based syllables, or
     242             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
     243             :       ;; special consonant form, or
     244             :       "JHR\\|"
     245             :       ;; any other single character of the range
     246             :       "X")
     247             :      table))
     248             :   "Regexp matching a composable sequence of Gujarati characters.")
     249             : 
     250             : (defconst oriya-composable-pattern
     251             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     252             :          '(("a" . "\u0B01")         ; SIGN CANDRABINDU
     253             :            ("A" . "[\u0B02-\u0B03]")        ; SIGN ANUSVARA .. VISARGA
     254             :            ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel
     255             :            ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]")     ; consonant
     256             :            ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form
     257             :            ("R" . "\u0B30")         ; RA
     258             :            ("n" . "\u0B3C")         ; NUKTA
     259             :            ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign
     260             :            ("H" . "\u0B4D")         ; VIRAMA
     261             :            ("N" . "\u200C")         ; ZWNJ
     262             :            ("J" . "\u200D")         ; ZWJ
     263             :            ("X" . "[\u0B00-\u0B7F]"))))     ; any character in U+0B00..U+0B7F
     264             :     (indian-compose-regexp
     265             :      (concat                        ; template written with the mnemonics above
     266             :       ;; syllables with an independent vowel, or
     267             :       "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
     268             :       ;; consonant-based syllables, or
     269             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
     270             :       ;; special consonant form, or
     271             :       "JHB\\|"
     272             :       ;; any other single character of the range
     273             :       "X")
     274             :      table))
     275             :   "Regexp matching a composable sequence of Oriya characters.")
     276             : 
     277             : (defconst tamil-composable-pattern
     278             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     279             :          '(("a" . "\u0B82")         ; SIGN ANUSVARA
     280             :            ("V" . "[\u0B85-\u0B94]")        ; independent vowel
     281             :            ("C" . "[\u0B95-\u0BB9]")        ; consonant
     282             :            ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
     283             :            ("H" . "\u0BCD")         ; VIRAMA
     284             :            ("N" . "\u200C")         ; ZWNJ
     285             :            ("J" . "\u200D")         ; ZWJ
     286             :            ("X" . "[\u0B80-\u0BFF]"))))     ; any character in U+0B80..U+0BFF
     287             :     (indian-compose-regexp
     288             :      (concat                        ; template written with the mnemonics above
     289             :       ;; consonant-based syllables, or
     290             :       "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
     291             :       ;; syllables with an independent vowel, or
     292             :       "Vv*a?\\|"
     293             :       ;; any other single character of the range
     294             :       "X")
     295             :      table))
     296             :   "Regexp matching a composable sequence of Tamil characters.")
     297             : 
     298             : (defconst telugu-composable-pattern
     299             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     300             :          '(("a" . "[\u0C01-\u0C03]")        ; SIGN CANDRABINDU .. VISARGA
     301             :            ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
     302             :            ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
     303             :            ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]")      ; vowel sign
     304             :            ("H" . "\u0C4D")         ; VIRAMA
     305             :            ("N" . "\u200C")         ; ZWNJ
     306             :            ("J" . "\u200D")         ; ZWJ
     307             :            ("X" . "[\u0C00-\u0C7F]"))))     ; any character in U+0C00..U+0C7F
     308             :     (indian-compose-regexp
     309             :      (concat                        ; template written with the mnemonics above
     310             :       ;; consonant-based syllables, or
     311             :       "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
     312             :       ;; syllables with an independent vowel, or
     313             :       "V\\(?:J?HC\\)?v*a?\\|"
     314             :       ;; special consonant form, or
     315             :       "JHC\\|"
     316             :       ;; any other single character of the range
     317             :       "X")
     318             :      table))
     319             :   "Regexp matching a composable sequence of Telugu characters.")
     320             : 
     321             : (defconst kannada-composable-pattern
     322             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     323             :          '(("A" . "[\u0C82-\u0C83]")        ; SIGN ANUSVARA .. VISARGA
     324             :            ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
     325             :            ("C" . "[\u0C95-\u0CB9\u0CDE]")    ; consonant
     326             :            ("R" . "\u0CB0")         ; RA
     327             :            ("n" . "\u0CBC")         ; NUKTA
     328             :            ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
     329             :            ("H" . "\u0CCD")         ; VIRAMA
     330             :            ("N" . "\u200C")         ; ZWNJ
     331             :            ("J" . "\u200D")         ; ZWJ
     332             :            ("X" . "[\u0C80-\u0CFF]"))))     ; any character in U+0C80..U+0CFF
     333             :     (indian-compose-regexp
     334             :      (concat                        ; template written with the mnemonics above
     335             :       ;; syllables with an independent vowel, or
     336             :       "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
     337             :       ;; consonant-based syllables, or
     338             :       "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
     339             :       ;; special consonant form, or
     340             :       "JHC\\|"
     341             :       ;; any other single character of the range
     342             :       "X")
     343             :      table))
     344             :   "Regexp matching a composable sequence of Kannada characters.")
     345             : 
     346             : (defconst malayalam-composable-pattern
     347             :   (let ((table                      ; mnemonic letter -> regexp fragment, substituted in list order
     348             :          '(("A" . "[\u0D02-\u0D03]")        ; SIGN ANUSVARA .. VISARGA
     349             :            ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
     350             :            ("C" . "[\u0D15-\u0D39]")                  ; consonant
     351             :            ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]")  ; YA, RA, LA, VA
     352             :            ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]")     ; postbase matra
     353             :            ("H" . "\u0D4D")                   ; SIGN VIRAMA
     354             :            ("N" . "\u200C")                   ; ZWNJ
     355             :            ("J" . "\u200D")                   ; ZWJ
     356             :            ("X" . "[\u0D00-\u0D7F]"))))               ; any character in U+0D00..U+0D7F
     357             :     (indian-compose-regexp
     358             :      (concat                        ; template written with the mnemonics above
     359             :       ;; consonant-based syllables, or
     360             :       "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
     361             :       ;; syllables with an independent vowel, or (greedy v*: a lazy star here would never take a vowel sign)
     362             :       "V\\(?:J?HY\\)?v*A?\\|"
     363             :       ;; special consonant form, or
     364             :       "JHY\\|"
     365             :       ;; any other single character of the range
     366             :       "X")
     367             :      table))
     368             :   "Regexp matching a composable sequence of Malayalam characters.")
     369             : 
     370             : (let ((patterns-by-script            ; script symbol -> composable-sequence regexp
     371             :        (list (cons 'devanagari devanagari-composable-pattern)
     372             :              (cons 'bengali bengali-composable-pattern)
     373             :              (cons 'gurmukhi gurmukhi-composable-pattern)
     374             :              (cons 'gujarati gujarati-composable-pattern)
     375             :              (cons 'oriya oriya-composable-pattern)
     376             :              (cons 'tamil tamil-composable-pattern)
     377             :              (cons 'telugu telugu-composable-pattern)
     378             :              (cons 'kannada kannada-composable-pattern)
     379             :              (cons 'malayalam malayalam-composable-pattern))))
     380             :   (map-char-table
     381             :    (lambda (range script)            ; invoked for each entry of char-script-table
     382             :      (let ((entry (assq script patterns-by-script)))
     383             :        (when entry                   ; scripts without a pattern above are left untouched
     384             :          (set-char-table-range
     385             :           composition-function-table range
     386             :           (list (vector (cdr entry) 0 'font-shape-gstring))))))
     387             :    char-script-table))
     388             : 
     389             : (provide 'indian)
     390             : 
     391             : ;;; indian.el ends here

Generated by: LCOV version 1.12