# liblouis: isiXhosa/isiZulu braille code (grade 2)

#-display-name: isiXhosa contracted braille
#-index-name: isiXhosa, contracted

#-copyright: 2021, South African Braille Authority
#-license: LGPLv2.1

# Copyright (C) 2021 South African Braille Authority <http://www.sabrailleauthority.org.za>
#
# This file is part of liblouis.
#
# liblouis is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1 of
# the License, or (at your option) any later version.
#
# liblouis is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with liblouis. If not, see
# <http://www.gnu.org/licenses/>.

#+language: xh
#+type: literary
#+contraction: full
#+grade: 2
#+direction: both

#+system: ubc-nguni
# "UBC" stands for "Unified Braille Code" and is a generalization of UEB.
# See https://sabrailleauthority.org.za/unified-braille-code-ubc.

# The same table is used for isiXhosa and isiZulu. IsiXhosa and
# isiZulu are two of the most widely spoken languages in South
# Africa. They belong to the Nguni language family and are mutually
# intelligible. IsiXhosa is predominant in the Eastern and Western
# Cape provinces, but is spoken across South Africa. IsiZulu is
# predominant in the KwaZulu-Natal province, but is also widely spoken
# in South Africa.
#
# The Nguni braille code is based on UEB, therefore the grade 1 system
# is identical to UEB. The grade 2 system is simple, with fewer than 40
# contractions and no complex rules, yet it achieves approximately 21%
# contraction.
#
# No additional codes such as Nemeth or computer braille codes are
# used; the unified Nguni code is used also for technical material.

# The table has been created by Christo de Klerk <cjdk@mweb.co.za> and
# Laurent Cadet de Fontenay <laurentd@gmail.com>, according to the
# official code for these languages under the auspices of and approved
# by the South African Braille Authority (SABA).

# The table is maintained by Christo de Klerk <cjdk@mweb.co.za>.

#-author: Laurent Cadet de Fontenay <laurentd@gmail.com>
#-author: Christo de Klerk <cjdk@mweb.co.za>
#-maintainer: Christo de Klerk <cjdk@mweb.co.za>

include xh-za-g1.utb

# multind: lets the back-translator recognise dots 56-6 as the
# nocontractsign indicator immediately followed by the capsletter indicator.
multind 56-6 nocontractsign capsletter

# Sequence definitions: which characters delimit a sequence, which may
# precede or follow it, and which trailing patterns (apostrophe suffixes)
# still count as part of it.
# NOTE(review): presumably these feed the %~, %< and %> attributes used in
# the match patterns further down -- confirm against the liblouis manual.
# On the hyphen/dash lines the text after the character gives its code point.
seqdelimiter -—
seqdelimiter ‐   \x2010
seqdelimiter ‑   \x2011
seqdelimiter –   \x2013
seqdelimiter —   \x2014
seqbeforechars ([{"“'‘
seqafterchars  )]}"”'’.,;:.!?…
# Lower-case English apostrophe suffixes, straight (') and curly (’) forms.
seqafterpattern 'd
seqafterpattern 'll
seqafterpattern 're
seqafterpattern 's
seqafterpattern 't
seqafterpattern 've
seqafterpattern ’d
seqafterpattern ’ll
seqafterpattern ’re
seqafterpattern ’s
seqafterpattern ’t
seqafterpattern ’ve
#TODO:  all caps words (see lou_translateString.c:inSequence())
# Upper-case variants of the same suffixes, plus an expression form that
# covers both cases in one pattern.
seqafterpattern 'D
seqafterpattern 'LL
seqafterpattern 'RE
seqafterpattern 'S
seqafterpattern 'T
seqafterpattern 'VE
seqafterexpression '([DSTdst]|ll|[rv]e|LL|[RV]E)
seqafterpattern ’D
seqafterpattern ’LL
seqafterpattern ’RE
seqafterpattern ’S
seqafterpattern ’T
seqafterpattern ’VE
seqafterexpression ’([DSTdst]|ll|[rv]e|LL|[RV]E)

#      Alphabetic Wordsigns
#   - standing alone
#   - standing alone and followed by 'd, 'll, 're, 's, 't, 've
#   - The apostrophe-suffix handling still needs to be adapted for isiXhosa,
#     but leaving it in place is not expected to cause harm.

#   must be before contractions?
# Forward translation: "kodwa" becomes dots 13 (k) and "ukuthi" becomes
# dots 136 (u) when the word matches the sequence context in the pre- and
# post-patterns, optionally followed by one of the apostrophe suffixes.
match %[^_~]%<* kodwa (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?%>*%[^_~] 13
match %[^_~]%<* ukuthi (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?%>*%[^_~] 136

# Back-translation only (nofor): a whole-word 13 / 136 is read as the word.
nofor word kodwa  13
nofor word ukuthi  136


# The bare letters k and u share their cells with the wordsigns above, so
# they are declared as contractions in lower case ...
contraction k
contraction u

# ... and in upper case.
contraction K
contraction U


#   10.2   Strong Wordsigns
#   - 10.2.1   standing alone
#   - 10.2.2   standing alone and followed by 'd, 'll, 're, 's, 't, 've
#   - It is not certain that the following occur standing alone in isiXhosa.
#     The rules have been left in place for now to cater for English words
#     and can be adapted as appropriate.

# Forward translation: ya -> 16, sh -> 146, th -> 1456 when the letters match
# the sequence context in the pre- and post-patterns, optionally followed by
# an apostrophe suffix.
match %[^_~]%<* ya (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?%>*%[^_~] 16
match %[^_~]%<* sh (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?%>*%[^_~] 146
match %[^_~]%<* th (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?%>*%[^_~] 1456

# Back-translation only (nofor): the same cells as whole words.
nofor word ya  16
nofor word sh  146
nofor word th  1456



#   10.3   Strong Groupsigns
#   - Some of these may really be strong contractions in isiXhosa, but they
#     are all grouped together here.
#   - The rules are also to be adapted as appropriate.
#
# Each "always" rule replaces the letter pair with a single cell wherever it
# occurs. The commented-out "match ... =" lines are retained from the
# original table as disabled rules.

# ba
always ba 126
# match (%[^_~]%<*) ba ('([DSTdst]|ll|[rv]e|LL|[RV]E))?(%>*%[^_~]) =

# ka
always ka 2346

# kh
always kh 23456

# ku
always ku 346

# la
always la 156

# le
always le 1246

# na
always na 12356

# nd
always nd 12346

# ne
always ne 34

# ng
always ng 123456

# ni
always ni 345

# sh  (same cell as the sh wordsign, dots 146)
always sh 146
# match (%[^_~]%<*) sh (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?(%>*%[^_~]) =

# th  (same cell as the th wordsign, dots 1456)
always th 1456
# match (%[^_~]%<*) th (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?(%>*%[^_~]) =

# wa
always wa 12456

# we
always we 1256

# ya  (same cell as the ya wordsign, dots 16)
always ya 16
# match (%[^_~]%<*) ya (['’]([DSTdst]|ll|[rv]e|LL|[RV]E))?(%>*%[^_~]) =

# zi
always zi 246




#   10.5   Lower Wordsigns

# Character attribute 1: punctuation written with lower dots only.
# The text after the character list is a description, not part of the rule.
attribute 1 +=*,.?!"'“”‘’  punctuation with only lower dots (except hyphens)
#class lowerdotspunc +=*,.?!"'“”‘’

# ye
# Forward: "ye" -> dots 26 in the contexts accepted by the pre- and
# post-patterns (which include opening and closing brackets).
match %[^_]|%[^_~]%<*[([{] ye %[^_]|[)}\\]]%>*%[^_~] 26
# Also contracted when it is part of a longer word.
partword ye 26
# Back-translation only: dots 26 reads as "ye" anywhere.
nofor always ye 26

# si
# Same scheme as "ye", using dots 35.
match %[^_]|%[^_~]%<*[([{] si %[^_]|[)}\\]]%>*%[^_~] 35
nofor always si 35



#   10.6   Lower Groupsigns

# bh
# NOTE(review): this match carries no emphasis qualifiers and precedes the
# emphasis-qualified rule below that uses the same pattern -- confirm that
# both are intended.
match %a bh %a 23
nofor sufword bh 23

# bh   10.6.5   10.6.6
# Mid-word: "bh" between two letters -> dots 23, with the emphasis
# qualifiers empmatchbefore / empmatchafter. This rule and its nofor
# counterpart previously appeared twice; the exact duplicates were removed.
empmatchbefore empmatchafter match %a bh %a 23
nofor midword bh 23

# Start of a word/sequence: "bh" -> dots 23 only when followed by one of the
# listed letter patterns.
empmatchafter match %[^_~]%<* bh [Cc]![Hh] 23
empmatchafter match %[^_~]%<* bh [Ee][Ss][Tt] 23
empmatchafter match %[^_~]%<* bh [ABDFGHIJLMNOPQRTUVWXYZabdfghijlmnopqrtuvwxyz] 23

# Back-translation only: dots 23 at the beginning of a word reads as "bh".
nofor begword bh 23

# nt   10.6.5   10.6.6
# Mid-word: "nt" between two letters -> dots 25, with emphasis qualifiers.
empmatchbefore empmatchafter match %a nt %a 25

# Back-translation only: dots 25 in the middle of a word reads as "nt".
nofor midword nt 25


# Start of a word/sequence: contracted only before the listed letter patterns.
empmatchafter match %[^_~]%<* nt [Cc]![Hh] 25
empmatchafter match %[^_~]%<* nt [Ee][Ss][Tt] 25
empmatchafter match %[^_~]%<* nt [ABDFGHIJLMNOPQRTUVWXYZabdfghijlmnopqrtuvwxyz] 25

# Back-translation only: dots 25 at the beginning of a word reads as "nt".
nofor begword nt 25


# hl   10.6.5   10.6.6
# Mid-word only: "hl" between two letters -> dots 235.
# NOTE(review): unlike bh, nt and ph there are no begin-word rules for hl --
# confirm this is intentional.
empmatchbefore empmatchafter match %a hl %a 235

nofor midword hl 235

# ph
# Applies in both directions at the beginning or in the middle of a word.
begmidword ph 356

# Mid-word: "ph" between two letters -> dots 356, with emphasis qualifiers.
empmatchbefore empmatchafter match %a ph %a 356

nofor midword ph 356

# Start of a word/sequence: contracted only before the listed letter patterns.
empmatchafter match %[^_~]%<* ph [Cc]![Hh] 356
empmatchafter match %[^_~]%<* ph [Ee][Ss][Tt] 356
empmatchafter match %[^_~]%<* ph [ABDFGHIJLMNOPQRTUVWXYZabdfghijlmnopqrtuvwxyz] 356

nofor begword ph 356

# si
# "si" inside a longer word -> dots 35.
partword si 35
# Whole word "sisi": only the first "si" is contracted (35); the second is
# written out as s (234) followed by i (24).
word sisi 35-234-24

#   10.7   Initial-Letter Contractions
#
# Each contraction is a prefix cell (dot 5, dots 45 or dots 456) followed by
# the cell of a letter or groupsign. All but "uthi" use "always", so they
# apply wherever the letters occur.

# emva
always emva 5-15

# emuva
always emuva 45-15

# fana
always fana 5-124

# futhi
always futhi 456-124

# jalo
always jalo 5-245

# kakhulu
always kakhulu 5-2346

# khona
always khona 5-23456

# kanti
always kanti 45-2346

# lokho
always lokho 5-123

# lapha
always lapha 5-156

# lapho
always lapho 45-156

# uma
always uma 5-134

# ama
always ama 456-134

# noma
always noma 5-1345

# ngoba
always ngoba 5-123456

# ngoku
always ngoku 45-123456

# onke
always onke 5-135

# pansi
always pansi 5-1234

# uthi
# Forward rule is restricted by its pre-pattern: either the start of a
# sequence, or a preceding character that is not "a" or "o".
# NOTE(review): the "-" post-pattern is presumably the empty pattern (no
# constraint on what follows) -- confirm against the liblouis match syntax.
match %[^_~]%<*|![ao] uthi - 5-136
# Back-translation only: 5-136 reads as "uthi" anywhere.
nofor always uthi 5-136

# ukuba
always ukuba 45-136

# ulu
always ulu 456-136

# nxa
always nxa 5-1346



#   10.8   Final-Letter Groupsigns

# None applicable


#   10.9   Shortforms   not including 10.9.3
#TODO:  2.6.4

# Character attribute 2 (referenced as %2 in the "phantsi" pattern below):
# consonants plus the straight and curly apostrophes. The text after the
# character list is a description, not part of the rule.
attribute 2 BCDFGHJKLMNPQRSTVWXZbcdfghjklmnpqrstvwxz'’  no vowels (except w) and 2.6.4

# Forward rules. Every shortform starts with the "ph" cell (356) followed by
# one or two letter cells. All but "phantsi" require the word to stand alone
# in its sequence, optionally followed by s / 's. "phantsi" additionally
# allows letters before it and a %2 character plus letters after it.
match %[_~^]%<* phambi ([Ss]|['’][Ss])?%>*%[_~^] 356-12
match %[_~^]%<* phambili ([Ss]|['’][Ss])?%>*%[_~^] 356-12-123
match %[_~^]%<* phandle ([Ss]|['’][Ss])?%>*%[_~^] 356-145
match %[_~^]%<* phakathi ([Ss]|['’][Ss])?%>*%[_~^] 356-13
match %[_~^]%<*%a* phantsi (%2%a*)?%>*%[_~^] 356-234
match %[_~^]%<* phezu ([Ss]|['’][Ss])?%>*%[_~^] 356-1356
match %[_~^]%<* phezulu ([Ss]|['’][Ss])?%>*%[_~^] 356-1356-123

# Back-translation only: the shortform cells as a whole word read as the
# full word.
nofor word phambi 356-12
nofor word phambili 356-12-123
nofor word phandle 356-145
nofor word phakathi 356-13
nofor word phantsi 356-234
nofor word phezu 356-1356
nofor word phezulu 356-1356-123

# Letter groups that would otherwise be read as one of the shortforms; the
# word after each operand names the shortform it would be mistaken for.
# NOTE(review): the operands look like braille-ASCII spellings ("0" standing
# for dots 356) rather than print text. The print sequences that translate to
# these cells would presumably be "phb", "phbl", "phd", "phk", "phs", "phz",
# "phzl" -- confirm which form is intended.
contraction 0b phambi
contraction 0bl phambili
contraction 0d phandle
contraction 0k phakathi
contraction 0s phantsi
contraction 0z phezu
contraction 0zl phezulu


# Shape symbols require a grade 1 indicator (Section 3.22.1)
# Each line gives the code point as the operand; the glyph and description
# that follow are informational only.
contraction \x25a0 ■ filled (solid) square
contraction \x25a1 □ square
contraction \x25a7 ▧ shaded square (upper left to lower right)
contraction \x25b2 ▲ filled (solid) (equilateral) triangle
contraction \x25b3 △ regular (equilateral) triangle
contraction \x25cb ○ circle
contraction \x25cd ◍ shaded circle