1 /*
2  * Distributed under the Boost Software License, Version 1.0.
3  *    (See accompanying file LICENSE_1_0.txt or copy at
4  *          http://www.boost.org/LICENSE_1_0.txt)
5  */
6 module pango.script;
7 
import pango.utils;
import pango.language;
import pango.c.script;
import pango.c.language;

import std.string;
import std.range;
import std.conv;
16 
17 
/**
 * Identifies the writing system (script) that a character belongs to.
 *
 * Every member is defined in terms of the matching constant of the C
 * `PangoScript` enumeration. The values correspond to the script names
 * defined in the Unicode standard (see Unicode Standard Annex #24,
 * "Script names": http://www.unicode.org/reports/tr24/), and the
 * enumeration is interchangeable with `GUnicodeScript`.
 *
 * New members may be added in the future, so applications should be
 * prepared to handle values that are not listed here.
 *
 * The comment on each member gives a short description, the ISO 15924
 * code where one is listed, and, for later additions, the Pango version
 * that introduced the member.
 */
enum Script {
    InvalidCode = PangoScript.PANGO_SCRIPT_INVALID_CODE, /// a value never returned from pango_script_for_unichar()
    Common = PangoScript.PANGO_SCRIPT_COMMON, /// a character used by multiple different scripts (ISO 15924: Zyyy)
    Inherited = PangoScript.PANGO_SCRIPT_INHERITED, /// a mark glyph that takes its script from the base glyph to which it is attached (ISO 15924: Qaai)
    Arabic = PangoScript.PANGO_SCRIPT_ARABIC, /// Arabic (ISO 15924: Arab)
    Armenian = PangoScript.PANGO_SCRIPT_ARMENIAN, /// Armenian (ISO 15924: Armn)
    Bengali = PangoScript.PANGO_SCRIPT_BENGALI, /// Bengali (ISO 15924: Beng)
    Bopomofo = PangoScript.PANGO_SCRIPT_BOPOMOFO, /// Bopomofo (ISO 15924: Bopo)
    Cherokee = PangoScript.PANGO_SCRIPT_CHEROKEE, /// Cherokee (ISO 15924: Cher)
    Coptic = PangoScript.PANGO_SCRIPT_COPTIC, /// Coptic (ISO 15924: Qaac)
    Cyrillic = PangoScript.PANGO_SCRIPT_CYRILLIC, /// Cyrillic (ISO 15924: Cyrl (Cyrs))
    Deseret = PangoScript.PANGO_SCRIPT_DESERET, /// Deseret (ISO 15924: Dsrt)
    Devanagari = PangoScript.PANGO_SCRIPT_DEVANAGARI, /// Devanagari (ISO 15924: Deva)
    Ethiopic = PangoScript.PANGO_SCRIPT_ETHIOPIC, /// Ethiopic (ISO 15924: Ethi)
    Georgian = PangoScript.PANGO_SCRIPT_GEORGIAN, /// Georgian (ISO 15924: Geor (Geon, Geoa))
    Gothic = PangoScript.PANGO_SCRIPT_GOTHIC, /// Gothic (ISO 15924: Goth)
    Greek = PangoScript.PANGO_SCRIPT_GREEK, /// Greek (ISO 15924: Grek)
    Gujarati = PangoScript.PANGO_SCRIPT_GUJARATI, /// Gujarati (ISO 15924: Gujr)
    Gurmukhi = PangoScript.PANGO_SCRIPT_GURMUKHI, /// Gurmukhi (ISO 15924: Guru)
    Han = PangoScript.PANGO_SCRIPT_HAN, /// Han (ISO 15924: Hani)
    Hangul = PangoScript.PANGO_SCRIPT_HANGUL, /// Hangul (ISO 15924: Hang)
    Hebrew = PangoScript.PANGO_SCRIPT_HEBREW, /// Hebrew (ISO 15924: Hebr)
    Hiragana = PangoScript.PANGO_SCRIPT_HIRAGANA, /// Hiragana (ISO 15924: Hira)
    Kannada = PangoScript.PANGO_SCRIPT_KANNADA, /// Kannada (ISO 15924: Knda)
    Katakana = PangoScript.PANGO_SCRIPT_KATAKANA, /// Katakana (ISO 15924: Kana)
    Khmer = PangoScript.PANGO_SCRIPT_KHMER, /// Khmer (ISO 15924: Khmr)
    Lao = PangoScript.PANGO_SCRIPT_LAO, /// Lao (ISO 15924: Laoo)
    Latin = PangoScript.PANGO_SCRIPT_LATIN, /// Latin (ISO 15924: Latn (Latf, Latg))
    Malayalam = PangoScript.PANGO_SCRIPT_MALAYALAM, /// Malayalam (ISO 15924: Mlym)
    Mongolian = PangoScript.PANGO_SCRIPT_MONGOLIAN, /// Mongolian (ISO 15924: Mong)
    Myanmar = PangoScript.PANGO_SCRIPT_MYANMAR, /// Myanmar (ISO 15924: Mymr)
    Ogham = PangoScript.PANGO_SCRIPT_OGHAM, /// Ogham (ISO 15924: Ogam)
    OldItalic = PangoScript.PANGO_SCRIPT_OLD_ITALIC, /// Old Italic (ISO 15924: Ital)
    Oriya = PangoScript.PANGO_SCRIPT_ORIYA, /// Oriya (ISO 15924: Orya)
    Runic = PangoScript.PANGO_SCRIPT_RUNIC, /// Runic (ISO 15924: Runr)
    Sinhala = PangoScript.PANGO_SCRIPT_SINHALA, /// Sinhala (ISO 15924: Sinh)
    Syriac = PangoScript.PANGO_SCRIPT_SYRIAC, /// Syriac (ISO 15924: Syrc (Syrj, Syrn, Syre))
    Tamil = PangoScript.PANGO_SCRIPT_TAMIL, /// Tamil (ISO 15924: Taml)
    Telugu = PangoScript.PANGO_SCRIPT_TELUGU, /// Telugu (ISO 15924: Telu)
    Thaana = PangoScript.PANGO_SCRIPT_THAANA, /// Thaana (ISO 15924: Thaa)
    Thai = PangoScript.PANGO_SCRIPT_THAI, /// Thai (ISO 15924: Thai)
    Tibetan = PangoScript.PANGO_SCRIPT_TIBETAN, /// Tibetan (ISO 15924: Tibt)
    CanadianAboriginal = PangoScript.PANGO_SCRIPT_CANADIAN_ABORIGINAL, /// Canadian Aboriginal (ISO 15924: Cans)
    Yi = PangoScript.PANGO_SCRIPT_YI, /// Yi (ISO 15924: Yiii)
    Tagalog = PangoScript.PANGO_SCRIPT_TAGALOG, /// Tagalog (ISO 15924: Tglg)
    Hanunoo = PangoScript.PANGO_SCRIPT_HANUNOO, /// Hanunoo (ISO 15924: Hano)
    Buhid = PangoScript.PANGO_SCRIPT_BUHID, /// Buhid (ISO 15924: Buhd)
    Tagbanwa = PangoScript.PANGO_SCRIPT_TAGBANWA, /// Tagbanwa (ISO 15924: Tagb)

    // Added with Unicode 4.0
    Braille = PangoScript.PANGO_SCRIPT_BRAILLE, /// Braille (ISO 15924: Brai)
    Cypriot = PangoScript.PANGO_SCRIPT_CYPRIOT, /// Cypriot (ISO 15924: Cprt)
    Limbu = PangoScript.PANGO_SCRIPT_LIMBU, /// Limbu (ISO 15924: Limb)
    Osmanya = PangoScript.PANGO_SCRIPT_OSMANYA, /// Osmanya (ISO 15924: Osma)
    Shavian = PangoScript.PANGO_SCRIPT_SHAVIAN, /// Shavian (ISO 15924: Shaw)
    LinearB = PangoScript.PANGO_SCRIPT_LINEAR_B, /// Linear B (ISO 15924: Linb)
    TaiLe = PangoScript.PANGO_SCRIPT_TAI_LE, /// Tai Le (ISO 15924: Tale)
    Ugaritic = PangoScript.PANGO_SCRIPT_UGARITIC, /// Ugaritic (ISO 15924: Ugar)

    // Added with Unicode 4.1
    NewTaiLue = PangoScript.PANGO_SCRIPT_NEW_TAI_LUE, /// New Tai Lue (ISO 15924: Talu). Since 1.10
    Buginese = PangoScript.PANGO_SCRIPT_BUGINESE, /// Buginese (ISO 15924: Bugi). Since 1.10
    Glagolitic = PangoScript.PANGO_SCRIPT_GLAGOLITIC, /// Glagolitic (ISO 15924: Glag). Since 1.10
    Tifinagh = PangoScript.PANGO_SCRIPT_TIFINAGH, /// Tifinagh (ISO 15924: Tfng). Since 1.10
    SylotiNagri = PangoScript.PANGO_SCRIPT_SYLOTI_NAGRI, /// Syloti Nagri (ISO 15924: Sylo). Since 1.10
    OldPersian = PangoScript.PANGO_SCRIPT_OLD_PERSIAN, /// Old Persian (ISO 15924: Xpeo). Since 1.10
    Kharoshthi = PangoScript.PANGO_SCRIPT_KHAROSHTHI, /// Kharoshthi (ISO 15924: Khar). Since 1.10

    // Added with Unicode 5.0
    Unknown = PangoScript.PANGO_SCRIPT_UNKNOWN, /// an unassigned code point (ISO 15924: Zzzz). Since 1.14
    Balinese = PangoScript.PANGO_SCRIPT_BALINESE, /// Balinese (ISO 15924: Bali). Since 1.14
    Cuneiform = PangoScript.PANGO_SCRIPT_CUNEIFORM, /// Cuneiform (ISO 15924: Xsux). Since 1.14
    Phoenician = PangoScript.PANGO_SCRIPT_PHOENICIAN, /// Phoenician (ISO 15924: Phnx). Since 1.14
    PhagsPa = PangoScript.PANGO_SCRIPT_PHAGS_PA, /// Phags-pa (ISO 15924: Phag). Since 1.14
    Nko = PangoScript.PANGO_SCRIPT_NKO, /// N'Ko (ISO 15924: Nkoo). Since 1.14

    // Added with Unicode 5.1
    KayahLi = PangoScript.PANGO_SCRIPT_KAYAH_LI, /// Kayah Li (ISO 15924: Kali). Since 1.20.1
    Lepcha = PangoScript.PANGO_SCRIPT_LEPCHA, /// Lepcha (ISO 15924: Lepc). Since 1.20.1
    Rejang = PangoScript.PANGO_SCRIPT_REJANG, /// Rejang (ISO 15924: Rjng). Since 1.20.1
    Sundanese = PangoScript.PANGO_SCRIPT_SUNDANESE, /// Sundanese (ISO 15924: Sund). Since 1.20.1
    Saurashtra = PangoScript.PANGO_SCRIPT_SAURASHTRA, /// Saurashtra (ISO 15924: Saur). Since 1.20.1
    Cham = PangoScript.PANGO_SCRIPT_CHAM, /// Cham (ISO 15924: Cham). Since 1.20.1
    OlChiki = PangoScript.PANGO_SCRIPT_OL_CHIKI, /// Ol Chiki (ISO 15924: Olck). Since 1.20.1
    Vai = PangoScript.PANGO_SCRIPT_VAI, /// Vai (ISO 15924: Vaii). Since 1.20.1
    Carian = PangoScript.PANGO_SCRIPT_CARIAN, /// Carian (ISO 15924: Cari). Since 1.20.1
    Lycian = PangoScript.PANGO_SCRIPT_LYCIAN, /// Lycian (ISO 15924: Lyci). Since 1.20.1
    Lydian = PangoScript.PANGO_SCRIPT_LYDIAN, /// Lydian (ISO 15924: Lydi). Since 1.20.1

    // Added with Unicode 6.0
    Batak = PangoScript.PANGO_SCRIPT_BATAK, /// Batak (ISO 15924: Batk). Since 1.32
    Brahmi = PangoScript.PANGO_SCRIPT_BRAHMI, /// Brahmi (ISO 15924: Brah). Since 1.32
    Mandaic = PangoScript.PANGO_SCRIPT_MANDAIC, /// Mandaic (ISO 15924: Mand). Since 1.32

    // Added with Unicode 6.1
    Chakma = PangoScript.PANGO_SCRIPT_CHAKMA, /// Chakma (ISO 15924: Cakm). Since 1.32
    MeroiticCursive = PangoScript.PANGO_SCRIPT_MEROITIC_CURSIVE, /// Meroitic Cursive (ISO 15924: Merc). Since 1.32
    MeroiticHieroglyphs = PangoScript.PANGO_SCRIPT_MEROITIC_HIEROGLYPHS, /// Meroitic Hieroglyphs (ISO 15924: Mero). Since 1.32
    Miao = PangoScript.PANGO_SCRIPT_MIAO, /// Miao (ISO 15924: Plrd). Since 1.32
    Sharada = PangoScript.PANGO_SCRIPT_SHARADA, /// Sharada (ISO 15924: Shrd). Since 1.32
    SoraSompeng = PangoScript.PANGO_SCRIPT_SORA_SOMPENG, /// Sora Sompeng (ISO 15924: Sora). Since 1.32
    Takri = PangoScript.PANGO_SCRIPT_TAKRI /// Takri (ISO 15924: Takr). Since 1.32
}
223 
224 
/**
 * Looks up the script of a single character.
 *
 * Params:
 *     ch = the character to classify
 *
 * Returns: the value produced by pango_script_for_unichar() for `ch`,
 *          converted to a `Script`.
 */
Script scriptForChar(dchar ch) {
    auto raw = pango_script_for_unichar(ch);
    return cast(Script) raw;
}
228 
/**
 * Fetches the sample language that Pango associates with a script.
 *
 * Params:
 *     script = the script to look up
 *
 * Returns: a `Language` wrapping the pointer returned by
 *          pango_script_get_sample_language().
 *
 * Throws: `Exception` when the C call returns a null pointer.
 */
Language scriptSampleLanguage(Script script) {
    auto sample = pango_script_get_sample_language(cast(PangoScript) script);
    if (sample !is null) {
        return Language(sample);
    }
    throw new Exception("could not get Language for script " ~ to!string(script));
}
236 
237 
/**
 * TextScriptRange: input range wrapping a PangoScriptIter.
 *
 * Each element is a `Section`: a slice of text together with the script
 * reported for it by pango_script_iter_get_range().
 *
 * The range owns its iterator: it is created in the constructor, freed in
 * the destructor, and the struct cannot be copied. A default-initialized
 * range has no iterator and is empty.
 */
struct TextScriptRange {

    private PangoScriptIter *ptr_ = null;
    // Kept so the memory handed to the iterator stays referenced for the
    // lifetime of the range.
    private string text_;
    // Defaults to true so that a default-initialized range (ptr_ is null)
    // reports itself as empty instead of inviting a call on a null iterator.
    private bool empty_ = true;


    /// One element of the range: a run of text and its script.
    struct Section {
        const(char)[] text;
        Script script;
    }


    /**
     * Starts iterating over `text`.
     *
     * Params:
     *     text = the text to split; an empty string yields an empty range
     *
     * Throws: `ConvOverflowException` if the length of `text` does not fit
     *         in an `int`, which is the length type passed to the C API.
     */
    this(string text) {
        text_ = text;
        // to!int throws on overflow; a plain cast would silently truncate
        // the length (possibly to a negative value).
        ptr_ = pango_script_iter_new(text_.ptr, text_.length.to!int);
        empty_ = (text_.length == 0);
    }

    @disable this(this);

    ~this() {
        // ptr_ is null for a default-initialized range.
        if (ptr_ !is null) {
            pango_script_iter_free(ptr_);
            ptr_ = null;
        }
    }

    /// True once the range is exhausted or if it was never given any text.
    @property bool empty() const {
        return empty_;
    }

    /**
     * Returns the current section without advancing.
     *
     * This accessor does not modify the range; emptiness is decided only by
     * the constructor and popFront(). Previously it also rewrote the
     * empty flag, which could overwrite the result recorded by popFront().
     */
    @property Section front() {
        assert(ptr_ !is null, "front called on an uninitialized TextScriptRange");
        const (char) *start;
        const (char) *end;
        PangoScript script;
        pango_script_iter_get_range(ptr_, &start, &end, &script);
        assert(end >= start);
        if (end == start) {
            return Section("", cast(Script)script);
        }
        return Section(start[0 .. (end-start)], cast(Script)script);
    }

    /// Advances to the next section; the range becomes empty when
    /// pango_script_iter_next() reports false.
    void popFront() {
        assert(ptr_ !is null, "popFront called on an uninitialized TextScriptRange");
        empty_ = !(cast(bool)pango_script_iter_next(ptr_));
    }
}

static assert(isInputRange!TextScriptRange);