1 /*
2  * Distributed under the Boost Software License, Version 1.0.
3  *    (See accompanying file LICENSE_1_0.txt or copy at
4  *          http://www.boost.org/LICENSE_1_0.txt)
5  */
6 module pango.break_;
7 
8 import pango.utils;
9 import pango.item;
10 import pango.language;
11 import pango.c.break_;
12 import pango.c.language;
13 
14 import glib;
15 
16 
/**
 * Logical attributes of a character.
 *
 * `LogAttr` is an alias for `PangoLogAttr`. The field descriptions below
 * are written in gtk-doc notation (`@field:`).
 *
 * PangoLogAttr:
 * @is_line_break: if set, can break line in front of character
 * @is_mandatory_break: if set, must break line in front of character
 * @is_char_break: if set, can break here when doing character wrapping
 * @is_white: is whitespace character
 * @is_cursor_position: if set, cursor can appear in front of character.
 * i.e. this is a grapheme boundary, or the first character
 * in the text.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
 * Cluster Boundaries</ulink> semantics.
 * @is_word_start: is first character in a word
 * @is_word_end: is first non-word char after a word
 * Note that in degenerate cases, you could have both @is_word_start
 * and @is_word_end set for some character.
 * @is_sentence_boundary: is a sentence boundary.
 * There are two ways to divide sentences. The first assigns all
 * inter-sentence whitespace/control/format chars to some sentence,
 * so all chars are in some sentence; @is_sentence_boundary denotes
 * the boundaries there. The second way doesn't assign
 * between-sentence spaces, etc. to any sentence, so
 * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
 * @is_sentence_start: is first character in a sentence
 * @is_sentence_end: is first char after a sentence.
 * Note that in degenerate cases, you could have both @is_sentence_start
 * and @is_sentence_end set for some character. (e.g. no space after a
 * period, so the next sentence starts right away)
 * @backspace_deletes_character: if set, backspace deletes one character
 * rather than the entire grapheme cluster. This
 * field is only meaningful on grapheme
 * boundaries (where @is_cursor_position is
 * set).  In some languages, the full grapheme
 * (e.g.  letter + diacritics) is considered a
 * unit, while in others, each decomposed
 * character in the grapheme is a unit. In the
 * default implementation of pango_break(), this
 * bit is set on all grapheme boundaries except
 * those following Latin, Cyrillic or Greek base characters.
 * @is_expandable_space: is a whitespace character that can possibly be
 * expanded for justification purposes. (Since: 1.18)
 * @is_word_boundary: is a word boundary.
 * More specifically, means that this is not a position in the middle
 * of a word.  For example, both sides of a punctuation mark are
 * considered word boundaries.  This flag is particularly useful when
 * selecting text word-by-word.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Word
 * Boundaries</ulink> semantics. (Since: 1.22)
 *
 * The #PangoLogAttr structure stores information
 * about the attributes of a single character.
 */
alias LogAttr = PangoLogAttr;
73 
74 
/**
 * Determine information about cluster/word/line breaks in a string
 * of Unicode text.
 *
 * Params:
 *   text     = text to analyse. Its byte length (`text.length`) is narrowed
 *              to `int` before being handed to the C function.
 *   analysis = pointer whose `pangoStruct` member is passed by address to
 *              `pango_break`; it is dereferenced unconditionally.
 *
 * Returns: a newly allocated array with one `LogAttr` per position in
 *   `text`: the character count reported by `g_utf8_strlen` plus one.
 *   The array therefore always has at least one element, even when
 *   `text` is empty.
 */
LogAttr[] pangoBreak(string text, Analysis *analysis)
{
    // Character count plus one. This is always >= 1, so there is no
    // empty-result case to special-case here.
    immutable glong positions = g_utf8_strlen(text.ptr, text.length) + 1;

    auto attrs = new LogAttr[positions];
    pango_break(text.ptr, cast(int) text.length, &analysis.pangoStruct,
        attrs.ptr, cast(int) positions);
    return attrs;
}
87 
/**
 * Thin wrapper around `pango_find_paragraph_boundary`.
 *
 * Params:
 *   text                    = text to scan; its byte length is narrowed to
 *                             `int` for the C call.
 *   paragraphDelimiterIndex = `out` parameter whose address is passed as
 *                             the third argument of the C function.
 *   nextParagraphStart      = `out` parameter whose address is passed as
 *                             the fourth argument of the C function.
 */
void pangoFindParagraphBoundary(string text, out int paragraphDelimiterIndex, out int nextParagraphStart)
{
    immutable int byteCount = cast(int) text.length;
    int* delimiterOut = &paragraphDelimiterIndex;
    int* nextStartOut = &nextParagraphStart;

    pango_find_paragraph_boundary(text.ptr, byteCount, delimiterOut, nextStartOut);
}
92 
/**
 * Compute the logical attributes of `text` via `pango_get_log_attrs`.
 *
 * Params:
 *   text     = text to analyse. Its byte length (`text.length`) is narrowed
 *              to `int` before being handed to the C function.
 *   level    = forwarded unchanged as the `level` argument of
 *              `pango_get_log_attrs`.
 *   language = its `nativePtr` is forwarded to the C function; the
 *              reference is dereferenced unconditionally.
 *
 * Returns: a newly allocated array with one `LogAttr` per position in
 *   `text`: the character count reported by `g_utf8_strlen` plus one.
 *   The array therefore always has at least one element, even when
 *   `text` is empty.
 */
LogAttr[] pangoGetLogAttrs(string text, int level, Language language)
{
    // Character count plus one. This is always >= 1, so there is no
    // empty-result case to special-case here.
    immutable glong positions = g_utf8_strlen(text.ptr, text.length) + 1;

    auto attrs = new LogAttr[positions];
    pango_get_log_attrs(text.ptr, cast(int) text.length, level,
        language.nativePtr, attrs.ptr, cast(int) positions);
    return attrs;
}
102 
/**
 * This is the default break algorithm, used if no language
 * engine overrides it. Normally you should use pango_break()
 * instead; this function is mostly useful for chaining up
 * from a language engine override.
 *
 * Params:
 *   text     = text to analyse. Its byte length (`text.length`) is narrowed
 *              to `int` before being handed to the C function.
 *   analysis = pointer whose `pangoStruct` member is passed by address to
 *              `pango_default_break`; it is dereferenced unconditionally.
 *
 * Returns: a newly allocated array with one `LogAttr` per position in
 *   `text`: the character count reported by `g_utf8_strlen` plus one.
 *   The array therefore always has at least one element, even when
 *   `text` is empty.
 */
LogAttr[] pangoDefaultBreak(string text, Analysis *analysis)
{
    // Character count plus one. This is always >= 1, so there is no
    // empty-result case to special-case here.
    immutable glong positions = g_utf8_strlen(text.ptr, text.length) + 1;

    auto attrs = new LogAttr[positions];
    pango_default_break(text.ptr, cast(int) text.length,
        &analysis.pangoStruct, attrs.ptr, cast(int) positions);
    return attrs;
}