1 /*
2  * Distributed under the Boost Software License, Version 1.0.
3  *    (See accompanying file LICENSE_1_0.txt or copy at
4  *          http://www.boost.org/LICENSE_1_0.txt)
5  */
6 module pango.c.break_;
7 
8 import pango.c.item;
9 import pango.c.language;
10 
11 import glib;
12 
13 import std.bitmanip;
14 
15 extern(C):
16 
17 /* Logical attributes of a character.
18  */
19 /**
20  * PangoLogAttr:
21  * @is_line_break: if set, can break line in front of character
22  * @is_mandatory_break: if set, must break line in front of character
23  * @is_char_break: if set, can break here when doing character wrapping
24  * @is_white: is whitespace character
25  * @is_cursor_position: if set, cursor can appear in front of character.
26  * i.e. this is a grapheme boundary, or the first character
27  * in the text.
28  * This flag implements Unicode's
29  * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
30  * Cluster Boundaries</ulink> semantics.
31  * @is_word_start: is first character in a word
32  * @is_word_end: is first non-word char after a word
33  * Note that in degenerate cases, you could have both @is_word_start
34  * and @is_word_end set for some character.
35  * @is_sentence_boundary: is a sentence boundary.
36  * There are two ways to divide sentences. The first assigns all
37  * inter-sentence whitespace/control/format chars to some sentence,
38  * so all chars are in some sentence; @is_sentence_boundary denotes
39  * the boundaries there. The second way doesn't assign
40  * between-sentence spaces, etc. to any sentence, so
41  * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
42  * @is_sentence_start: is first character in a sentence
43  * @is_sentence_end: is first char after a sentence.
44  * Note that in degenerate cases, you could have both @is_sentence_start
45  * and @is_sentence_end set for some character. (e.g. no space after a
46  * period, so the next sentence starts right away)
47  * @backspace_deletes_character: if set, backspace deletes one character
48  * rather than the entire grapheme cluster. This
49  * field is only meaningful on grapheme
50  * boundaries (where @is_cursor_position is
51  * set).  In some languages, the full grapheme
52  * (e.g.  letter + diacritics) is considered a
53  * unit, while in others, each decomposed
54  * character in the grapheme is a unit. In the
55  * default implementation of pango_break(), this
56  * bit is set on all grapheme boundaries except
57  * those following Latin, Cyrillic or Greek base characters.
58  * @is_expandable_space: is a whitespace character that can possibly be
59  * expanded for justification purposes. (Since: 1.18)
60  * @is_word_boundary: is a word boundary.
61  * More specifically, means that this is not a position in the middle
62  * of a word.  For example, both sides of a punctuation mark are
63  * considered word boundaries.  This flag is particularly useful when
64  * selecting text word-by-word.
65  * This flag implements Unicode's
66  * <ulink url="http://www.unicode.org/reports/tr29/">Word
67  * Boundaries</ulink> semantics. (Since: 1.22)
68  *
69  * The #PangoLogAttr structure stores information
70  * about the attributes of a single character.
71  */
struct PangoLogAttr
{
    /* Layout note: the C declaration is a run of `guint` bit-fields, so the
     * C struct occupies one whole guint.  std.bitmanip.bitfields chooses its
     * backing integer from the *total* number of bits declared, so the 13
     * flags below are padded with 19 unnamed bits to reach exactly 32.
     * Padding only to 16 bits would produce a 2-byte struct and give arrays
     * of PangoLogAttr a different element stride than the C library uses.
     */
    mixin(bitfields!(
        guint, "is_line_break", 1,      /* Can break line in front of character */

        guint, "is_mandatory_break", 1, /* Must break line in front of character */

        guint, "is_char_break", 1,      /* Can break here when doing char wrap */

        guint, "is_white", 1,           /* Whitespace character */

        /* Cursor can appear in front of character (i.e. this is a grapheme
         * boundary, or the first character in the text).
         */
        guint, "is_cursor_position", 1,

        /* Note that in degenerate cases, you could have both start/end set on
         * some text, most likely for sentences (e.g. no space after a period, so
         * the next sentence starts right away).
         */

        guint, "is_word_start", 1,      /* first character in a word */
        guint, "is_word_end", 1,        /* is first non-word char after a word */

        /* There are two ways to divide sentences. The first assigns all
         * inter-sentence whitespace/control/format chars to some sentence,
         * so all chars are in some sentence; is_sentence_boundary denotes
         * the boundaries there. The second way doesn't assign
         * between-sentence spaces, etc. to any sentence, so
         * is_sentence_start/is_sentence_end mark the boundaries of those
         * sentences.
         */
        guint, "is_sentence_boundary", 1,
        guint, "is_sentence_start", 1,  /* first character in a sentence */
        guint, "is_sentence_end", 1,    /* first non-sentence char after a sentence */

        /* If set, backspace deletes one character rather than
         * the entire grapheme cluster.
         */
        guint, "backspace_deletes_character", 1,

        /* Only few space variants (U+0020 and U+00A0) have variable
         * width during justification.
         */
        guint, "is_expandable_space", 1,

        /* Word boundary as defined by UAX#29 */
        guint, "is_word_boundary", 1,   /* is NOT in the middle of a word */

        /* Unnamed padding: 13 flag bits + 19 = 32 bits, one full guint. */
        guint, "", 19));
}

/* Guard the ABI: the D struct must be exactly as large as the guint storage
 * unit that holds the bit-fields on the C side.
 */
static assert(PangoLogAttr.sizeof == guint.sizeof,
              "PangoLogAttr must have the same size as the C struct (one guint)");
123 
/* Work out cluster, word and line break information for a run of
 * Unicode text.
 *
 * text      - the text to analyse
 * length    - length of text (presumably in bytes, as in the C API;
 *             confirm against the Pango reference)
 * analysis  - PangoAnalysis describing the text
 * attrs     - caller-provided array of PangoLogAttr receiving the results
 * attrs_len - number of elements available in attrs
 */
void pango_break(const(gchar)* text,
                 int length,
                 PangoAnalysis* analysis,
                 PangoLogAttr* attrs,
                 int attrs_len);
132 
/* Find a paragraph boundary in text.
 *
 * text                      - the text to scan
 * length                    - length of text
 * paragraph_delimiter_index - pointer parameter; going by its name it
 *                             receives the index of the paragraph delimiter
 *                             (confirm against the Pango reference)
 * next_paragraph_start      - pointer parameter; going by its name it
 *                             receives the index where the next paragraph
 *                             starts (confirm against the Pango reference)
 */
void pango_find_paragraph_boundary(const(gchar)* text,
                                   gint length,
                                   gint* paragraph_delimiter_index,
                                   gint* next_paragraph_start);
137 
/* Fill an array of PangoLogAttr for the characters of text.
 *
 * text      - the text to analyse
 * length    - length of text
 * level     - presumably the bidi embedding level, as in the C API
 *             (confirm against the Pango reference)
 * language  - PangoLanguage associated with the text
 * log_attrs - caller-provided array of PangoLogAttr receiving the results
 * attrs_len - number of elements available in log_attrs
 */
void pango_get_log_attrs(const(char)* text,
                         int length,
                         int level,
                         PangoLanguage* language,
                         PangoLogAttr* log_attrs,
                         int attrs_len);
144 
145 
/* The default break algorithm, which applies whenever no language engine
 * overrides it.  Callers should normally go through pango_break(); this
 * entry point is mainly useful for chaining up from a language engine
 * override.
 *
 * The parameter list is the same as that of pango_break().
 */
void pango_default_break(const(gchar)* text,
                         int length,
                         PangoAnalysis* analysis,
                         PangoLogAttr* attrs,
                         int attrs_len);
156