FreeBASIC  0.91.0
fb_unicode.h
Go to the documentation of this file.
/* Unicode definitions */

/* Code-unit types for the UTF-32, UTF-16 and UTF-8 encoding forms. */
typedef uint32_t UTF_32;
typedef uint16_t UTF_16;
typedef uint8_t UTF_8;

/* Bit patterns for UTF-8 byte handling (not referenced within this header). */
#define UTF8_BYTEMASK 0xBF
#define UTF8_BYTEMARK 0x80

/* UTF-16 limits and surrogate-pair arithmetic constants.
   Every cast expression is wrapped in parentheses so that the expansion is
   always a single primary expression: without them, `sizeof UTF16_HALFBASE`
   would be parsed as `sizeof (UTF_32)` followed by a stray literal. */
#define UTF16_MAX_BMP        ((UTF_32)0x0000FFFF)
#define UTF16_SUR_HIGH_START ((UTF_32)0xD800)   /* first high (lead) surrogate */
#define UTF16_SUR_HIGH_END   ((UTF_32)0xDBFF)   /* last high (lead) surrogate */
#define UTF16_SUR_LOW_START  ((UTF_32)0xDC00)   /* first low (trail) surrogate */
#define UTF16_SUR_LOW_END    ((UTF_32)0xDFFF)   /* last low (trail) surrogate */
#define UTF16_HALFSHIFT      10
#define UTF16_HALFBASE       ((UTF_32)0x0010000UL)
#define UTF16_HALFMASK       ((UTF_32)0x3FFUL)

19 #if defined HOST_DOS
20 # include <ctype.h>
21 # define FB_WCHAR char
22 # define _LC(c) c
23 # define FB_WEOF EOF
24 # define wcslen(s) strlen(s)
25 # define iswlower(c) islower(c)
26 # define iswupper(c) isupper(c)
27 # define towlower(c) tolower(c)
28 # define towupper(c) toupper(c)
29 # define wmemcmp(a,b,c) memcmp(a,b,c)
30 # define wmemchr(a,b,c) memchr(a,b,c)
31 # define mbstowcs __dos_mbstowcs
32 # define wcstombs __dos_wcstombs
33 # define wcsstr(str, strSearch) strstr(str, strSearch)
34 # define wcsncmp(str1, str2, count) strncmp(str1, str2, count)
35 # define wcstod strtod
36 # define wcstol strtol
37 # define wcstoll strtoll
38 # define wcstoul strtoul
39 # define wcstoull strtoull
40 # define wcschr strchr
41 # define wcscspn strcspn
42 # define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
43  static __inline__ size_t __dos_mbstowcs(FB_WCHAR *wcstr, const char *mbstr, size_t count)
44  {
45  return memcpy(wcstr,mbstr,count), count;
46  }
47  static __inline__ size_t __dos_wcstombs(char *mbstr, const FB_WCHAR *wcstr, size_t count)
48  {
49  return memcpy(mbstr,wcstr,count), count;
50  }
51  static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
52  {
53  memcpy(dst,src,chars);
54  }
55  static __inline__ int swprintf(FB_WCHAR *buffer, size_t n, const FB_WCHAR *format, ...)
56  {
57  int result;
58  va_list ap;
59  va_start(ap, format);
60  result = vsprintf( buffer, format, ap );
61  va_end(ap);
62  return result;
63  }
64 #elif defined HOST_MINGW || defined HOST_CYGWIN
65 # include <wchar.h>
66 # include <wctype.h>
67 # define FB_WCHAR wchar_t
68 # define _LC(c) L ## c
69 # if defined HOST_MINGW
70 # define FB_WEOF WEOF
71 # define swprintf _snwprintf
72 # define FB_WSTR_FROM_INT( buffer, num ) _itow( num, buffer, 10 )
73 # define FB_WSTR_FROM_UINT( buffer, num ) _ultow( (unsigned int) num, buffer, 10 )
74 # define FB_WSTR_FROM_UINT_OCT( buffer, num ) _itow( num, buffer, 8 )
75 # define FB_WSTR_FROM_INT64( buffer, num ) _i64tow( num, buffer, 10 )
76 # define FB_WSTR_FROM_UINT64( buffer, num ) _ui64tow( num, buffer, 10 )
77 # define FB_WSTR_FROM_UINT64_OCT( buffer, num ) _ui64tow( num, buffer, 8 )
78 # else
79 # define FB_WEOF ((FB_WCHAR)-1)
80 # endif
81 # define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
82  static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
83  {
84  while( chars-- ) {
85  UTF_16 c = *src++;
86  if( c > 255 ) {
87  if( c >= UTF16_SUR_HIGH_START && c <= UTF16_SUR_HIGH_END )
88  ++src;
89  c = '?';
90  }
91  *dst++ = c;
92  }
93  }
94 #else
95 # define __USE_ISOC99 1
96 # define __USE_ISOC95 1
97 # include <wchar.h>
98 # include <wctype.h>
99 # define FB_WCHAR wchar_t
100 # define _LC(c) L ## c
101 # define FB_WEOF WEOF
102 #endif
103 
/* Fallback number-to-WSTRING formatters, defined only where the platform
   section above did not already provide one. Each expands to a swprintf()
   call writing into 'buffer'. The size passed to swprintf() is derived from
   the value's type - 3 characters per byte for decimal, 4 per byte for
   octal, a fixed digit budget for floating point - plus one for the
   terminator; 'buffer' must be at least that large. */

#if !defined( FB_WSTR_FROM_INT )
# define FB_WSTR_FROM_INT( buffer, num ) \
	swprintf( buffer, sizeof( int ) * 3 + 1, _LC("%d"), (int) (num) )
#endif

#if !defined( FB_WSTR_FROM_UINT )
# define FB_WSTR_FROM_UINT( buffer, num ) \
	swprintf( buffer, sizeof( unsigned int ) * 3 + 1, _LC("%u"), (unsigned) (num) )
#endif

#if !defined( FB_WSTR_FROM_UINT_OCT )
# define FB_WSTR_FROM_UINT_OCT( buffer, num ) \
	swprintf( buffer, sizeof( int ) * 4 + 1, _LC("%o"), (unsigned) (num) )
#endif

#if !defined( FB_WSTR_FROM_INT64 )
# define FB_WSTR_FROM_INT64( buffer, num ) \
	swprintf( buffer, sizeof( long long ) * 3 + 1, _LC("%lld"), (long long) (num) )
#endif

#if !defined( FB_WSTR_FROM_UINT64 )
# define FB_WSTR_FROM_UINT64( buffer, num ) \
	swprintf( buffer, sizeof( unsigned long long ) * 3 + 1, _LC("%llu"), (unsigned long long) (num) )
#endif

#if !defined( FB_WSTR_FROM_UINT64_OCT )
# define FB_WSTR_FROM_UINT64_OCT( buffer, num ) \
	swprintf( buffer, sizeof( long long ) * 4 + 1, _LC("%llo"), (unsigned long long) (num) )
#endif

/* 7 significant digits, plus room for sign, point and exponent */
#if !defined( FB_WSTR_FROM_FLOAT )
# define FB_WSTR_FROM_FLOAT( buffer, num ) \
	swprintf( buffer, 7+8 + 1, _LC("%.7g"), (double) (num) )
#endif

/* 16 significant digits, plus room for sign, point and exponent */
#if !defined( FB_WSTR_FROM_DOUBLE )
# define FB_WSTR_FROM_DOUBLE( buffer, num ) \
	swprintf( buffer, 16+8 + 1, _LC("%.16g"), (double) (num) )
#endif
143 
144 #ifndef FB_WSTR_WCHARTOCHAR
145 #define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
146 static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
147 {
148  while( chars ) {
149  UTF_32 c = *src++;
150  if( c > 255 )
151  c = '?';
152  *dst++ = c;
153  --chars;
154  }
155 }
156 #endif
157 
158 /* Calculate the number of characters between two pointers. */
159 static __inline__ ssize_t fb_wstr_CalcDiff( const FB_WCHAR *ini, const FB_WCHAR *end )
160 {
161  return ((intptr_t)end - (intptr_t)ini) / sizeof( FB_WCHAR );
162 }
163 
164 static __inline__ FB_WCHAR *fb_wstr_AllocTemp( ssize_t chars )
165 {
166  /* plus the null-term */
167  return (FB_WCHAR *)malloc( (chars + 1) * sizeof( FB_WCHAR ) );
168 }
169 
170 static __inline__ void fb_wstr_Del( FB_WCHAR *s )
171 {
172  free( (void *)s );
173 }
174 
175 /* Return the length of a WSTRING. */
176 static __inline__ ssize_t fb_wstr_Len( const FB_WCHAR *s )
177 {
178  /* without the null-term */
179  return wcslen( s );
180 }
181 
182 static __inline__ void fb_wstr_ConvFromA( FB_WCHAR *dst, ssize_t dst_chars, const char *src )
183 {
184  ssize_t chars;
185 
186  /* NULL? */
187  if( src == NULL ) {
188  chars = -1;
189  } else {
190  /* plus the null-term (note: "n" in chars, not bytes!) */
191  chars = mbstowcs( dst, src, dst_chars + 1 );
192  }
193 
194  /* error? */
195  if( chars == -1 )
196  *dst = _LC('\0');
197 
198  /* if there's no enough space in dst the null-term won't be added? */
199  else if( chars == (dst_chars + 1) )
200  dst[dst_chars] = _LC('\0');
201 }
202 
203 static __inline__ void fb_wstr_ConvToA( char *dst, const FB_WCHAR *src, ssize_t chars )
204 {
205  /* !!!FIXME!!! wcstombs() will fail and not emit '?' or such if the
206  characters are above 255 and can't be converted? not good.. */
207 #if 0
208  ssize_t bytes;
209 
210  /* plus the null-term */
211  bytes = wcstombs( dst, src, chars + 1 );
212 
213  /* error? */
214  if( bytes == -1 )
215  *dst = '\0';
216 
217  /* if there's no enough space in dst the null-term won't be added? */
218  else if( bytes == chars + 1 )
219  dst[src_chars] = '\0';
220 
221 #else
222  FB_WSTR_WCHARTOCHAR( dst, src, chars );
223 
224  /* plus the null-term */
225  dst[chars] = '\0';
226 #endif
227 }
228 
229 static __inline__ int fb_wstr_IsLower( FB_WCHAR c )
230 {
231  return iswlower( c );
232 }
233 
234 static __inline__ int fb_wstr_IsUpper( FB_WCHAR c )
235 {
236  return iswupper( c );
237 }
238 
239 static __inline__ FB_WCHAR fb_wstr_ToLower( FB_WCHAR c )
240 {
241  return towlower( c );
242 }
243 
244 static __inline__ FB_WCHAR fb_wstr_ToUpper( FB_WCHAR c )
245 {
246  return towupper( c );
247 }
248 
249 /* Copy n characters from A to B and terminate with NUL. */
250 static __inline__ void fb_wstr_Copy( FB_WCHAR *dst, const FB_WCHAR *src, ssize_t chars )
251 {
252  if( (src != NULL) && (chars > 0) )
253  dst = (FB_WCHAR *) FB_MEMCPYX( dst, src, chars * sizeof( FB_WCHAR ) );
254 
255  /* add the null-term */
256  *dst = _LC('\0');
257 }
258 
259 /* Copy n characters from A to B. */
260 static __inline__ FB_WCHAR *fb_wstr_Move( FB_WCHAR *dst, const FB_WCHAR *src, ssize_t chars )
261 {
262  return (FB_WCHAR *) FB_MEMCPYX( dst, src, chars * sizeof( FB_WCHAR ) );
263 }
264 
265 static __inline__ void fb_wstr_Fill( FB_WCHAR *dst, FB_WCHAR c, ssize_t chars )
266 {
267  ssize_t i;
268  for( i = 0; i < chars; i++ )
269  *dst++ = c;
270  /* add null-term */
271  *dst = _LC('\0');
272 }
273 
274 /* Skip all characters (c) from the beginning of the string, max 'n' chars. */
275 static __inline__ const FB_WCHAR *fb_wstr_SkipChar( const FB_WCHAR *s, ssize_t chars, FB_WCHAR c )
276 {
277  if( s == NULL )
278  return NULL;
279 
280  const FB_WCHAR *p = s;
281  while( chars > 0 )
282  {
283  if( *p != c )
284  return p;
285  ++p;
286  --chars;
287  }
288 
289  return p;
290 }
291 
292 /* Skip all characters (c) from the end of the string, max 'n' chars. */
293 static __inline__ const FB_WCHAR *fb_wstr_SkipCharRev( const FB_WCHAR *s, ssize_t chars, FB_WCHAR c )
294 {
295  if( (s == NULL) || (chars <= 0) )
296  return s;
297 
298  /* fixed-len's are filled with null's as in PB, strip them too */
299  const FB_WCHAR *p = &s[chars-1];
300  while( chars > 0 )
301  {
302  if( *p != c )
303  return p;
304  --p;
305  --chars;
306  }
307 
308  return p;
309 }
310 
311 static __inline__ FB_WCHAR *fb_wstr_Instr( const FB_WCHAR *s, const FB_WCHAR *patt )
312 {
313  return wcsstr( s, patt );
314 }
315 
316 static __inline__ size_t fb_wstr_InstrAny( const FB_WCHAR *s, const FB_WCHAR *sset )
317 {
318  return wcscspn( s, sset );
319 }
320 
321 static __inline__ int fb_wstr_Compare( const FB_WCHAR *str1, const FB_WCHAR *str2, ssize_t chars )
322 {
323  return wcsncmp( str1, str2, chars );
324 }