FreeBASIC  0.91.0
str_instr.c
Go to the documentation of this file.
1 /* instr function */
2 
3 #include "fb.h"
4 
5 #if 0
6 
22 static ssize_t fb_hFindQS
23  (
24  ssize_t start,
25  const char *pachText,
26  ssize_t len_text,
27  const char *pachPattern,
28  ssize_t len_pattern
29  )
30 {
31  ssize_t max_size = len_text - len_pattern + 1;
32  ssize_t qs_bc[256];
33  ssize_t i;
34 
35  /* create "bad character" shifts */
36  for( i=0; i!=256; ++i)
37  qs_bc[ i ] = len_pattern + 1;
38  for( i=0; i!=len_pattern; ++i )
39  qs_bc[ FB_CHAR_TO_INT(pachPattern[i]) ] = len_pattern - i;
40 
41  /* search for string */
42  for (i=start;
43  i<max_size;
44  i+=qs_bc[ FB_CHAR_TO_INT(pachText[ i + len_pattern ]) ])
45  {
46  if( memcmp( pachPattern, pachText + i, len_pattern )==0 ) {
47  return i + 1;
48  }
49  }
50 
51  return 0;
52 }
53 #endif
54 
55 /*
56  * Searches for a sub-string using the Boyer-Moore algorithm.
57  *
58  * - performs the comparisons from right to left
59  * - preprocessing phase in O(m + o-) time and space complexity
60  * - searching phase in O(m * n) time complexity
61  * - 3n text character comparisons in the worst case when searching
62  * for a non periodic pattern
63  * - O(n / m) best performance
64  *
65  * o- = greek letter "sigma"
66  *
67  * From "Handbook of Exact String-Matching Algorithms" by
68  * Christian Charras and Thierry Lecroq
69  * ( http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf ).
70  *
71  * Implementation from
72  * http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/bm.htm
73  */
74 
75 static ssize_t fb_hFindBM
76  (
77  ssize_t start,
78  const char *pachText,
79  ssize_t len_text,
80  const char *pachPattern,
81  ssize_t len_pattern
82  )
83 {
84  ssize_t i, j, len_max = len_text - len_pattern;
85  ssize_t bm_bc[256];
86  ssize_t *bm_gc, *suffixes;
87  ssize_t ret;
88 
89  bm_gc = (ssize_t*) malloc(sizeof(ssize_t) * (len_pattern + 1));
90  suffixes = (ssize_t*) malloc(sizeof(ssize_t) * (len_pattern + 1));
91 
92  memset( bm_gc, 0, sizeof(ssize_t) * (len_pattern+1) );
93  memset( suffixes, 0, sizeof(ssize_t) * (len_pattern+1) );
94 
95  /* create "bad character" shifts */
96  memset(bm_bc, -1, sizeof(bm_bc));
97  for( i=0; i!=len_pattern; ++i )
98  bm_bc[ FB_CHAR_TO_INT(pachPattern[i]) ] = i;
99 
100  /* preprocessing for "good end strategy" case 1 */
101  i = len_pattern; j=len_pattern+1;
102  suffixes[ i ] = j;
103 
104  while ( i!=0 )
105  {
106  char ch1 = pachPattern[i-1];
107  while ( j<=len_pattern && ch1!=pachPattern[j-1] )
108  {
109  if( bm_gc[j]==0 )
110  bm_gc[j] = j - i;
111  j = suffixes[j];
112  }
113  --i;
114  --j;
115  suffixes[i] = j;
116  }
117 
118  /* preprocessing for "good end strategy" case 2 */
119  j = suffixes[0];
120  for( i=0; i<=len_pattern; ++i )
121  {
122  if( bm_gc[i]==0 )
123  bm_gc[i] = j;
124  if( i==j )
125  j = suffixes[j];
126  }
127 
128  ret = 0;
129 
130  /* search */
131  i=start;
132  while( i <= len_max )
133  {
134  j = len_pattern;
135 
136  while( j!=0 && pachPattern[j-1]==pachText[i+j-1] )
137  --j;
138 
139  if( j==0 )
140  {
141  ret = i + 1;
142  break;
143  }
144  else
145  {
146  char chText = pachText[i+j-1];
147  ssize_t shift_gc = bm_gc[j];
148  ssize_t shift_bc = j - 1 - bm_bc[ FB_CHAR_TO_INT(chText) ];
149  i += ( (shift_gc > shift_bc) ? shift_gc : shift_bc );
150  }
151  }
152 
153  free( bm_gc );
154  free( suffixes );
155 
156  return ret;
157 }
158 
159 #if 0
160 static ssize_t fb_hFindNaive
161  (
162  ssize_t start,
163  const char *pachText,
164  ssize_t len_text,
165  const char *pachPattern,
166  ssize_t len_pattern
167  )
168 {
169  ssize_t i;
170  ssize_t imax = (len_text - len_pattern + 1);
171  pachText += start;
172  if( start < imax )
173  {
174  for( i=start; i != imax; ++i ) {
175  ssize_t j;
176  for( j=0; j!=len_pattern; ++j ) {
177  if( pachText[j]!=pachPattern[j] )
178  break;
179  }
180  if( j==len_pattern )
181  return i + 1;
182  ++pachText;
183  }
184  }
185  return 0;
186 }
187 #endif
188 
189 FBCALL ssize_t fb_StrInstr( ssize_t start, FBSTRING *src, FBSTRING *patt )
190 {
191  ssize_t r;
192 
193  if( (src == NULL) || (src->data == NULL) ||
194  (patt == NULL) || (patt->data == NULL) )
195  {
196  r = 0;
197  }
198  else
199  {
200  ssize_t size_src = FB_STRSIZE(src);
201  ssize_t size_patt = FB_STRSIZE(patt);
202 
203  if( (size_src == 0) || (size_patt == 0) ||
204  ((start < 1) || (start > size_src)) || (size_patt > size_src) )
205  {
206  r = 0;
207  }
208  else if( size_patt==1 )
209  {
210  const char *pszEnd = (const char *) FB_MEMCHR( src->data + start - 1, patt->data[0], size_src - start + 1);
211  if( pszEnd==NULL )
212  {
213  r = 0;
214  }
215  else
216  {
217  r = pszEnd - src->data + 1;
218  }
219  }
220  else
221  {
222 
223  r = fb_hFindBM( start - 1,
224  src->data, size_src,
225  patt->data, size_patt );
226 
227  }
228  }
229 
230  FB_STRLOCK();
231 
232  /* del if temp */
233  fb_hStrDelTemp_NoLock( src );
234  fb_hStrDelTemp_NoLock( patt );
235 
236  FB_STRUNLOCK();
237 
238  return r;
239 }