FreeBASIC  0.91.0
ast-vectorize.bas
Go to the documentation of this file.
1 '' AST vectorization
2 ''
3 '' chng: june/2008 written [bryan]
4 '' chng: nov/2008 allow singles/doubles mixed expression [bryan]
5 
6 #include once "fb.bi"
7 #include once "fbint.bi"
8 #include once "ast.bi"
9 
10 dim shared as integer vectorWidth
11 dim shared as integer maxVectorWidth '' 2 if doubles are found anywhere, otherwise 4
12 
13 function hNodesMatch _
14  ( _
15  byval vn as ASTNODE ptr, _
16  byval n as ASTNODE ptr _
17  ) as integer
18 
19  if( vn->class <> n->class ) then
20  return FALSE
21  end if
22 
23  if( vn->class = AST_NODECLASS_VAR ) then
24  if( vn->sym <> n->sym ) then
25  return FALSE
26  end if
27  end if
28 
29  if( astIsCONST( vn ) ) then
30  if( astGetDataType( vn ) <> astGetDataType( n ) ) then
31  return FALSE
32  end if
33 
34  if( typeGetClass( vn->dtype ) = FB_DATACLASS_FPOINT ) then
35  if( astConstGetFloat( vn ) <> astConstGetFloat( n ) ) then
36  return FALSE
37  end if
38  else
39  if( astConstGetInt( vn ) <> astConstGetInt( n ) ) then
40  return FALSE
41  end if
42  end if
43  end if
44 
45  return TRUE
46 end function
47 
48 function hAllowedInVectorize _
49  ( _
50  byval n as ASTNODE ptr, _
51  byval deref as integer _
52  ) as integer
53 
54  dim as integer dtype = any
55 
56  '' if this node is inside a deref or array idx, anything goes
57  if( deref ) then return TRUE
58 
59  '' check if this node could possibly be included in the vectorization
60  select case as const n->class
61  case AST_NODECLASS_BOP
62  if( n->op.op = AST_OP_ADD ) then return TRUE
63  if( n->op.op = AST_OP_SUB ) then return TRUE
64  if( n->op.op = AST_OP_MUL ) then return TRUE
65  if( n->op.op = AST_OP_DIV ) then return TRUE
66  if( n->op.op = AST_OP_ASSIGN ) then return TRUE
67 
68  case AST_NODECLASS_UOP
69  if( n->op.op = AST_OP_SWZ_REPEAT ) then return TRUE
70 
71  case AST_NODECLASS_ASSIGN, AST_NODECLASS_LOAD
72  return TRUE
73 
74  case AST_NODECLASS_CONV, AST_NODECLASS_CONST
75  dtype = ( n->dtype And FB_DT_TYPEMASK )
76  if( dtype = FB_DATATYPE_SINGLE ) then return TRUE
77  if( dtype = FB_DATATYPE_DOUBLE ) then
78  maxVectorWidth = 2
79  return TRUE
80  end if
81 
82  case AST_NODECLASS_VAR
83  '' can't use FB_DATACLASS because pointers would return FB_DATACLASS_INTEGER
84  dtype = ( n->sym->typ And FB_DT_TYPEMASK )
85 
86  '' if it's a UDT, the "field" node will catch non-float fields
87  if( ( dtype = FB_DATATYPE_SINGLE ) or ( dtype = FB_DATATYPE_STRUCT ) ) then
88  return TRUE
89  elseif ( dtype = FB_DATATYPE_DOUBLE ) then
90  maxVectorWidth = 2
91  return TRUE
92  end if
93  return FALSE
94 
95  case AST_NODECLASS_FIELD
96  '' can't use FB_DATACLASS because pointers would return FB_DATACLASS_INTEGER
97  dtype = ( n->sym->typ And FB_DT_TYPEMASK )
98 
99  '' only floats
100  if( dtype = FB_DATATYPE_SINGLE ) then return TRUE
101  if( dtype = FB_DATATYPE_DOUBLE ) then
102  maxVectorWidth = 2
103  return TRUE
104  end if
105 
106  return FALSE
107  case AST_NODECLASS_DEREF, AST_NODECLASS_ADDROF, AST_NODECLASS_IDX
108  return TRUE
109  case else
110 
111  end select
112 
113  return FALSE
114 
115 end function
116 
117 
118 ''::::
119 sub hGetVarDistance _
120  ( _
121  byval vn as ASTNODE ptr, _
122  byval vn_ofs as integer ptr, _
123  byval n as ASTNODE ptr, _
124  byval n_ofs as integer ptr _
125  )
126 
127  if( ( vn->class = AST_NODECLASS_DEREF ) or ( vn->class = AST_NODECLASS_IDX ) ) then
128  *vn_ofs += vn->idx.ofs
129  *n_ofs += n->idx.ofs
130  end if
131 
132  if( vn->class = AST_NODECLASS_VAR ) then
133  *vn_ofs += vn->var_.ofs
134  *n_ofs += n->var_.ofs
135  end if
136 
137  if( vn->l = NULL ) then exit sub
138  if( n->l = NULL ) then exit sub
139 
140  hGetVarDistance( vn->l, vn_ofs, n->l, n_ofs )
141 
142 end sub
143 
144 
145 '' skip any added nodes, as they couldn't possibly be in the new node
146 '' possible nodes to check: SWZ_REPEAT and HADD
147 ''::::
148 function hSkipNewNodes _
149  ( _
150  byval n as ASTNODE ptr, _
151  byval doMerge as integer _
152  ) as ASTNODE ptr
153 
154  dim as ASTNODE ptr oldNode = any
155 
156  do
157  oldnode = n
158  if( n->class = AST_NODECLASS_UOP ) then
159  if( ( n->op.op = AST_OP_SWZ_REPEAT ) or ( n->op.op = AST_OP_HADD ) ) then
160  if( doMerge ) then
161  n->vector = iif( n->vector = 0, 2, n->vector + 1)
162  end if
163  n = n->l
164  end if
165  end if
166  if( oldnode = n ) then
167  exit do
168  end if
169  loop
170 
171  return n
172 
173 end function
174 
175 
176 ''::::
177 function hCheckLoad _
178  ( _
179  byval vn as ASTNODE ptr, _
180  byval n as ASTNODE ptr, _
181  byval deref as integer _
182  ) as integer
183 
184 
185  if( ( vn = NULL ) and ( n = NULL ) ) then return TRUE '' it's okay if they're both done
186 
187  if( ( vn = NULL ) or ( n = NULL ) ) then return FALSE '' not okay if only one is done
188 
189  vn = hSkipNewNodes( vn, FALSE )
190 
191  if( ( vn->class = AST_NODECLASS_DEREF ) or ( vn->class = AST_NODECLASS_IDX ) ) then
192  deref = TRUE
193  end if
194 
195  if( hAllowedInVectorize( vn, deref ) = FALSE ) then
196  return FALSE
197  end if
198 
199  if( hNodesMatch( vn, n ) = FALSE ) then
200  return FALSE
201  end if
202 
203 
204  if( hCheckLoad( vn->l, n->l, deref ) = FALSE ) then return FALSE
205  if( hCheckLoad( vn->r, n->r, deref ) = FALSE ) then return FALSE
206 
207  return TRUE
208 
209 end function
210 
211 
212 ''::::
213 sub hInsertSwizzle _
214  ( _
215  byval n as ASTNODE ptr _
216  )
217 
218  dim as ASTNODE ptr swznode = NULL, varNode = NULL
219 
220  '' do a swizzle load of the scalar var
221  varNode = astNewNode( 0, 0, 0 )
222  *varNode = *n
223 
224  swznode = astNewUOP( AST_OP_SWZ_REPEAT, varNode )
225  *n = *swznode
226  n->vector = 2
227 
228  n->l = varNode
229  varNode->vector = 0
230 
231  astDelNode( swznode )
232 
233 end sub
234 
235 
236 ''::::
237 function hVectorizeNode _
238  ( _
239  byval n as ASTNODE ptr, _
240  byval dist as integer _
241  ) as ASTNODE ptr
242 
243 
244  if( n = NULL ) then return n
245 
246  if( ( n->class = AST_NODECLASS_UOP ) or ( n->class = AST_NODECLASS_BOP ) ) then
247  n->vector = iif( n->vector = 0, 2, n->vector + 1)
248  else
249  if( dist = 0 ) then
250  n->vector = 0
251  else
252  n->vector = iif( n->vector = 0, 2, n->vector + 1)
253  end if
254  end if
255 
256  return n
257 
258 end function
259 
260 
261 '' ::::
262 '' check if the deref points to a floating-point variable.
263 '' following the left nodes of the tree, the first variable encountered is the pointer
264 function hCheckDerefVar _
265  ( _
266  byval n as ASTNODE ptr, _
267  byval dtype as integer ptr _
268  ) as integer
269 
270  dim as integer result = FALSE
271 
272  if( n = NULL ) then return result
273 
274  do while( n )
275  select case n->class
276  case AST_NODECLASS_CONV
277  '' the first encountered conversion will be the final datatype of the pointer
278  *dtype = ( n->dtype and FB_DT_TYPEMASK )
279 
280  '' if the final type is not a float then return FALSE
281  if( ( *dtype = FB_DATATYPE_SINGLE ) or ( *dtype = FB_DATATYPE_DOUBLE ) ) then
282  result = TRUE
283  end if
284  '' since we know the final datatype of the pointer, we can quit now
285  exit do
286  case AST_NODECLASS_VAR, AST_NODECLASS_FIELD
287  *dtype = ( n->sym->typ and FB_DT_TYPEMASK )
288  if( ( *dtype = FB_DATATYPE_SINGLE ) or ( *dtype = FB_DATATYPE_DOUBLE ) ) then
289  result = TRUE
290  end if
291  exit do
292  case else
293  end select
294  n = n->l
295  loop
296 
297  return result
298 
299 end function
300 
301 
302 '' ::::
303 '' check if the array variable is a floating-point variable
304 function hCheckArrayVar _
305  ( _
306  byval n as ASTNODE ptr, _
307  byval dtype as integer ptr _
308  ) as integer
309 
310  function = FALSE
311 
312  if( n = NULL ) then exit function
313 
314  if( n->class = AST_NODECLASS_VAR ) then
315  *dtype = ( n->sym->typ and FB_DT_TYPEMASK )
316  if( ( *dtype = FB_DATATYPE_SINGLE ) or ( *dtype = FB_DATATYPE_DOUBLE ) ) then
317  function = TRUE
318  end if
319  end if
320 
321 end function
322 
323 
324 ''::::
325 function hCheckMemOffset _
326  ( _
327  byval vectorNode as ASTNODE ptr, _
328  byval offset as integer, _
329  byval dtype as FB_DATATYPE _
330  ) as integer
331 
332  dim as integer needOffset = 0
333 
334  '' calc required distance of memory offsets
335  if( vectorNode->vector ) then
336  if( vectorNode->vector > vectorWidth ) then vectorWidth = vectorNode->vector
337  needOffset = ( vectorWidth * typeGetSize( dtype ) )
338  else
339  needOffset = typeGetSize( dtype )
340  end if
341 
342  function = TRUE
343 
344  if( ( dtype = FB_DATATYPE_SINGLE ) or ( dtype = FB_DATATYPE_DOUBLE ) ) then
345 
346  '' if node has already been vectorized, offset must be greater than 0.
347  if( ( vectorNode->vector ) and ( offset = 0 ) ) then '' advanced swizzling not supported yet
348  return FALSE
349  end if
350 
351  if( ( offset <> needOffset ) and ( offset <> 0 ) ) then
352  function = FALSE
353  end if
354  end if
355 
356 end function
357 
358 
359 ''::::
360 function hGetAssignDataType _
361  ( _
362  byval n as ASTNODE ptr _
363  ) as FB_DATATYPE
364 
365  if( n = NULL ) then return 0
366 
367  select case n->class
368  case AST_NODECLASS_CONV
369  return ( n->dtype And FB_DT_TYPEMASK )
370  case AST_NODECLASS_VAR, AST_NODECLASS_FIELD
371  return ( n->sym->typ And FB_DT_TYPEMASK )
372 
373  case else
374  return hGetAssignDataType( n->l )
375  end select
376 end function
377 
378 
379 ''::::
380 '' vectorNode is the (potentially) vectorized tree.
381 '' n is checked against vectorNode to possibly be removed.
382 
383 function hMergeNode _
384  ( _
385  byval vectorNode as ASTNODE ptr, _
386  byval n as ASTNODE ptr, _
387  byval doMerge as integer _
388  ) as integer
389 
390  dim as integer offset = any, needOffset = 0
391  dim as integer vn_ofs = any, n_ofs = any
392  dim as integer dtype = any
393 
394  function = TRUE
395 
396  '' prevent NULL pointer access
397  if( n = NULL ) then return TRUE
398  if( vectorNode = NULL ) then return TRUE
399 
400  '' ignore any nodes that have been added
401  vectorNode = hSkipNewNodes( vectorNode, doMerge )
402 
403  if( doMerge = 0 ) then
404  if( hNodesMatch( vectorNode, n ) = FALSE ) then
405  return FALSE
406  end if
407 
408  if( hAllowedInVectorize( vectorNode, FALSE ) = FALSE ) then
409  return FALSE
410  end if
411  end if
412 
413  '' ASSIGN is usually the starting point in the search of vectorizable trees.
414  '' make sure the assigns are in contiguous memory locations
415  if( vectorNode->class = AST_NODECLASS_ASSIGN ) then
416  dtype = hGetAssignDataType( vectorNode )
417 
418  n_ofs = 0: vn_ofs = 0
419  hGetVarDistance( vectorNode, @vn_ofs, n, @n_ofs )
420 
421  offset = ( n_ofs - vn_ofs )
422 
423  if( vectorNode->vector ) then
424  if( vectorNode->vector > vectorWidth ) then vectorWidth = vectorNode->vector
425  needOffset = ( vectorWidth * typeGetSize( dtype ) )
426  else
427  needOffset = typeGetSize( dtype )
428  end if
429 
430  if( offset <> needOffset ) then
431  return FALSE
432  end if
433  end if
434 
435  '' cap the vector width
436  if( vectorNode->vector = maxVectorWidth ) then
437  return FALSE
438  end if
439 
440  if( vectorNode->class = AST_NODECLASS_FIELD ) then
441  if( doMerge = FALSE ) then
442  if( hCheckLoad( vectorNode, n, FALSE ) = FALSE ) then
443  return FALSE
444  end if
445  end if
446 
447  n_ofs = 0: vn_ofs = 0
448  hGetVarDistance( vectorNode, @vn_ofs, n, @n_ofs )
449 
450  offset = ( n_ofs - vn_ofs )
451 
452  dtype = vectorNode->sym->typ And FB_DT_TYPEMASK
453  if( hCheckMemOffset( vectorNode, offset, dtype ) = FALSE ) then
454  return FALSE
455  end if
456 
457  if( doMerge ) then
458  if( ( offset = 0 ) and ( vectorWidth = 0 ) ) then
459  hInsertSwizzle vectorNode
460  else
461  hVectorizeNode vectorNode, offset
462  end if
463  end if
464 
465  return TRUE
466  end if
467 
468  if( ( vectorNode->class = AST_NODECLASS_DEREF ) or ( vectorNode->class = AST_NODECLASS_IDX ) ) then
469  if( doMerge = FALSE ) then
470  if( hCheckLoad( vectorNode, n, TRUE ) = FALSE ) then
471  return FALSE
472  end if
473  end if
474 
475  if( vectorNode->class = AST_NODECLASS_DEREF ) then
476  '' check if the deref points to a float
477  if( hCheckDerefVar( vectorNode->l, @dtype ) = FALSE ) then
478  return FALSE
479  end if
480  else
481  '' check if the array is of floats
482  if( hCheckArrayVar( vectorNode->r, @dtype ) = FALSE ) then
483  return FALSE
484  end if
485  end if
486 
487  '' it points to a float; get the distance (pointer address) between the pointers
488  n_ofs = 0: vn_ofs = 0
489  hGetVarDistance( vectorNode, @vn_ofs, n, @n_ofs )
490 
491  offset = ( n_ofs - vn_ofs )
492 
493  if( hCheckMemOffset( vectorNode, offset, dtype ) = FALSE ) then
494  return FALSE
495  end if
496 
497  if( doMerge ) then
498  if( ( offset = 0 ) and ( vectorWidth = 0 ) ) then
499  hInsertSwizzle vectorNode
500  else
501  hVectorizeNode vectorNode, offset
502  end if
503  end if
504  return TRUE
505  end if
506 
507  if( vectorNode->class = AST_NODECLASS_VAR ) then
508  if( doMerge = FALSE ) then
509  if( hCheckLoad( vectorNode, n, FALSE ) = FALSE ) then
510  return FALSE
511  end if
512  end if
513 
514  if( doMerge ) then
515  if( vectorWidth = 0 ) then
516  hInsertSwizzle vectorNode
517  end if
518  end if
519 
520  return TRUE
521  end if
522 
523  if( vectorNode->class = AST_NODECLASS_CONST ) then
524  dtype = ( n->dtype And FB_DT_TYPEMASK )
525  if( ( dtype = FB_DATATYPE_SINGLE ) or ( dtype = FB_DATATYPE_DOUBLE ) ) then
526  '' constants always get swizzled
527  if( doMerge ) then
528  if( vectorWidth = 0 ) then
529  hInsertSwizzle vectorNode
530  end if
531  end if
532  else
533  return FALSE
534  end if
535 
536  return TRUE
537  end if
538 
539  if( doMerge ) then
540  hVectorizeNode vectorNode, offset
541  end if
542 
543  if( hMergeNode( vectorNode->l, n->l, doMerge ) = FALSE ) then return FALSE
544  if( hMergeNode( vectorNode->r, n->r, doMerge ) = FALSE ) then return FALSE
545 
546 end function
547 
548 
549 ''::::
550 function hIsVectorizable _
551  ( _
552  byval n as ASTNODE ptr _
553  ) as integer
554 
555  if( n->class = AST_NODECLASS_ASSIGN ) then return TRUE
556 
557  return FALSE
558 
559 end function
560 
561 
562 
563 ''::::
564 function astIntraTreeVectorize _
565  ( _
566  byval n as ASTNODE ptr _
567  ) as integer
568 
569  dim as ASTNODE ptr l = any
570  dim as integer changed = FALSE
571 
572  if( n = NULL ) then return FALSE
573 
574  if( n->class = AST_NODECLASS_BOP ) then
575  if( n->op.op = AST_OP_ADD ) then
576 
577  maxVectorWidth = 4
578  vectorWidth = 0
579  if( hMergeNode( n->l, n->r, FALSE ) ) then
580  maxVectorWidth = 4
581  vectorWidth = 0
582  hMergeNode( n->l, n->r, TRUE )
583 
584  '' check for multiple HADDs
585  l = n->l
586  if( l->class = AST_NODECLASS_UOP ) then
587  if( l->op.op = AST_OP_HADD ) then
588  *n = *l
589  '' remove this node
590  astDelNode( l )
591  n->vector = 0
592  return TRUE
593  end if
594  end if
595 
596  astDelTree( n->r )
597  n->r = NULL
598  n->class = AST_NODECLASS_UOP
599  n->op.op = AST_OP_HADD
600  n->vector = 0
601 
602  return TRUE
603  end if
604  end if
605  end if
606  if( astIntraTreeVectorize( n->l ) = TRUE ) then
607  changed = TRUE
608  end if
609 
610  if( astIntraTreeVectorize( n->r ) = TRUE ) then
611  changed = TRUE
612  end if
613 
614  function = changed
615 end function
616 
617 
618 function hGetNextTree _
619  ( _
620  byval n as ASTNODE ptr _
621  ) as ASTNODE ptr
622 
623  dim as ASTNODE ptr nextNode = NULL
624 
625  do while ( n )
626  '' if( hNodeEmitsCode( treeA ) ) then
627  if( n->class <> AST_NODECLASS_DBG ) And _
628  ( n->class <> AST_NODECLASS_LIT ) And _
629  ( n->class <> AST_NODECLASS_LABEL ) And _
630  ( n->class <> AST_NODECLASS_DECL ) And _
631  ( n->class <> AST_NODECLASS_NOP ) And _
632  ( n->class <> AST_NODECLASS_SCOPEBEGIN ) And _
633  ( n->class <> AST_NODECLASS_SCOPEEND ) then
634  nextNode = n
635  end if
636  if( nextNode ) then exit do
637  n = n->next
638  loop
639  function = nextNode
640 
641 end function
642 
643 '' handle the variable initializer
644 function hCheckLink _
645  ( _
646  byval n as ASTNODE ptr _
647  ) as integer
648 
649  function = FALSE
650 
651  if( n->class <> AST_NODECLASS_LINK ) then exit function
652 
653  if( n->l->class <> AST_NODECLASS_DECL ) then exit function
654  if( n->r->class = AST_NODECLASS_ASSIGN ) then
655  function = TRUE
656  end if
657 
658 end function
659 
660 ''::::
661 '' compare treeA against treeB to see if they can be combined into vector operations.
662 '' if vectorization can be done, the ops in treeA are turned to vector ops and treeB is deleted.
663 
664 sub astProcVectorize _
665  ( _
666  byval p as ASTNODE ptr _
667  )
668 
669  dim as ASTNODE ptr treeA = any, treeB = any, nodeA = any, nodeB = any, prev = any, nxt = any
670  dim as integer intraTree = any, parallelA = any, parallelB = any, doNext = any
671 
672  treeA = hGetNextTree( p )
673 
674  do
675 
676  if( treeA = NULL ) then exit do
677 
678  nodeA = NULL
679  parallelA = FALSE
680  if( hCheckLink( treeA ) = TRUE ) then
681  nodeA = treeA->r
682  elseif( hIsVectorizable( treeA ) = TRUE ) then
683  nodeA = treeA
684  parallelA = TRUE
685  end if
686 
687  maxVectorWidth = 4
688  vectorWidth = 0
689 
690  treeB = treeA
691  do
692 
693  treeB = hGetNextTree( treeB->next )
694 
695  intraTree = FALSE
696  parallelB = FALSE
697 
698  if( ( treeB = NULL ) or ( parallelA = FALSE ) )then
699  intraTree = TRUE
700  else
701  if( hIsVectorizable( treeB ) = TRUE ) then
702  parallelB = TRUE
703  nodeB = treeB
704  else
705  intraTree = TRUE
706  end if
707  end if
708 
709  doNext = TRUE
710  if( parallelA and parallelB ) then
711  if( hMergeNode( nodeA, nodeB, FALSE ) ) then
712  vectorWidth = 0
713  hMergeNode( nodeA, nodeB, TRUE )
714 
715  prev = treeB->prev
716  nxt = treeB->next
717 
718  astDelTree treeB
719  treeB = astNewNOP()
720  treeB->prev = prev
721  treeB->next = nxt
722 
723  treeB->prev->next = treeB
724  if( treeB->next ) then
725  treeB->next->prev = treeB
726  end if
727  doNext = FALSE
728  else
729  intraTree = TRUE
730  end if
731  end if
732 
733  if( ( intraTree = TRUE ) and ( env.clopt.vectorize >= FB_VECTORIZE_INTRATREE ) ) then
734  maxVectorWidth = 4
735  vectorWidth = 0
736  do while ( astIntraTreeVectorize( nodeA ) )
737  loop
738  end if
739 
740  if( treeB ) then
741  if( doNext ) then
742  treeA = treeB
743  exit do
744  else
745  treeB = treeB->next
746  end if
747  else
748  exit do, do
749  end if
750  loop
751  loop
752 
753 end sub
754