FreeBASIC  0.91.0
emit_SSE.bas
Go to the documentation of this file.
1 '' code generation for x86 Streaming SIMD Extensions, GNU assembler (GAS/Intel arch)
2 ''
3 '' chng: june/2008 written [bryan]
4 
5 #include once "fb.bi"
6 #include once "fbint.bi"
7 #include once "reg.bi"
8 #include once "ir.bi"
9 #include once "rtl.bi"
10 #include once "emit.bi"
11 #include once "emitdbg.bi"
12 #include once "hash.bi"
13 #include once "symb.bi"
14 #include once "emit-private.bi"
15 
'' Emits a fix-up for a 64-bit unsigned value that was just loaded onto the
'' FPU stack with a (signed) fild: when the tested dword of svreg is
'' negative, the constant 2^64 is added to st(0); otherwise the fix-up is
'' jumped over.
''
'' svreg: the integer operand whose sign is tested.
sub hULONG2DBL _
	( _
		byval svreg as IRVREG ptr _
	) static

	dim as string skipLabel, signPart

	skipLabel = *symbUniqueLabel( )

	'' operand used for the sign test; TRUE is passed as the fifth argument
	'' (presumably selecting the upper dword - confirm against hPrepOperand)
	hPrepOperand( svreg, signPart, FB_DATATYPE_INTEGER, 0, TRUE )

	outp "cmp " + signPart + ", 0"
	outp "jns " + skipLabel

	'' 80-bit extended constant, pushed high part first:
	'' exponent 0x403f, mantissa 0x80000000:00000000  ->  2^64
	hPUSH( "0x403f" )
	hPUSH( "0x80000000" )
	hPUSH( "0" )

	outp "fldt [esp]"
	outp "add esp, 12"
	outp "faddp"

	hLABEL( skipLabel )

end sub
40 
41 
42 '':::::
'' Stores a floating-point value into a signed 64-bit integer destination.
''
'' dvreg: destination operand; nothing is emitted when its type is unsigned.
'' svreg: floating-point source. When it belongs to the SSE register family
''        it is first bounced through the stack onto the FPU stack; in every
''        other case the value is expected to be in st(0) already.
sub _emitSTORF2L_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string target, value
	dim as integer nbytes

	'' unsigned destinations are not handled by this routine
	if( typeIsSigned( dvreg->dtype ) = 0 ) then
		exit sub
	end if

	if( svreg->regFamily = IR_REG_SSE ) then

		nbytes = typeGetSize( svreg->dtype )

		'' temporary stack slot exactly as large as the source type
		outp "sub esp" + COMMA + str( nbytes )

		hPrepOperand( svreg, value )

		if( nbytes <= 4 ) then
			outp "movss dword ptr [esp]" + COMMA + value
			outp "fld dword ptr [esp]"
		else
			outp "movlpd qword ptr [esp]" + COMMA + value
			outp "fld qword ptr [esp]"
		end if

		outp "add esp" + COMMA + str( nbytes )
	end if

	'' convert st(0) to integer, store it and pop the FPU stack
	hPrepOperand( dvreg, target )
	outp "fistp " + target

end sub
78 
79 
80 
81 '':::::
'' Stores a floating-point value into an integer destination of up to
'' 32 bits.
''
'' dvreg: integer destination (register or memory).
'' svreg: floating-point source (SSE register, memory, or st(0) when its
''        register family is IR_REG_FPU_STACK).
''
'' Unsigned 32-bit and signed 16-bit destinations are converted on the FPU;
'' everything else goes through cvtsd2si/cvtss2si into a 32-bit integer
'' register, which is then copied (possibly narrowed) to the destination.
sub _emitSTORF2I_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src
	dim as integer sdsize, ddsize
	dim as string aux, aux8, aux16
	dim as integer isfree, reg, wasreg

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	'' special case if the dst is uinteger: convert to a 64-bit integer on
	'' the FPU and keep only the low dword
	if( (ddsize = 4) and (typeIsSigned( dvreg->dtype ) = 0) ) then
		outp "sub esp, 8"
		if( svreg->typ <> IR_VREGTYPE_REG ) then
			outp "fld " + src
		elseif( svreg->regFamily = IR_REG_SSE ) then
			if( sdsize > 4 ) then
				outp "movlpd qword ptr [esp], " + src
				outp "fld qword ptr [esp]"
			else
				outp "movss dword ptr [esp], " + src
				outp "fld dword ptr [esp]"
			end if
		end if
		outp "fistp qword ptr [esp]"
		'' low dword goes to dst, the high dword is discarded
		hPOP dst
		outp "add esp, 4"
		exit sub
	end if

	'' special case if the dst is signed short: fistp straight into dst
	if( ddsize = 2 ) and ( typeIsSigned( dvreg->dtype ) ) then
		outp "sub esp, 8"
		if( svreg->typ <> IR_VREGTYPE_REG ) then
			outp "fld " + src
		elseif( svreg->regFamily = IR_REG_SSE ) then
			if( sdsize > 4 ) then
				outp "movlpd qword ptr [esp], " + src
				outp "fld qword ptr [esp]"
			else
				outp "movss dword ptr [esp], " + src
				outp "fld dword ptr [esp]"
			end if
		end if
		outp "fistp " + dst
		outp "add esp, 8"
		exit sub
	end if

	if( (dvreg->typ = IR_VREGTYPE_REG) and (ddsize = 4) ) then
		'' dst is 32-bit register: convert directly into it
		isfree = TRUE
		aux = dst
		wasreg = TRUE
	else
		'' dst is not 32-bit register
		wasreg = FALSE
		'' find a register
		reg = hFindRegNotInVreg( svreg )

		aux = *hGetRegName( FB_DATATYPE_INTEGER, reg )

		'' preserve the scratch register if it is currently in use
		isfree = hIsRegFree( FB_DATACLASS_INTEGER, reg )
		if( isfree = FALSE ) then
			hPUSH aux
		end if
	end if

	if( svreg->regFamily = IR_REG_FPU_STACK ) then
		'' value is in st(0): convert through a stack slot
		outp "sub esp, 4"
		outp "fistp dword ptr [esp]"
		outp "mov " + aux + COMMA + "dword ptr [esp]"
		outp "add esp, 4"
	else
		'' leave off the size prefix on a memory source, as
		'' _emitLOADF2I_SSE does: GAS doesn't like a 32-bit dst combined
		'' with a 64-bit src here (register operands are unaffected)
		hPrepOperand( svreg, src, , , , FALSE )
		if( sdsize > 4 ) then
			outp "cvtsd2si " + aux + COMMA + src
		else
			outp "cvtss2si " + aux + COMMA + src
		end if
	end if

	if( wasreg = FALSE ) Then
		'' copy the part of the scratch register that matches dst's size
		if( ddsize = 1 ) then
			aux8 = *hGetRegName( FB_DATATYPE_BYTE, reg )
			outp "mov " + dst + COMMA + aux8
		elseif( ddsize = 2 ) then
			aux16 = *hGetRegName( FB_DATATYPE_SHORT, reg )
			outp "mov " + dst + COMMA + aux16
		else
			outp "mov " + dst + COMMA + aux
		end if
		if( isfree = FALSE ) then
			hPOP aux
		end if
	end if

end sub
186 
187 
188 
189 '' NOTE: this is identical to the FPU code, which is probably
190 '' faster than any SSE implementation
191 '':::::
'' Stores a 64-bit integer into a floating-point destination using the FPU
'' (fild / fstp).
''
'' dvreg: floating-point destination, written with fstp.
'' svreg: 64-bit integer source. Register and immediate sources are pushed
''        on the stack as two halves first; any other source is loaded
''        directly. Unsigned sources get the hULONG2DBL fix-up after fild.
sub _emitSTORL2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, lo, hi
	dim as integer srcSigned

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, lo )

	srcSigned = typeIsSigned( svreg->dtype )

	if( (svreg->typ = IR_VREGTYPE_REG) or (svreg->typ = IR_VREGTYPE_IMM) ) then

		'' build the 64-bit value on the stack, second half first
		hPrepOperand64( svreg, lo, hi )
		hPUSH( hi )
		hPUSH( lo )

		if( srcSigned ) then
			outp "fild " + dtypeTB(svreg->dtype).mname + " [esp]"
		else
			outp "fild qword ptr [esp]"
		end if

		outp "add esp, 8"

	else
		'' not a reg or imm: load it where it is
		outp "fild " + lo
	end if

	'' fild treats the value as signed; correct unsigned sources
	if( srcSigned = 0 ) then
		hULONG2DBL( svreg )
	end if

	outp "fstp " + dst

end sub
250 
251 
252 
253 
254 '':::::
'' Stores an integer of up to 32 bits into a floating-point destination.
''
'' dvreg: floating-point destination.
'' svreg: integer source (register, memory or immediate).
''
'' Unsigned 32-bit sources are zero-extended to 64 bits on the stack and
'' converted with the FPU. All other sources are converted with
'' cvtsi2sd/cvtsi2ss into xmm7, which is then stored to the destination.
sub _emitSTORI2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src, intOp
	dim as integer ddsize, sdsize, reg, scratchFree, needScratch

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	ddsize = typeGetSize( dvreg->dtype )
	sdsize = typeGetSize( svreg->dtype )

	'' special case for unsigned integers: push 0:src and fild it as qword
	if( (typeIsSigned( svreg->dtype ) = 0) and (sdsize = 4) ) then
		hPUSH "0"
		hPUSH src
		outp "fild qword ptr [esp]"
		outp "add esp, 8"
		outp "fstp " + dst
		exit sub
	end if

	'' immediates and sources that are not 32 bits wide must be brought
	'' into a 32-bit register first
	needScratch = (svreg->typ = IR_VREGTYPE_IMM) or (sdsize <> 4)

	scratchFree = TRUE
	if( needScratch ) then
		reg = hFindRegNotInVreg( svreg )
		intOp = *hGetRegName( FB_DATATYPE_INTEGER, reg )

		'' save the register when something else is living in it
		scratchFree = hIsRegFree( FB_DATACLASS_INTEGER, reg )
		if( scratchFree = FALSE ) then
			hPUSH intOp
		end if

		'' put the src into the scratch register, widening as needed
		if( svreg->typ = IR_VREGTYPE_IMM ) then
			outp "mov " + intOp + COMMA + src
		elseif( typeIsSigned( svreg->dtype ) ) then
			outp "movsx " + intOp + COMMA + src
		else
			outp "movzx " + intOp + COMMA + src
		end if
	else
		'' src is 32-bit reg or 32-bit mem: usable as it is
		intOp = src
	end if

	if( ddsize <= 4 ) then
		outp "cvtsi2ss xmm7" + COMMA + intOp
		outp "movss " + dst + COMMA + "xmm7"
	else
		outp "cvtsi2sd xmm7" + COMMA + intOp
		outp "movlpd " + dst + COMMA + "xmm7"
	end if

	if( scratchFree = FALSE ) then
		hPOP intOp
	end if
end sub
321 
322 
'' Stores a packed (vector) floating-point register to a destination.
'' Called by _emitSTORF2F_SSE when the source is a vector held in a
'' register.
''
'' dvreg: destination operand.
'' svreg: source operand; svreg->vector selects the element count handled
''        (2, 3 or 4 - any other value emits nothing).
''
'' Operands are prepared without a size prefix (sixth hPrepOperand
'' argument = FALSE): the width of a packed move is implied by the
'' instruction and does not match the element type's prefix.
sub hEmitStoreFreg2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src
	dim as integer ddsize

	hPrepOperand( dvreg, dst, , , , FALSE )
	hPrepOperand( svreg, src, , , , FALSE )

	ddsize = typeGetSize( dvreg->dtype )

	'' two doubles: one unaligned 128-bit store
	if( ( svreg->vector = 2 ) and ( ddsize > 4 ) ) then
		outp "movupd " + dst + COMMA + src
		exit sub
	end if

	select case svreg->vector
	case 2
		'' low 64 bits
		outp "movlps " + dst + COMMA + src

	case 3
		'' low 64 bits, then the third element via xmm7 at offset 8
		outp "movhlps xmm7" + COMMA + src
		outp "movlps " + dst + COMMA + src
		hPrepOperand( dvreg, dst, , 8, , FALSE )
		outp "movss " + dst + COMMA + "xmm7"

	case 4
		'' all 128 bits
		outp "movups " + dst + COMMA + src
	end select

end sub
354 
355 
356 '':::::
'' Stores a floating-point value (scalar or vector) into a floating-point
'' destination, converting between single and double where the sizes
'' differ.
''
'' dvreg: floating-point destination.
'' svreg: floating-point source. A register source may be st(0)
''        (IR_REG_FPU_STACK), a vector register (delegated to
''        hEmitStoreFreg2F_SSE) or a scalar SSE register. Any other source
''        is copied or converted through xmm7.
sub _emitSTORF2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src
	dim as integer ddsize, sdsize, src_vec

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	ddsize = typeGetSize( dvreg->dtype )
	sdsize = typeGetSize( svreg->dtype )

	src_vec = ( svreg->vector > 0 )

	if( svreg->typ = IR_VREGTYPE_REG ) then
		'' if the src was returned from a function, it is in st(0)
		if( svreg->regFamily = IR_REG_FPU_STACK ) then
			outp "fstp " + dst
			exit sub
		end if

		if( src_vec ) then
			hEmitStoreFreg2F_SSE dvreg, svreg
			exit sub
		end if

		if( ddsize > 4 ) then
			if( sdsize <= 4 ) then
				'' convert src to double (in place), then move
				outp "cvtss2sd " + src + COMMA + src
			end if

			outp "movlpd " + dst + COMMA + src
		else
			'' dst is single
			'' if src is double, convert it to single (in place) first
			if( sdsize > 4 ) then
				outp "cvtsd2ss " + src + COMMA + src
			end if
			outp "movss " + dst + COMMA + src
		end if
	else
		'' same size? just copy through xmm7..
		if( sdsize = ddsize ) then
			if( src_vec ) then
				'' packed moves: re-prepare both operands WITHOUT the
				'' element-size prefix (sixth argument). Passing FALSE in
				'' the fifth slot would leave the prefix in place.
				hPrepOperand( dvreg, dst, , , , FALSE )
				hPrepOperand( svreg, src, , , , FALSE )
				if( ddsize > 4 ) then
					outp "movupd xmm7" + COMMA + src
					outp "movupd " + dst + COMMA + "xmm7"
				else
					if( svreg->vector = 2 ) then
						outp "movlps xmm7" + COMMA + src
						outp "movlps " + dst + COMMA + "xmm7"
					elseif( svreg->vector = 3 ) then
						'' low two elements, then the third at offset 8
						outp "movups xmm7" + COMMA + src
						outp "movlps " + dst + COMMA + "xmm7"
						outp "unpckhps xmm7, xmm7"
						hPrepOperand( dvreg, dst, , 8, , FALSE )
						outp "movss " + dst + COMMA + "xmm7"
					elseif( svreg->vector = 4 ) then
						outp "movups xmm7" + COMMA + src
						outp "movups " + dst + COMMA + "xmm7"
					end if
				end if
				exit sub
			end if

			if( ddsize > 4 ) then
				outp "movlpd xmm7" + COMMA + src
				outp "movlpd " + dst + COMMA + "xmm7"
			else
				outp "movss xmm7" + COMMA + src
				outp "movss " + dst + COMMA + "xmm7"
			end if
		'' diff sizes, convert..
		else
			if( sdsize > 4 ) then
				'' load as double, store as single
				if( src_vec ) then
					outp "cvtpd2ps xmm7" + COMMA + src
					outp "movlps " + dst + COMMA + "xmm7"
				else
					outp "cvtsd2ss xmm7" + COMMA + src
					outp "movss " + dst + COMMA + "xmm7"
				end if
			else
				'' load as single, store as double
				if( src_vec ) then
					outp "cvtps2pd xmm7" + COMMA + src
					outp "movupd " + dst + COMMA + "xmm7"
				else
					outp "cvtss2sd xmm7" + COMMA + src
					outp "movlpd " + dst + COMMA + "xmm7"
				end if
			end if
		end if
	end if
end sub
460 
461 
462 
463 '':::::
'' Loads a floating-point value into a 64-bit integer destination.
''
'' dvreg: 64-bit integer destination; both halves are filled with pops
''        from the stack.
'' svreg: floating-point source. When it belongs to the SSE register
''        family it is first moved onto the FPU stack; otherwise nothing is
''        loaded here and the value is taken from st(0).
sub _emitLOADF2L_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dstLo, dstHi, src
	dim as integer sdsize

	hPrepOperand( dvreg, dstLo )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )

	'' move float onto FPU stack
	if( svreg->regFamily = IR_REG_SSE ) then
		if( svreg->typ <> IR_VREGTYPE_REG ) then
			outp "fld " + src
		else
			'' xmm register: bounce through a stack slot
			outp "sub esp, 8"
			if( sdsize <= 4 ) then
				outp "movss dword ptr [esp]" + COMMA + src
				outp "fld dword ptr [esp]"
			else
				outp "movlpd qword ptr [esp]" + COMMA + src
				outp "fld qword ptr [esp]"
			end if
			outp "add esp, 8"
		end if
	end if

	hPrepOperand64( dvreg, dstLo, dstHi )

	'' signed?
	'' (handle ULONGINT here too - workaround for #2082801)
	if( typeIsSigned( dvreg->dtype ) orelse (dvreg->dtype = FB_DATATYPE_ULONGINT) ) then

		'' one fistp leaves the whole 64-bit result on the stack
		outp "sub esp, 8"
		outp "fistp " + dtypeTB(dvreg->dtype).mname + " [esp]"

	'' unsigned.. split into two dwords
	else
		outp "fld st(0)"
		'' UWtype hi = (UWtype)(a / Wtype_MAXp1_F)
		'' (0x4f800000 is 2^32 as a single)
		outp "push 0x4f800000"
		outp "fdiv dword ptr [esp]"
		outp "fistp dword ptr [esp]"
		'' UWtype lo = (UWtype)(a - ((DFtype)hi) * Wtype_MAXp1_F)
		outp "fild dword ptr [esp]"
		outp "push 0x4f800000"
		outp "fmul dword ptr [esp]"
		outp "fsubp"
		outp "fistp dword ptr [esp]"
		'' ((UDWtype) hi << W_TYPE_SIZE) | lo
	end if

	'' either way two dwords are on the stack now
	hPOP( dstLo )
	hPOP( dstHi )

end sub
527 
528 
529 
530 '':::::
'' Loads a floating-point value into an integer destination of up to
'' 32 bits.
''
'' dvreg: integer destination (register or memory).
'' svreg: floating-point source (SSE register, memory, or st(0) when its
''        register family is IR_REG_FPU_STACK).
''
'' Unsigned 32-bit destinations are converted as a 64-bit integer on the
'' FPU. Otherwise the value is converted into a 32-bit integer register -
'' the destination itself when it is a register, else a scratch register
'' that is copied to the destination afterwards.
sub _emitLOADF2I_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src, precision
	dim as string work, workNarrow
	dim as integer sdsize, ddsize
	dim as integer workFree, reg, dstIsReg

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	'' special case unsigned integer
	if( ( typeIsSigned( dvreg->dtype ) = FALSE ) and ( ddsize = 4 ) ) then
		outp "sub esp, 8"
		if( svreg->typ <> IR_VREGTYPE_REG ) then
			outp "fld " + src
		elseif( svreg->regFamily = IR_REG_SSE ) then
			if( sdsize <= 4 ) then
				outp "movss dword ptr [esp]" + COMMA + src
				outp "fld dword ptr [esp]"
			else
				outp "movlpd qword ptr [esp]" + COMMA + src
				outp "fld qword ptr [esp]"
			end if
		end if
		outp "fistp qword ptr [esp]"
		'' keep the low dword, drop the high one
		hPOP dst
		outp "add esp, 4"
		exit sub
	end if

	dstIsReg = ( dvreg->typ = IR_VREGTYPE_REG )

	if( dstIsReg ) then
		workFree = TRUE
		'' not an integer? use the 32-bit name of the same register
		if( ddsize < 4 ) then
			dst = *hGetRegName( FB_DATATYPE_INTEGER, dvreg->reg )
		end if
		work = dst
	else
		'' dst is not a register: find one to convert into
		reg = hFindRegNotInVreg( svreg )

		work = *hGetRegName( FB_DATATYPE_INTEGER, reg )
		workNarrow = *hGetRegName( dvreg->dtype, reg )

		workFree = hIsRegFree( FB_DATACLASS_INTEGER, reg )
		if( workFree = FALSE ) then
			hPUSH work
		end if
	end if

	if( svreg->regFamily = IR_REG_FPU_STACK ) then
		'' value is in st(0): convert into a stack slot and pop it
		outp "sub esp, 4"
		if( ddsize = 2 ) then
			outp "fistp word ptr [esp]"
		else
			outp "fistp dword ptr [esp]"
		end if
		hPOP work
	else
		'' "d" for a double source, "s" for a single one
		if( sdsize > 4 ) then
			precision = "d"
		else
			precision = "s"
		end if

		if( typeIsSigned( dvreg->dtype ) and ( ddsize = 2 ) ) then
			'' signed short: packed convert, then pack the dwords down to
			'' words with signed saturation
			if( svreg->typ <> IR_VREGTYPE_REG ) then
				if( sdsize > 4 ) then
					outp "movlpd xmm7" + COMMA + src
				else
					outp "movss xmm7" + COMMA + src
				end if
				src = "xmm7"
			end if
			outp "cvtp" + precision + "2dq xmm7" + COMMA + src
			outp "packssdw xmm7, xmm7"
			outp "movd " + work + COMMA + "xmm7"
		else
			'' GAS doesn't like 32-bit dst and 64-bit src here... leave off the prefix
			hPrepOperand( svreg, src, , , , FALSE )
			outp "cvts" + precision + "2si " + work + COMMA + src
		end if
	end if

	if( dstIsReg = FALSE ) then
		'' copy the result out of the scratch register and restore it
		if( ddsize = 4 ) then
			outp "mov " + dst + COMMA + work
		else
			outp "mov " + dst + COMMA + workNarrow
		end if
		if( workFree = FALSE ) then
			hPOP work
		end if
	end if
end sub
636 
637 
638 
639 '':::::
'' Loads a 64-bit integer into a floating-point destination: the value is
'' converted on the FPU (fild) and then transferred to the destination
'' through a temporary stack slot.
''
'' dvreg: floating-point destination, filled with movss / movlpd.
'' svreg: 64-bit integer source. Register and immediate sources are pushed
''        on the stack as two halves first; any other source is loaded
''        directly. Unsigned sources get the hULONG2DBL fix-up after fild.
sub _emitLOADL2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, lo, hi
	dim as integer ddsize, srcSigned

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, lo )

	srcSigned = typeIsSigned( svreg->dtype )

	if( (svreg->typ = IR_VREGTYPE_REG) or (svreg->typ = IR_VREGTYPE_IMM) ) then

		'' build the 64-bit value on the stack, second half first
		hPrepOperand64( svreg, lo, hi )
		hPUSH( hi )
		hPUSH( lo )

		if( srcSigned ) then
			outp "fild " + dtypeTB(svreg->dtype).mname + " [esp]"
		else
			outp "fild qword ptr [esp]"
		end if

		outp "add esp, 8"

	else
		'' not a reg or imm: load it where it is
		outp "fild " + lo
	end if

	'' fild treats the value as signed; correct unsigned sources
	if( srcSigned = 0 ) then
		hULONG2DBL( svreg )
	end if

	'' st(0) -> stack slot -> destination
	ddsize = typeGetSize( dvreg->dtype )

	outp "sub esp" + COMMA + str( ddsize )
	if( ddsize <= 4 ) then
		outp "fstp dword ptr [esp]"
		outp "movss " + dst + COMMA + "dword ptr [esp]"
	else
		outp "fstp qword ptr [esp]"
		outp "movlpd " + dst + COMMA + "qword ptr [esp]"
	end if
	outp "add esp" + COMMA + str( ddsize )
end sub
709 
710 
711 
712 '':::::
'' Loads an integer of up to 32 bits into a floating-point destination.
''
'' dvreg: floating-point destination. When its register family is not
''        IR_REG_SSE the converted value is additionally pushed onto the
''        FPU stack (general path only).
'' svreg: integer source (register, memory or immediate).
''
'' Unsigned 32-bit sources are converted as two 16-bit halves:
''     result = (src and 0xFFFF) + (src shr 16) * 65536
'' where 65536.0 comes from a constant allocated with the bit pattern
'' &h40F0000000000000 (double) or &h47800000 (single).
sub _emitLOADI2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src
	dim as integer sdsize, ddsize
	dim as string opTail
	dim as string work
	dim as integer workFree, reg
	dim as FBSYMBOL ptr scaleSym
	dim as IRVREG ptr scaleVreg

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	'' special case for unsigned integers
	if( (typeIsSigned( svreg->dtype ) = 0) and (sdsize = 4) ) then
		'' find a register to hold the halves
		reg = hFindRegNotInVreg( svreg )
		work = *hGetRegName( FB_DATATYPE_INTEGER, reg )

		workFree = hIsRegFree( FB_DATACLASS_INTEGER, reg )
		if( workFree = FALSE ) then
			hPUSH work
		end if

		'' 65536.0 in the destination's precision
		if( ddsize <= 4 ) then
			scaleSym = symbAllocIntConst( &h47800000, FB_DATATYPE_UINT )
			scaleVreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, scaleSym, symbGetOfs( scaleSym ) )
			opTail = "ss "
		else
			scaleSym = symbAllocLongIntConst( &h40F0000000000000, FB_DATATYPE_ULONGINT )
			scaleVreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, scaleSym, symbGetOfs( scaleSym ) )
			opTail = "sd "
		end if

		scaleSym->var_.align = 16

		'' low 16 bits -> dst
		outp "mov " + work + COMMA + src
		outp "and " + work + COMMA + "0xFFFF"
		outp "cvtsi2" + opTail + dst + COMMA + work

		'' high 16 bits -> xmm7
		outp "mov " + work + COMMA + src
		outp "shr " + work + COMMA + "16"
		outp "cvtsi2" + opTail + "xmm7" + COMMA + work

		'' dst += xmm7 * 65536.0
		hPrepOperand( scaleVreg, src )
		outp "mul" + opTail + "xmm7" + COMMA + src
		outp "add" + opTail + dst + COMMA + "xmm7"

		if( workFree = FALSE ) then
			hPOP work
		end if
		exit sub
	end if

	if( (svreg->typ <> IR_VREGTYPE_IMM) and (sdsize = 4) ) then
		'' src is 32-bit mem or register: just use it
		workFree = TRUE
		work = src
	else
		'' src is not 32-bit mem or register: bring it into a register
		reg = hFindRegNotInVreg( svreg )
		work = *hGetRegName( FB_DATATYPE_INTEGER, reg )

		workFree = hIsRegFree( FB_DATACLASS_INTEGER, reg )
		if( workFree = FALSE ) then
			hPUSH work
		end if

		if( (svreg->typ = IR_VREGTYPE_IMM) or (sdsize = 4) ) then
			outp "mov " + work + COMMA + src
		elseif( typeIsSigned( svreg->dtype ) ) then
			outp "movsx " + work + COMMA + src
		else
			outp "movzx " + work + COMMA + src
		end if
	end if

	if( ddsize <= 4 ) then
		outp "cvtsi2ss " + dst + COMMA + work
	else
		outp "cvtsi2sd " + dst + COMMA + work
	end if

	if( workFree = FALSE ) then
		hPOP work
	end if

	'' an SSE destination is complete at this point
	if( dvreg->regFamily = IR_REG_SSE ) then
		exit sub
	end if

	'' otherwise also place the value on the FPU stack via a stack slot
	outp "sub esp" + COMMA + str( ddsize )
	if( ddsize <= 4 ) then
		outp "movss [esp]" + COMMA + dst
		outp "fld dword ptr [esp]"
	else
		outp "movlpd [esp]" + COMMA + dst
		outp "fld qword ptr [esp]"
	end if
	outp "add esp" + COMMA + str( ddsize )

end sub
822 
823 
824 
825 '':::::
'' Loads a floating-point value (scalar or vector) into a floating-point
'' destination, converting between single and double when the sizes
'' differ.
''
'' dvreg: destination; when its register family is IR_REG_FPU_STACK the
''        source is simply pushed with fld.
'' svreg: source; a non-zero svreg->vector selects the packed instructions.
sub _emitLOADF2F_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string src, dst
	dim as integer sdsize, ddsize

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "fld " + src
		exit sub
	end if

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	'' different precision: a single convert instruction does the load
	if( sdsize <> ddsize ) then
		if( sdsize > 4 ) then
			'' source is a double, dst is single
			if( svreg->vector ) then
				outp "cvtpd2ps " + dst + COMMA + src
			else
				outp "cvtsd2ss " + dst + COMMA + src
			end if
		else
			'' source is a single, dst is double
			if( svreg->vector ) then
				outp "cvtps2pd " + dst + COMMA + src
			else
				outp "cvtss2sd " + dst + COMMA + src
			end if
		end if
		exit sub
	end if

	'' same precision, scalar
	if( svreg->vector = 0 ) then
		if( ddsize > 4 ) then
			outp "movlpd " + dst + COMMA + src
		else
			outp "movss " + dst + COMMA + src
		end if
		exit sub
	end if

	'' same precision, packed: source operand without the size prefix
	hPrepOperand( svreg, src, , , , FALSE )
	if( ddsize > 4 ) then
		outp "movupd " + dst + COMMA + src
	elseif( svreg->vector = 2 ) then
		outp "movlps " + dst + COMMA + src
	else
		outp "movups " + dst + COMMA + src
	end if
end sub
882 
883 
884 '':::::
'' Moves a floating-point value between two operands. When exactly one
'' side is wider than 4 bytes a precision conversion is emitted instead of
'' a plain movaps.
''
'' dvreg: destination operand.
'' svreg: source operand; a non-zero svreg->vector selects the packed
''        conversions.
sub _emitMOVF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	dim as string dst, src
	dim as integer srcIsDouble, dstIsDouble

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	srcIsDouble = ( typeGetSize( svreg->dtype ) > 4 )
	dstIsDouble = ( typeGetSize( dvreg->dtype ) > 4 )

	if( srcIsDouble = dstIsDouble ) then
		'' same precision: whole-register copy
		outp "movaps " + dst + COMMA + src

	elseif( srcIsDouble ) then
		'' source is a double, dst is single
		if( svreg->vector ) then
			outp "cvtpd2ps " + dst + COMMA + src
		else
			outp "cvtsd2ss " + dst + COMMA + src
		end if

	else
		'' source is a single, dst is double
		if( svreg->vector ) then
			outp "cvtps2pd " + dst + COMMA + src
		else
			outp "cvtss2sd " + dst + COMMA + src
		end if
	end if

end sub
919 
920 
921 
922 '':::::
923 '' replicate the scalar operand
'' Replicates the lowest element of dvreg across the operand:
''   - elements wider than 4 bytes : unpcklpd dst, dst
''   - singles, dvreg->vector = 2  : unpcklps dst, dst
''   - singles, any other count    : shufps dst, dst, 0x0
sub _emitSWZREPF_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	dim as string operand

	hPrepOperand( dvreg, operand )

	if( typeGetSize( dvreg->dtype ) > 4 ) then
		outp "unpcklpd " + operand + COMMA + operand
	elseif( dvreg->vector = 2 ) then
		outp "unpcklps " + operand + COMMA + operand
	else
		outp "shufps " + operand + COMMA + operand + COMMA + "0x0"
	end if

end sub
947 
948 
949 '':::::
950 '' emit code to convert operands, if necessary. return TRUE if conversion occurred
'' Converts the source operand into xmm7 when its precision differs from
'' the destination's:
''   - dst wider than 4 bytes and src exactly 4 bytes : single -> double
''   - dst at most 4 bytes and src wider than 4 bytes : double -> single
'' A non-zero svreg->vector selects the packed form of the conversion.
''
'' Returns TRUE when a conversion was emitted (the converted value is then
'' in xmm7), FALSE when nothing was emitted.
function hEmitConvertOperands_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) as integer static

	dim as string dst, src
	dim as integer sdsize, ddsize, widen, narrow

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	widen  = ( ddsize > 4 ) and ( sdsize = 4 )
	narrow = ( ddsize <= 4 ) and ( sdsize > 4 )

	function = FALSE

	if( widen ) then
		'' convert src to double
		if( svreg->vector ) then
			outp "cvtps2pd xmm7" + COMMA + src
		else
			outp "cvtss2sd xmm7" + COMMA + src
		end if
		function = TRUE

	elseif( narrow ) then
		'' convert src to single
		if( svreg->vector ) then
			outp "cvtpd2ps xmm7" + COMMA + src
		else
			outp "cvtsd2ss xmm7" + COMMA + src
		end if
		function = TRUE
	end if

end function
991 
992 
993 
994 '':::::
'' Shared body of the four SSE floating-point arithmetic emitters
'' (_emitADDF_SSE, _emitSUBF_SSE, _emitMULF_SSE, _emitDIVF_SSE), which
'' differ only in the instruction stem and in the fallback text.
''
'' dvreg:   destination / left operand.
'' svreg:   source / right operand.
'' opstem:  instruction stem: "add", "sub", "mul" or "div".
'' todomsg: text emitted verbatim when svreg is not a floating-point type.
''
'' Steps:
''   1. if either operand was returned from a function it is in st(0):
''      move it into its SSE operand through a temporary stack slot
''   2. a packed source that is not in a register is loaded into xmm7
''   3. a source whose precision differs from the destination's is
''      converted into xmm7 (hEmitConvertOperands_SSE)
''   4. the instruction <opstem><s|p><s|d> dst, src is emitted
private sub hEmitArithF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr, _
		byval opstem as zstring ptr, _
		byval todomsg as zstring ptr _
	) static

	dim as string dst, src, ostr
	dim as integer sdsize, ddsize, returnSize

	hPrepOperand( dvreg, dst )
	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	'' if either operand is returned from a function, grab it from st(0)
	returnSize = 0
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		ostr = dst
		returnSize = ddsize
	elseif( svreg->regFamily = IR_REG_FPU_STACK ) then
		ostr = src
		returnSize = sdsize
	end if

	if( returnSize ) then
		outp "sub esp" + COMMA + str( returnSize )
		if( returnSize = 8 ) then
			outp "fstp qword ptr [esp]"
			outp "movlpd " + ostr + COMMA + "qword ptr [esp]"
		elseif( returnSize = 4 ) then
			outp "fstp dword ptr [esp]"
			outp "movss " + ostr + COMMA + "dword ptr [esp]"
		end if
		outp "add esp" + COMMA + str( returnSize )
	end if

	'' scalar form by default, packed form for vectors
	ostr = *opstem + "s"

	if( svreg->vector ) then
		ostr = *opstem + "p"

		'' a packed source in memory is loaded into xmm7 first, using an
		'' operand without the size prefix
		if( svreg->typ <> IR_VREGTYPE_REG ) then
			hPrepOperand( svreg, src, , , , FALSE )
			if( sdsize > 4 ) then
				outp "movupd xmm7" + COMMA + src
			else
				if( svreg->vector = 2 ) then
					outp "movlps xmm7" + COMMA + src
				else
					outp "movups xmm7" + COMMA + src
				end if
			end if
			src = "xmm7"
		end if
	end if

	'' on a precision mismatch the converted source ends up in xmm7
	if( hEmitConvertOperands_SSE( dvreg, svreg ) ) then
		src = "xmm7"
	end if

	if( typeGetClass( svreg->dtype ) = FB_DATACLASS_FPOINT ) then
		if( ddsize > 4 ) then
			'' operate in double-precision
			outp ostr + "d " + dst + COMMA + src
		else
			'' operate in single-precision
			outp ostr + "s " + dst + COMMA + src
		end if
	else
		'' This should never happen due to IR_OPT_FPUCONV
		outp *todomsg
	end if

end sub


'':::::
'' dvreg = dvreg + svreg
sub _emitADDF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	hEmitArithF_SSE( dvreg, svreg, "add", " implement 'add integer to float'" )

end sub


'':::::
'' dvreg = dvreg - svreg
sub _emitSUBF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	hEmitArithF_SSE( dvreg, svreg, "sub", " implement 'subtract integer from float'" )

end sub


'':::::
'' dvreg = dvreg * svreg
sub _emitMULF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	hEmitArithF_SSE( dvreg, svreg, "mul", " implement 'multiply float by integer'" )

end sub


'':::::
'' dvreg = dvreg / svreg
sub _emitDIVF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	hEmitArithF_SSE( dvreg, svreg, "div", " implement 'divide float by integer'" )

end sub
1311 
1312 
1313 
1314 '':::::
sub _emitATN2_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' Two-argument arctangent through the x87 unit: dvreg is pushed first,
	'' svreg second, "fpatan" combines them and the result is written back
	'' to dvreg. Register operands travel through an 8-byte scratch area at
	'' [esp]; other operands are loaded with a plain "fld".
	''
	'' NOTE(review): the result is always stored to [esp] and reloaded with
	'' movlpd/movss, even when neither operand was a register and therefore
	'' no scratch was reserved - presumably dvreg is always a register here;
	'' confirm against the callers.

	dim as string srcOp, dstOp
	dim as integer srcBytes, dstBytes
	dim as integer haveScratch

	hPrepOperand( svreg, srcOp )
	hPrepOperand( dvreg, dstOp )

	srcBytes = typeGetSize( svreg->dtype )
	dstBytes = typeGetSize( dvreg->dtype )

	haveScratch = FALSE

	'' first x87 operand: dvreg
	select case dvreg->typ
	case IR_VREGTYPE_REG
		outp "sub esp, 8"
		haveScratch = TRUE
		if( dstBytes > 4 ) then
			outp "movlpd qword ptr [esp]" + COMMA + dstOp
			outp "fld qword ptr [esp]"
		else
			outp "movss dword ptr [esp]" + COMMA + dstOp
			outp "fld dword ptr [esp]"
		end if
	case else
		outp "fld " + dstOp
	end select

	'' second x87 operand: svreg (ends up in st(0))
	if( svreg->typ <> IR_VREGTYPE_REG ) then
		outp "fld " + srcOp
	else
		'' reserve the scratch area only once
		if( haveScratch = FALSE ) then
			outp "sub esp, 8"
			haveScratch = TRUE
		end if
		if( srcBytes > 4 ) then
			outp "movlpd qword ptr [esp]" + COMMA + srcOp
			outp "fld qword ptr [esp]"
		else
			outp "movss dword ptr [esp]" + COMMA + srcOp
			outp "fld dword ptr [esp]"
		end if
	end if

	outp "fpatan"

	'' pop the result into dvreg
	if( dstBytes > 4 ) then
		outp "fstp qword ptr [esp]"
		outp "movlpd " + dstOp + COMMA + "qword ptr [esp]"
	else
		outp "fstp dword ptr [esp]"
		outp "movss " + dstOp + COMMA + "dword ptr [esp]"
	end if

	if( haveScratch ) then
		outp "add esp, 8"
	end if
end sub
1374 
1375 '':::::
sub _emitPOW_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' Power operation through the x87 unit; the result replaces dvreg.
	''
	'' dvreg is pushed first and svreg second, so svreg is st(0) and dvreg
	'' is st(1) when the sequence below starts. The sequence evaluates
	'' 2 ^ ( st(1) * log2( abs( st(0) ) ) ), splitting the exponent into an
	'' integer part (applied with fscale) and a fractional part (f2xm1).
	''
	'' Register operands travel through an 8-byte scratch area at [esp].

	dim as string src, dst
	dim as integer sdsize, ddsize
	dim as integer adjustStack

	hPrepOperand( svreg, src )
	hPrepOperand( dvreg, dst )

	'' The operand sizes select between the single (movss/dword) and the
	'' double (movlpd/qword) transfers below. They were never assigned
	'' before, so both were always 0 and doubles were moved as singles.
	sdsize = typeGetSize( svreg->dtype )
	ddsize = typeGetSize( dvreg->dtype )

	adjustStack = FALSE
	if( dvreg->typ = IR_VREGTYPE_REG ) then
		outp "sub esp, 8"
		adjustStack = TRUE
		if( ddsize > 4 ) then
			outp "movlpd qword ptr [esp]" + COMMA + dst
			outp "fld qword ptr [esp]"
		else
			outp "movss dword ptr [esp]" + COMMA + dst
			outp "fld dword ptr [esp]"
		end if
	else
		outp "fld " + dst
	end if

	if( svreg->typ = IR_VREGTYPE_REG ) then
		'' reserve the scratch area only once
		if( adjustStack = FALSE ) then
			outp "sub esp, 8"
			adjustStack = TRUE
		end if
		if( sdsize > 4 ) then
			outp "movlpd qword ptr [esp]" + COMMA + src
			outp "fld qword ptr [esp]"
		else
			outp "movss dword ptr [esp]" + COMMA + src
			outp "fld dword ptr [esp]"
		end if
	else
		outp "fld " + src
	end if

	outp "fabs"                  '' st(0) = abs( st(0) )
	outp "fyl2x"                 '' t = st(1) * log2( st(0) )
	outp "fld st(0)"
	outp "frndint"               '' st(0) = rounded t, st(1) = t
	outp "fsub st(1), st(0)"     '' st(1) = fractional remainder of t
	outp "fxch"
	outp "f2xm1"                 '' 2^fraction - 1
	outp "fld1"
	outp "faddp"                 '' 2^fraction
	outp "fscale"                '' scaled by 2^(rounded t)
	outp "fstp st(1)"            '' drop the rounded t, keep the result

	'' pop the result into dvreg
	if( ddsize > 4 ) then
		outp "fstp qword ptr [esp]"
		outp "movlpd " + dst + COMMA + "qword ptr [esp]"
	else
		outp "fstp dword ptr [esp]"
		outp "movss " + dst + COMMA + "dword ptr [esp]"
	end if
	if( adjustStack ) then
		outp "add esp, 8"
	end if

end sub
1444 
1445 
1446 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
1447 '' relational
1448 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
1449 
sub hCMPF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval mnemonic as zstring ptr, _
		byval mask as zstring ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' Common worker for the floating-point relational operators.
	''
	''   rvreg    - receives the boolean result (-1 / 0); when NULL only a
	''              conditional jump to the label is emitted
	''   label    - jump target; when NULL a unique label is generated
	''   mnemonic - condition-code suffix appended to "j" / "set"
	''   mask     - not used by this routine
	''   dvreg    - left operand of the comparison
	''   svreg    - right operand of the comparison

	dim as string resName, resName8, lhs, rhs, text, target, fpuTarget
	dim as integer edxIsFree
	dim as integer lhsBytes, rhsBytes, spillBytes

	lhsBytes = typeGetSize( dvreg->dtype )
	rhsBytes = typeGetSize( svreg->dtype )

	hPrepOperand( dvreg, lhs )
	hPrepOperand( svreg, rhs )

	if( label = NULL ) then
		target = *symbUniqueLabel( )
	else
		target = *symbGetMangledName( label )
	end if

	'' if either operand is returned from a function, grab it from st(0)
	'' (destination checked first, only one operand is handled)
	spillBytes = 0
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		fpuTarget = lhs
		spillBytes = lhsBytes
	elseif( svreg->regFamily = IR_REG_FPU_STACK ) then
		fpuTarget = rhs
		spillBytes = rhsBytes
	end if

	if( spillBytes <> 0 ) then
		outp "sub esp" + COMMA + str( spillBytes )

		select case spillBytes
		case 8
			outp "fstp qword ptr [esp]"
			outp "movlpd " + fpuTarget + COMMA + "qword ptr [esp]"
		case 4
			outp "fstp dword ptr [esp]"
			outp "movss " + fpuTarget + COMMA + "dword ptr [esp]"
		end select

		outp "add esp" + COMMA + str( spillBytes )
	end if

	'' compare; when exactly one operand is a single it is widened into
	'' xmm7 and the comparison is carried out in double precision
	if( (lhsBytes > 4) and (rhsBytes > 4) ) then
		outp "comisd " + lhs + COMMA + rhs
	elseif( lhsBytes > 4 ) then
		outp "cvtss2sd xmm7" + COMMA + rhs
		outp "comisd " + lhs + COMMA + "xmm7"
	elseif( rhsBytes > 4 ) then
		outp "cvtss2sd xmm7" + COMMA + lhs
		outp "comisd xmm7" + COMMA + rhs
	else
		outp "comiss " + lhs + COMMA + rhs
	end if

	'' nothing to store: the comparison only drives a branch
	if( rvreg = NULL ) then
		text = "j" + *mnemonic
		hBRANCH( text, target )
		exit sub
	end if

	hPrepOperand( rvreg, resName )

	if( env.clopt.cputype < FB_CPUTYPE_486 ) then
		'' old (and slow) boolean set: preload -1 and skip the clearing
		'' "xor" when the condition holds
		text = "mov " + resName + ", -1"
		outp text

		text = "j" + *mnemonic
		hBRANCH( text, target )

		text = "xor " + resName + COMMA + resName
		outp text

		hLabel( target )
		exit sub
	end if

	'' setcc based version
	resName8 = *hGetRegName( FB_DATATYPE_BYTE, rvreg->reg )

	select case rvreg->reg
	case EMIT_REG_ESI, EMIT_REG_EDI
		'' the flag is produced in dl instead; edx is swapped with the
		'' result register around the setcc when it is in use, or copied
		'' into the result register when it is free
		edxIsFree = hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EDX )
		if( edxIsFree = FALSE ) then
			text = "xchg edx, " + resName
			outp text
		end if

		text = "set" + *mnemonic + (TABCHAR + "dl")
		outp text

		if( edxIsFree = FALSE ) then
			text = "xchg edx, " + resName
			outp text
		else
			hMOV resName, "edx"
		end if

	case else
		text = "set" + *mnemonic + " " + resName8
		outp text
	end select

	'' convert 1 to -1 (TRUE in QB/FB): bit 0 goes to the carry flag and
	'' sbb spreads it over the whole register
	text = "shr " + resName + ", 1"
	outp text

	text = "sbb " + resName + COMMA + resName
	outp text

end sub
1576 
1577 
1578 
1579 
1580 '':::::
sub _emitCGTF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "greater than": delegates to hCMPF_SSE() with the
	'' "a" (above) condition code
	hCMPF_SSE rvreg, label, "a", "", dvreg, svreg

end sub
1592 
1593 
1594 
1595 '':::::
sub _emitCLTF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "less than": delegates to hCMPF_SSE() with the
	'' "b" (below) condition code
	hCMPF_SSE rvreg, label, "b", "", dvreg, svreg

end sub
1607 
1608 
1609 '':::::
sub _emitCEQF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "equal": delegates to hCMPF_SSE() with the
	'' "e" (equal) condition code
	hCMPF_SSE rvreg, label, "e", "", dvreg, svreg

end sub
1621 
1622 
1623 '':::::
sub _emitCNEF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "not equal": delegates to hCMPF_SSE() with the
	'' "ne" (not equal) condition code
	hCMPF_SSE rvreg, label, "ne", "", dvreg, svreg

end sub
1635 
1636 
1637 '':::::
sub _emitCLEF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "less than or equal": delegates to hCMPF_SSE() with
	'' the "be" (below or equal) condition code
	hCMPF_SSE rvreg, label, "be", "", dvreg, svreg

end sub
1649 
1650 
1651 '':::::
sub _emitCGEF_SSE _
	( _
		byval rvreg as IRVREG ptr, _
		byval label as FBSYMBOL ptr, _
		byval dvreg as IRVREG ptr, _
		byval svreg as IRVREG ptr _
	) static

	'' floating-point "greater than or equal": delegates to hCMPF_SSE()
	'' with the "ae" (above or equal) condition code
	hCMPF_SSE rvreg, label, "ae", "", dvreg, svreg

end sub
1663 
1664 
1665 
1666 '':::::
sub _emitNEGF_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Negates dvreg in place by flipping its sign bit: the value is xor'ed
	'' (xorps / xorpd) with a constant that has only the sign bit set.

	dim as string dst, src
	dim as integer ddsize
	dim as FBSYMBOL ptr sym
	dim as IRVREG ptr tempVreg

	ddsize = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, dst )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
		if( ddsize > 4 ) then
			outp "fstp qword ptr [esp]"
			outp "movlpd " + dst + COMMA + "qword ptr [esp]"
		else
			outp "fstp dword ptr [esp]"
			'' a single is 4 bytes wide, so it has to be loaded with
			'' movss (movlpd is a 64-bit load and does not match the
			'' dword operand); same form as _emitABSF_SSE
			outp "movss " + dst + COMMA + "dword ptr [esp]"
		end if
		outp "add esp" + COMMA + str( ddsize )
	end if

	'' sign-bit constant matching the operand width
	if( ddsize > 4 ) then
		sym = symbAllocLongIntConst(&h8000000000000000, FB_DATATYPE_ULONGINT)
		tempVreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, sym, symbGetOfs( sym ) )
	else
		sym = symbAllocIntConst(&h80000000, FB_DATATYPE_UINT)
		tempVreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym, symbGetOfs( sym ) )
	end if
	'' the constant is referenced as an xmmword memory operand below
	sym->var_.align = 16

	hPrepOperand( tempVreg, src, FB_DATATYPE_XMMWORD )

	if( ddsize > 4 ) then
		outp "xorpd " + dst + COMMA + src
	else
		outp "xorps " + dst + COMMA + src
	end if

end sub
1710 
1711 
1712 '':::::
sub _emitHADDF_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Horizontal add: sums the elements of the vector held in dvreg into
	'' its lowest element, using xmm7 as scratch. Doubles are always
	'' treated as a pair; singles are handled for 2, 3 or 4 elements
	'' (dvreg->vector), any other count emits nothing.

	dim acc as string

	hPrepOperand( dvreg, acc )

	'' double precision: add the high element onto the low one
	if( typeGetSize( dvreg->dtype ) > 4 ) then
		outp "movhlps xmm7" + COMMA + acc
		outp "addsd " + acc + COMMA + "xmm7"
		exit sub
	end if

	'' single precision
	select case dvreg->vector
	case 2
		'' element 1 -> xmm7, then e0 + e1
		outp "pshufd xmm7" + COMMA + acc + COMMA + "0x01"
		outp "addss " + acc + COMMA + "xmm7"

	case 3
		'' e0 + e1 first, then add element 2 from the high half
		outp "pshufd xmm7" + COMMA + acc + COMMA + "0x01"
		outp "addss " + acc + COMMA + "xmm7"
		outp "movhlps xmm7" + COMMA + acc
		outp "addss " + acc + COMMA + "xmm7"

	case 4
		'' fold the high half onto the low half, then add the two sums
		outp "movhlps xmm7" + COMMA + acc
		outp "addps " + acc + COMMA + "xmm7"
		outp "pshufd xmm7" + COMMA + acc + COMMA + "0x01"
		outp "addss " + acc + COMMA + "xmm7"
	end select

end sub
1743 
1744 
1745 '':::::
sub _emitABSF_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Absolute value of dvreg in place: the sign bit is cleared by and'ing
	'' (andps / andpd) with a constant that has every other bit set.

	dim as string value, maskOp, slot, loadOp, andOp
	dim as integer nbytes
	dim as FBSYMBOL ptr maskSym
	dim as IRVREG ptr maskVreg

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, value )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		if( nbytes > 4 ) then
			slot = "qword ptr [esp]"
			loadOp = "movlpd "
		else
			slot = "dword ptr [esp]"
			loadOp = "movss "
		end if

		outp "sub esp" + COMMA + str( nbytes )
		outp "fstp " + slot
		outp loadOp + value + COMMA + slot
		outp "add esp" + COMMA + str( nbytes )
	end if

	'' mask constant and instruction matching the operand width
	if( nbytes > 4 ) then
		maskSym = symbAllocLongIntConst(&h7FFFFFFFFFFFFFFF, FB_DATATYPE_ULONGINT)
		maskVreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, maskSym, symbGetOfs( maskSym ) )
		andOp = "andpd "
	else
		maskSym = symbAllocIntConst(&h7FFFFFFF, FB_DATATYPE_UINT)
		maskVreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, maskSym, symbGetOfs( maskSym ) )
		andOp = "andps "
	end if
	'' the constant is referenced as an xmmword memory operand
	maskSym->var_.align = 16

	hPrepOperand( maskVreg, maskOp, FB_DATATYPE_XMMWORD )

	outp andOp + value + COMMA + maskOp

end sub
1789 
1790 
1791 
1792 '':::::
sub _emitSGNF_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Sign function: replaces dvreg with -1.0, 0.0 or +1.0 without
	'' branching, using xmm7 as scratch.
	''
	''   xmm7 = all ones when dvreg is not equal to 0, else 0   (cmpneq)
	''   dvreg = dvreg with all non-sign bits set                (or)
	''   xmm7 = bit pattern of -1.0, or 0 when dvreg was 0      (and)
	''   dvreg = dvreg and xmm7 -> -1.0 / +1.0 / 0.0

	dim as string dst, src
	dim as FBSYMBOL ptr sym
	dim as IRVREG ptr tempVreg
	dim as integer ddsize

	ddsize = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, dst )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
		if( ddsize > 4 ) then
			outp "fstp qword ptr [esp]"
			outp "movlpd " + dst + COMMA + "qword ptr [esp]"
		else
			outp "fstp dword ptr [esp]"
			'' a single is 4 bytes wide, so it has to be loaded with
			'' movss (movlpd is a 64-bit load and does not match the
			'' dword operand); same form as _emitABSF_SSE
			outp "movss " + dst + COMMA + "dword ptr [esp]"
		end if
		outp "add esp" + COMMA + str( ddsize )
	end if

	if( ddsize > 4 ) then
		outp "xorpd xmm7, xmm7"
		outp "cmpneqsd xmm7" + COMMA + dst

		sym = symbAllocLongIntConst(&h7FFFFFFFFFFFFFFF, FB_DATATYPE_ULONGINT)
		sym->var_.align = 16
		tempVreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, sym, symbGetOfs( sym ) )
		hPrepOperand( tempVreg, src, FB_DATATYPE_XMMWORD )
		outp "orpd " + dst + COMMA + src        '' set bits 62-0, sign bit is unchanged

		sym = symbAllocLongIntConst(&hBFF0000000000000, FB_DATATYPE_ULONGINT)
		sym->var_.align = 16
		tempVreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, sym, symbGetOfs( sym ) )
		hPrepOperand( tempVreg, src, FB_DATATYPE_XMMWORD )
		outp "andpd xmm7" + COMMA + src         '' load -1.0, kill if == 0.0

		outp "andpd " + dst + COMMA + "xmm7"    '' get +/-1.0 or 0.0
	else
		outp "xorps xmm7, xmm7"
		outp "cmpneqss xmm7" + COMMA + dst

		sym = symbAllocIntConst(&h7FFFFFFF, FB_DATATYPE_UINT)
		sym->var_.align = 16
		tempVreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym, symbGetOfs( sym ) )
		hPrepOperand( tempVreg, src, FB_DATATYPE_XMMWORD )
		outp "orps " + dst + COMMA + src        '' set bits 30-0, sign bit is unchanged

		sym = symbAllocIntConst(&hBF800000, FB_DATATYPE_UINT)
		sym->var_.align = 16
		tempVreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym, symbGetOfs( sym ) )
		hPrepOperand( tempVreg, src, FB_DATATYPE_XMMWORD )
		outp "andps xmm7" + COMMA + src         '' load -1.0f, kill if == 0.0f

		outp "andps " + dst + COMMA + "xmm7"    '' get +/-1.0f or 0.0f
	end if
end sub
1854 
1855 
'':::::
sub _emitSINCOS_FAST_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval iscos as integer _
	) static

	'' Inline single-precision sine/cosine approximation (used by
	'' _emitSIN_SSE / _emitCOS_SSE in fast floating-point mode).
	''
	''   dvreg - operand, replaced by the result
	''   iscos - TRUE to compute the cosine, FALSE for the sine
	''
	'' The argument is scaled by 2/pi, split into an integer part and a
	'' remainder, the remainder is folded depending on the integer part and
	'' fed to a polynomial in the squared remainder (coefficients
	'' sin_c0..sin_c3); the sign is reassembled in an integer register.
	''
	'' Scratch: two integer registers (reg(0), reg(1)), one SSE register
	'' (reg(2)) and xmm7. Scratch registers that are not free are saved to
	'' the stack and restored at the end.
	''
	'' Stack layout while the sequence runs:
	''   [esp]      work slot (FPU transfer / saved sign of the argument)
	''   [esp+4]..  one 4-byte save slot per scratch register in use
	''
	'' NOTE(review): an SSE scratch register is saved with movss, i.e. only
	'' its low 32 bits are preserved - confirm that is sufficient.

	dim as integer reg(2), isFree(2), stackSize, i, stackPointer
	dim as string dst, src, regName(2)
	dim as FBSYMBOL ptr sym_invSignBitMask, sym_one, sym_piOverTwo, sym_twoOverPI
	dim as FBSYMBOL ptr sym_sin_c0, sym_sin_c1, sym_sin_c2, sym_sin_c3
	dim as IRVREG ptr vReg_invSignBitMask, vReg_one, vReg_piOverTwo, vReg_twoOverPI
	dim as IRVREG ptr vReg_sin_c0, vReg_sin_c1, vReg_sin_c2, vReg_sin_c3

	hPrepOperand( dvreg, dst )

	stackSize = 4	'' 4 bytes always needed

	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		stackSize += 4
	end if

	'' find two integer scratch registers: prefer free ones, fall back to
	'' ecx / eax (saved and restored) otherwise
	reg(0) = EMIT_REG_ECX
	isFree(0) = FALSE

	reg(1) = EMIT_REG_EAX
	isFree(1) = FALSE
	if( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_ECX ) ) then
		reg(0) = EMIT_REG_ECX
		isFree(0) = TRUE
		if( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EDX ) ) then
			reg(1) = EMIT_REG_EDX
			isFree(1) = TRUE
		elseif( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EAX ) ) then
			reg(1) = EMIT_REG_EAX
			isFree(1) = TRUE
		end if
	elseif( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EDX ) ) then
		reg(0) = EMIT_REG_EDX
		isFree(0) = TRUE
		if( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EAX ) ) then
			reg(1) = EMIT_REG_EAX
			isFree(1) = TRUE
		end if
	else
		if( hIsRegFree( FB_DATACLASS_INTEGER, EMIT_REG_EAX ) ) then
			reg(1) = EMIT_REG_EAX
			isFree(1) = TRUE
		end if
	end if

	'' and one floating-point scratch register
	reg(2) = hFindFreeReg( FB_DATACLASS_FPOINT )
	if( reg(2) = INVALID ) then
		reg(2) = EMIT_REG_FP0
		isFree(2) = FALSE
	else
		isFree(2) = TRUE
	end if

	'' Reserve one save slot for every scratch register that is NOT free:
	'' those are the ones the save/restore loops below write to
	'' [esp+4], [esp+8], ... The previous computation added the space for
	'' the free registers instead, so the saves of in-use registers landed
	'' outside the reserved area.
	for i = 0 to 2
		if( isFree(i) = FALSE ) then
			stackSize += 4
		end if
	next i

	regName(0) = *hGetRegName( FB_DATATYPE_INTEGER, reg(0) )
	regName(1) = *hGetRegName( FB_DATATYPE_INTEGER, reg(1) )
	regName(2) = *hGetRegName( FB_DATATYPE_SINGLE, reg(2) )

	'' constants, given as single-precision bit patterns
	'' mask clearing the sign bit
	sym_invSignBitMask = symbAllocIntConst(&h7FFFFFFF, FB_DATATYPE_UINT)
	sym_invSignBitMask->var_.align = 16
	vReg_invSignBitMask = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_invSignBitMask, symbGetOfs( sym_invSignBitMask ) )

	sym_piOverTwo = symbAllocIntConst(&h3FC90FDB, FB_DATATYPE_UINT)
	sym_piOverTwo->var_.align = 16
	vReg_piOverTwo = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_piOverTwo, symbGetOfs( sym_piOverTwo ) )

	sym_twoOverPI = symbAllocIntConst(&h3F22F983, FB_DATATYPE_UINT)
	sym_twoOverPI->var_.align = 16
	vReg_twoOverPI = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_twoOverPI, symbGetOfs( sym_twoOverPI ) )

	'' 1.0
	sym_one = symbAllocIntConst(&h3F800000, FB_DATATYPE_UINT)
	sym_one->var_.align = 16
	vReg_one = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_one, symbGetOfs( sym_one ) )

	'' polynomial coefficients
	sym_sin_c0 = symbAllocIntConst(&h3FC90FDB, FB_DATATYPE_UINT)
	sym_sin_c0->var_.align = 16
	vReg_sin_c0 = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_sin_c0, symbGetOfs( sym_sin_c0 ) )

	sym_sin_c1 = symbAllocIntConst(&hBF255DE7, FB_DATATYPE_UINT)
	sym_sin_c1->var_.align = 16
	vReg_sin_c1 = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_sin_c1, symbGetOfs( sym_sin_c1 ) )

	sym_sin_c2 = symbAllocIntConst(&h3DA335E3, FB_DATATYPE_UINT)
	sym_sin_c2->var_.align = 16
	vReg_sin_c2 = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_sin_c2, symbGetOfs( sym_sin_c2 ) )

	sym_sin_c3 = symbAllocIntConst(&hBB996966, FB_DATATYPE_UINT)
	sym_sin_c3->var_.align = 16
	vReg_sin_c3 = irAllocVRVAR( FB_DATATYPE_UINT, NULL, sym_sin_c3, symbGetOfs( sym_sin_c3 ) )

	outp "sub esp" + COMMA + str( stackSize )

	'' a function result is still in st(0): fetch it through the work slot
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "fstp dword ptr [esp]"
		outp "movss " + dst + COMMA + "dword ptr [esp]"
	end if

	'' save the scratch registers that are in use
	stackPointer = 4
	for i = 0 to 2
		if( isFree(i) = FALSE ) then
			if( i < 2 ) then
				outp "mov [esp+" + str(stackPointer) + "]" + COMMA + regName(i)
			else
				outp "movss [esp+" + str(stackPointer) + "]" + COMMA + regName(i)
			end if
			stackPointer += 4
		end if
	next i

	'' sine: remember the sign of the argument in the work slot and scale
	'' the argument by 2/pi
	if( iscos = FALSE ) then
		outp "movss [esp]" + COMMA + dst

		hPrepOperand( vReg_twoOverPI, src )
		outp "mulss " + dst + COMMA + src

		outp "and dword ptr [esp], 0x80000000"
	end if

	'' drop the sign of the working value
	hPrepOperand( vReg_invSignBitMask, src, FB_DATATYPE_XMMWORD )
	outp "andps " + dst + COMMA + src

	'' cosine: shift by pi/2, then scale by 2/pi
	if( iscos = TRUE ) then
		hPrepOperand( vReg_piOverTwo, src )
		outp "addss " + dst + COMMA + src

		hPrepOperand( vReg_twoOverPI, src )
		outp "mulss " + dst + COMMA + src
	end if

	'' integer part of the scaled argument
	outp "cvttss2si " + regName(0) + COMMA + dst

	hPrepOperand( vReg_one, src )
	outp "movss xmm7" + COMMA + src
	outp "mov " + regName(1) + COMMA + regName(0)
	outp "cvtsi2ss " + regName(2) + COMMA + regName(0)
	outp "shl " + regName(1) + COMMA + "30"
	outp "not " + regName(0)
	outp "and " + regName(1) + COMMA + "0x80000000"	'' bit 1 of the integer part -> sign bit
	outp "and " + regName(0) + COMMA + "0x1"
	outp "subss " + dst + COMMA + regName(2)		'' remainder
	outp "dec " + regName(0)						'' 0 or -1 select mask from bit 0
	outp "minss " + dst + COMMA + "xmm7"
	outp "movd " + regName(2) + COMMA + regName(0)
	outp "subss xmm7" + COMMA + dst					'' 1.0 - remainder
	outp "andps xmm7" + COMMA + regName(2)
	outp "andnps " + regName(2) + COMMA + dst
	outp "orps xmm7" + COMMA + regName(2)			'' pick remainder or 1.0 - remainder

	'' sine: fold in the saved sign of the argument
	if( iscos = FALSE ) then
		outp "xor " + regName(1) + COMMA + "[esp]"
	end if
	outp "movd " + regName(0) + COMMA + "xmm7"

	outp "mulss xmm7, xmm7"							'' squared reduced argument

	outp "or " + regName(1) + COMMA + regName(0)	'' reduced argument with final sign

	outp "movss " + regName(2) + COMMA + "xmm7"

	'' polynomial ((c3*s + c2)*s + c1)*s + c0 with s in regName(2)
	hPrepOperand( vReg_sin_c3, src )
	outp "mulss xmm7" + COMMA + src

	hPrepOperand( vReg_sin_c2, src )
	outp "addss xmm7" + COMMA + src
	outp "mulss xmm7" + COMMA + regName(2)

	outp "movd " + dst + COMMA + regName(1)

	hPrepOperand( vReg_sin_c1, src )
	outp "addss xmm7" + COMMA + src
	outp "mulss xmm7" + COMMA + regName(2)

	hPrepOperand( vReg_sin_c0, src )
	outp "addss xmm7" + COMMA + src
	outp "mulss " + dst + COMMA + "xmm7"

	'' restore the scratch registers saved above (same slot order)
	stackPointer = 4
	for i = 0 to 2
		if( isFree(i) = FALSE ) then
			if( i < 2 ) then
				outp "mov " + regName(i) + COMMA + "[esp+" + str(stackPointer) + "]"
			else
				outp "movss " + regName(i) + COMMA + "[esp+" + str(stackPointer) + "]"
			end if
			stackPointer += 4
		end if
	next i

	outp "add esp" + COMMA + str( stackSize )
end sub
2057 
2058 
2059 
2060 '':::::
sub _emitSIN_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Sine of dvreg, in place. Singles in fast floating-point mode use the
	'' inline approximation; everything else goes through the x87 "fsin"
	'' instruction with the value passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )

	if( ( nbytes = 4 ) and ( env.clopt.fpmode = FB_FPMODE_FAST ) ) then
		_emitSINCOS_FAST_SSE dvreg, FALSE
		exit sub
	end if

	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	outp "fsin"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2105 
2106 
2107 '':::::
sub _emitASIN_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Arcsine of dvreg, in place, computed on the x87 unit with the value
	'' passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	'' asin( x ) = atn( sqr( (x*x) / (1-x*x) ) )
	outp "fld st(0)"
	outp "fmul st(0), st(0)"
	outp "fld1"
	outp "fsubrp"
	outp "fsqrt"
	outp "fpatan"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2152 
2153 
2154 '':::::
sub _emitCOS_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Cosine of dvreg, in place. Singles in fast floating-point mode use
	'' the inline approximation; everything else goes through the x87
	'' "fcos" instruction with the value passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )

	if( ( nbytes = 4 ) and ( env.clopt.fpmode = FB_FPMODE_FAST ) ) then
		_emitSINCOS_FAST_SSE dvreg, TRUE
		exit sub
	end if

	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	outp "fcos"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2199 
2200 
2201 
2202 '':::::
sub _emitACOS_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Arccosine of dvreg, in place, computed on the x87 unit with the
	'' value passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	'' acos( x ) = atn( sqr( (1-x*x) / (x*x) ) )
	outp "fld st(0)"
	outp "fmul st(0), st(0)"
	outp "fld1"
	outp "fsubrp"
	outp "fsqrt"
	outp "fxch"
	outp "fpatan"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2248 
2249 
2250 '':::::
sub _emitTAN_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Tangent of dvreg, in place, through the x87 "fptan" instruction with
	'' the value passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	'' fptan leaves an extra 1.0 on top of the result: discard it
	outp "fptan"
	outp "fstp st(0)"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2290 
2291 
2292 '':::::
sub _emitATAN_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Arctangent of dvreg, in place: "fpatan" is applied to the value and
	'' the constant 1.0, with the value passed over a temporary stack slot.

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	outp "fld1"
	outp "fpatan"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2332 
2333 
2334 '':::::
sub _emitSQRT_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Square root of dvreg, in place, using sqrtss / sqrtsd.

	dim as string dst
	dim as integer ddsize

	hPrepOperand( dvreg, dst )
	ddsize = typeGetSize( dvreg->dtype )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot (released at the end)
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
		if( ddsize > 4 ) then
			outp "fstp qword ptr [esp]"
			'' the double was stored as a qword, so it has to be read
			'' back as a qword (this said "dword ptr" before)
			outp "movlpd " + dst + COMMA + "qword ptr [esp]"
		else
			outp "fstp dword ptr [esp]"
			outp "movss " + dst + COMMA + "dword ptr [esp]"
		end if
	end if

	if( ddsize > 4 ) then
		outp "sqrtsd " + dst + COMMA + dst
	else
		outp "sqrtss " + dst + COMMA + dst
	end if

	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "add esp" + COMMA + str( ddsize )
	end if

end sub
2368 
2369 '':::::
sub _emitRSQRT_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Reciprocal square root of dvreg, in place, using the SSE rsqrt
	'' instruction (an approximation, not an exact 1/sqrt).

	dim as string dst
	dim as integer ddsize

	hPrepOperand( dvreg, dst )
	ddsize = typeGetSize( dvreg->dtype )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot (released at the end)
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
		if( ddsize > 4 ) then
			outp "fstp qword ptr [esp]"
			'' the double was stored as a qword, so it has to be read
			'' back as a qword (this said "dword ptr" before)
			outp "movlpd " + dst + COMMA + "qword ptr [esp]"
		else
			outp "fstp dword ptr [esp]"
			outp "movss " + dst + COMMA + "dword ptr [esp]"
		end if
	end if

	if( ddsize > 4 ) then
		'' NOTE(review): SSE/SSE2 only define rsqrtss / rsqrtps; there is
		'' no "rsqrtsd" instruction, so this line cannot assemble.
		'' Presumably this routine is only reached for singles - confirm
		'' against the callers before replacing it.
		outp "rsqrtsd " + dst + COMMA + dst
	else
		outp "rsqrtss " + dst + COMMA + dst
	end if

	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "add esp" + COMMA + str( ddsize )
	end if

end sub
2403 
2404 '':::::
sub _emitRCP_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Reciprocal of dvreg, in place, using the SSE rcp instruction (an
	'' approximation, not an exact 1/x).

	dim as string dst
	dim as integer ddsize

	hPrepOperand( dvreg, dst )
	ddsize = typeGetSize( dvreg->dtype )

	'' a function result is still in st(0): move it into the SSE operand
	'' through a temporary stack slot (released at the end)
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
		if( ddsize > 4 ) then
			outp "fstp qword ptr [esp]"
			'' the double was stored as a qword, so it has to be read
			'' back as a qword (this said "dword ptr" before)
			outp "movlpd " + dst + COMMA + "qword ptr [esp]"
		else
			outp "fstp dword ptr [esp]"
			outp "movss " + dst + COMMA + "dword ptr [esp]"
		end if
	end if

	if( ddsize > 4 ) then
		'' NOTE(review): SSE/SSE2 only define rcpss / rcpps; there is no
		'' "rcpsd" instruction, so this line cannot assemble. Presumably
		'' this routine is only reached for singles - confirm against the
		'' callers before replacing it.
		outp "rcpsd " + dst + COMMA + dst
	else
		outp "rcpss " + dst + COMMA + dst
	end if

	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "add esp" + COMMA + str( ddsize )
	end if

end sub
2438 
2439 
2440 '':::::
sub _emitLOG_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	'' Natural logarithm of dvreg, in place, computed on the x87 unit with
	'' the value passed over a temporary stack slot.
	''
	'' log( x ) = log2( x ) / log2( e ), emitted as ln(2) * log2( x )
	'' (fldln2 followed by fyl2x).

	dim as string target, slot, xferOp
	dim as integer nbytes

	nbytes = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, target )

	'' slot width and transfer instruction follow the operand size
	if( nbytes > 4 ) then
		slot = "qword ptr [esp]"
		xferOp = "movlpd "
	else
		slot = "dword ptr [esp]"
		xferOp = "movss "
	end if

	'' SSE operand: push it onto the x87 stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( nbytes )
		outp xferOp + slot + COMMA + target
		outp "fld " + slot
	end if

	outp "fldln2"
	outp "fxch"
	outp "fyl2x"

	'' operand was already in st(0): the slot still has to be reserved
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( nbytes )
	end if

	'' pop the result into the SSE operand and release the slot
	outp "fstp " + slot
	outp xferOp + target + COMMA + slot
	outp "add esp" + COMMA + str( nbytes )

end sub
2484 
2485 '':::::
'' Emits e^x for dvreg using the x87 unit; the result is stored into dst
'' through a scratch stack slot.
''
'' With t = x * log2( e ), n = rndint( t ) and f = t - n:
''   exp( x ) = 2^t = 2^f * 2^n
sub _emitEXP_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	dim as string dst, movop, memop
	dim as integer ddsize

	ddsize = typeGetSize( dvreg->dtype )
	hPrepOperand( dvreg, dst )

	'' pick the move mnemonic and the stack operand matching the operand width
	if( ddsize > 4 ) then
		movop = "movlpd "
		memop = "qword ptr [esp]"
	else
		movop = "movss "
		memop = "dword ptr [esp]"
	end if

	'' SSE operand: spill it to the stack and load it onto the FPU stack
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp "sub esp" + COMMA + str( ddsize )
		outp movop + memop + COMMA + dst
		outp "fld " + memop
	end if

	outp "fldl2e"
	outp "fmulp st(1), st"          '' t = x * log2( e )
	outp "fld st"
	outp "frndint"                  '' n = rndint( t )
	outp "fsub st(1), st"           '' f = t - n
	outp "fxch"
	outp "f2xm1"                    '' 2^f - 1
	'' can't use fld1 because max 2 fp regs can be used,
	'' so add 1.0 (as a single) straight from the stack
	hPUSH( "0x3f800000" )
	outp "fadd dword ptr [esp]"     '' 2^f
	outp "add esp, 4"
	outp "fscale"                   '' 2^f * 2^n
	outp "fstp st(1)"               '' drop n, keep the result

	'' FPU operand: no slot was reserved above, reserve it now
	if( dvreg->regFamily = IR_REG_FPU_STACK ) then
		outp "sub esp" + COMMA + str( ddsize )
	end if

	'' pop the result into dst and release the slot
	outp "fstp " + memop
	outp movop + dst + COMMA + memop
	outp "add esp" + COMMA + str( ddsize )

end sub
2537 
2538 
2539 '':::::
'' Emits floor( dst ), in place.
''
'' The value f is converted to an integer and back with fistp/fild (round(f)),
'' then 1.0 is subtracted whenever round(f) turned out greater than f.
'' xmm7 is used as scratch, plus 8 bytes of stack.
sub _emitFLOOR_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	dim as string dst, negone, suffix, movop, memop
	dim as integer ddsize
	dim as FBSYMBOL ptr negone_sym
	dim as IRVREG ptr negone_vreg

	'' allocate the constant -1.0 in the operand's precision and pick the
	'' width-dependent pieces of the instructions emitted below
	ddsize = typeGetSize( dvreg->dtype )
	if( ddsize > 4 ) then
		negone_sym = symbAllocLongIntConst( &hBFF0000000000000, FB_DATATYPE_ULONGINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, negone_sym, symbGetOfs( negone_sym ) )
		suffix = "d "
		movop = "movlpd "
		memop = "qword ptr [esp]"
	else
		negone_sym = symbAllocIntConst( &hBF800000, FB_DATATYPE_UINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, negone_sym, symbGetOfs( negone_sym ) )
		suffix = "s "
		movop = "movss "
		memop = "dword ptr [esp]"
	end if
	'' the constant is used as a 128-bit memory operand of andps/andpd
	negone_sym->var_.align = 16

	hPrepOperand( dvreg, dst )
	hPrepOperand( negone_vreg, negone, FB_DATATYPE_XMMWORD )

	outp "sub esp, 8"

	'' get f onto the FPU stack and a copy of it into xmm7
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp movop + memop + COMMA + dst
		outp "fld " + memop
		outp "movap" + suffix + "xmm7" + COMMA + dst
	else
		outp "fst " + memop
		outp movop + "xmm7, " + memop
	end if

	outp "fistp qword ptr [esp]"
	outp "fild qword ptr [esp]"
	outp "fstp " + dtypeTB(dvreg->dtype).mname + " [esp]"   '' [esp] = round(f)
	outp "xorp" + suffix + dst + COMMA + dst                '' dst = 0.0
	outp "subs" + suffix + "xmm7" + COMMA + "[esp]"         '' xmm7 = f - round(f)
	outp "cmpnles" + suffix + dst + COMMA + "xmm7"          '' 0 > f - round(f) ? all ones : 0
	outp "andp" + suffix + dst + COMMA + negone             '' round(f) > f ? -1.0 : 0.0
	outp "adds" + suffix + dst + COMMA + "[esp]"            '' round(f) plus the correction

	outp "add esp, 8"

end sub
2598 
2599 
2600 '':::::
'' Emits fix( dst ) (truncation toward zero), in place.
''
'' dst = floor( abs( dst ) ) * sgn( dst )
''
'' The value f is converted to an integer and back with fistp/fild (round(f));
'' when that moved away from zero, 1.0 with the sign of f is taken off again.
'' xmm7 is used as scratch. Stack layout: [esp] = 8 bytes for the integer /
'' rounded value, [esp+8] = the constant 0.0.
sub _emitFIX_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	dim as string dst, suffix, signbit, negone, movop, memop
	dim as integer ddsize
	dim as FBSYMBOL ptr negone_sym, signbit_sym
	dim as IRVREG ptr negone_vreg, signbit_vreg

	'' allocate -1.0 and the sign-bit mask in the operand's precision and pick
	'' the width-dependent pieces of the instructions emitted below
	ddsize = typeGetSize( dvreg->dtype )
	if( ddsize > 4 ) then
		negone_sym = symbAllocLongIntConst( &hBFF0000000000000, FB_DATATYPE_ULONGINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, negone_sym, symbGetOfs( negone_sym ) )

		signbit_sym = symbAllocLongIntConst( &h8000000000000000, FB_DATATYPE_ULONGINT )
		signbit_vreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, signbit_sym, symbGetOfs( signbit_sym ) )

		suffix = "d "
		movop = "movlpd "
		memop = "qword ptr [esp]"
	else
		negone_sym = symbAllocIntConst( &hBF800000, FB_DATATYPE_UINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, negone_sym, symbGetOfs( negone_sym ) )

		signbit_sym = symbAllocIntConst( &h80000000, FB_DATATYPE_UINT )
		signbit_vreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, signbit_sym, symbGetOfs( signbit_sym ) )

		suffix = "s "
		movop = "movss "
		memop = "dword ptr [esp]"
	end if

	'' both constants are used as 128-bit memory operands
	negone_sym->var_.align = 16
	signbit_sym->var_.align = 16

	hPrepOperand( dvreg, dst )
	hPrepOperand( negone_vreg, negone, FB_DATATYPE_XMMWORD )
	hPrepOperand( signbit_vreg, signbit, FB_DATATYPE_XMMWORD )

	outp "sub esp" + COMMA + str( ddsize + 8 )

	'' make f available both on the FPU stack and in dst
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp movop + memop + COMMA + dst
		outp "fld " + memop
	else
		outp "fst " + memop
		outp movop + dst + COMMA + memop
	end if

	outp "xorp" + suffix + "xmm7, xmm7"
	outp movop + "[esp+8], xmm7"                            '' 0.0

	outp "fistp qword ptr [esp]"
	outp "cmpnles" + suffix + "xmm7" + COMMA + dst          '' f < 0 ? 1 : 0
	outp "fild qword ptr [esp]"
	outp "andp" + suffix + "xmm7" + COMMA + signbit         '' f < 0 ? sign bit : 0
	outp "fstp " + dtypeTB(dvreg->dtype).mname + " [esp]"   '' round(f)
	outp "subs" + suffix + dst + COMMA + "[esp]"            '' difference = (f - round(f))
	outp "xorp" + suffix + dst + COMMA + "xmm7"             '' f < 0 ? -difference : difference
	outp "xorp" + suffix + "xmm7" + COMMA + negone          '' f < 0 ? 1.0 : -1.0
	'' difference < 0 ? 1 : 0
	outp "cmplts" + suffix + dst + COMMA + "[esp+8]"
	outp "andp" + suffix + dst + COMMA + "xmm7"             '' difference < 0 ? +/- 1.0 : 0.0
	outp "adds" + suffix + dst + COMMA + "[esp]"            '' round(f) +/- 1
	outp "add esp" + COMMA + str( ddsize + 8 )

end sub
2681 
2682 '':::::
'' Emits frac( dst ), in place.
''
'' dst = dst - fix( dst )
''
'' fix( dst ) is computed into xmm7 with the same instruction sequence as
'' _emitFIX_SSE; meanwhile the original value is parked in the upper part of
'' dst and brought back before the final subtraction.
'' Stack layout: [esp] = 8 bytes for the integer / rounded value,
'' [esp+8] = the constant 0.0.
sub _emitFRAC_SSE _
	( _
		byval dvreg as IRVREG ptr _
	) static

	dim as string dst, suffix, signbit, negone, movop, memop
	dim as integer ddsize
	dim as FBSYMBOL ptr negone_sym, signbit_sym
	dim as IRVREG ptr negone_vreg, signbit_vreg

	'' allocate -1.0 and the sign-bit mask in the operand's precision and pick
	'' the width-dependent pieces of the instructions emitted below
	ddsize = typeGetSize( dvreg->dtype )
	if( ddsize > 4 ) then
		negone_sym = symbAllocLongIntConst( &hBFF0000000000000, FB_DATATYPE_ULONGINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, negone_sym, symbGetOfs( negone_sym ) )

		signbit_sym = symbAllocLongIntConst( &h8000000000000000, FB_DATATYPE_ULONGINT )
		signbit_vreg = irAllocVRVAR( FB_DATATYPE_ULONGINT, NULL, signbit_sym, symbGetOfs( signbit_sym ) )

		suffix = "d "
		movop = "movlpd "
		memop = "qword ptr [esp]"
	else
		negone_sym = symbAllocIntConst( &hBF800000, FB_DATATYPE_UINT )
		negone_vreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, negone_sym, symbGetOfs( negone_sym ) )

		signbit_sym = symbAllocIntConst( &h80000000, FB_DATATYPE_UINT )
		signbit_vreg = irAllocVRVAR( FB_DATATYPE_UINT, NULL, signbit_sym, symbGetOfs( signbit_sym ) )

		suffix = "s "
		movop = "movss "
		memop = "dword ptr [esp]"
	end if

	'' both constants are used as 128-bit memory operands
	negone_sym->var_.align = 16
	signbit_sym->var_.align = 16

	hPrepOperand( dvreg, dst )
	hPrepOperand( negone_vreg, negone, FB_DATATYPE_XMMWORD )
	hPrepOperand( signbit_vreg, signbit, FB_DATATYPE_XMMWORD )

	outp "sub esp" + COMMA + str( ddsize+8 )

	'' make f available both on the FPU stack and in dst
	if( dvreg->regFamily = IR_REG_SSE ) then
		outp movop + memop + COMMA + dst
		outp "fld " + memop
	else
		outp "fst " + memop
		outp movop + dst + COMMA + memop
	end if

	outp "xorp" + suffix + "xmm7, xmm7"

	'' duplicate the low part of dst into its upper part so it survives
	if( ddsize > 4 ) then
		outp "shufpd " + dst + COMMA + dst + COMMA + "0"
	else
		outp "movlhps " + dst + COMMA + dst
	end if
	outp movop + "[esp+8], xmm7"                            '' 0.0

	outp "fistp qword ptr [esp]"
	outp "cmpnles" + suffix + "xmm7" + COMMA + dst          '' f < 0 ? 1 : 0
	outp "fild qword ptr [esp]"
	outp "andp" + suffix + "xmm7" + COMMA + signbit         '' f < 0 ? - : +
	outp "fstp " + dtypeTB(dvreg->dtype).mname + " [esp]"   '' round(f)
	outp "subs" + suffix + dst + COMMA + "[esp]"            '' difference = (f - round(f))
	outp "xorp" + suffix + dst + COMMA + "xmm7"             '' f < 0 ? -difference : difference
	outp "xorp" + suffix + "xmm7" + COMMA + negone          '' f < 0 ? 1.0 : -1.0
	'' difference < 0 ? 1 : 0
	outp "cmplts" + suffix + dst + COMMA + "[esp+8]"
	outp "andp" + suffix + "xmm7" + COMMA + dst             '' difference < 0 ? +/- 1.0 : 0.0

	'' restore dst
	if( ddsize > 4 ) then
		outp "shufpd " + dst + COMMA + dst + COMMA + "1"
	else
		outp "movhlps " + dst + COMMA + dst
	end if

	outp "adds" + suffix + "xmm7" + COMMA + "[esp]"         '' round(f) +/- 1
	outp "subs" + suffix + dst + COMMA + "xmm7"             '' dst - fix(dst)
	outp "add esp" + COMMA + str( ddsize+8 )

end sub
2770 
2771 
2772 
2773 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
2774 '' stack
2775 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
2776 
2777 
2778 '':::::
'' Emits code pushing the floating-point operand svreg onto the CPU stack.
''
'' svreg : operand to push
'' unused: not referenced
sub _emitPUSHF_SSE _
	( _
		byval svreg as IRVREG ptr, _
		byval unused as integer _
	) static

	dim as string src
	dim as integer sdsize

	hPrepOperand( svreg, src )

	sdsize = typeGetSize( svreg->dtype )

	'' register operand: reserve the slot, then store into it
	if( svreg->typ = IR_VREGTYPE_REG ) then
		outp "sub esp" + COMMA + str( sdsize )

		if( svreg->regFamily = IR_REG_FPU_STACK ) then
			'' floats are returned in st(0)
			outp "fstp " + dtypeTB(svreg->dtype).mname + " [esp]"
		elseif( sdsize > 4 ) then
			outp "movlpd [esp]" + COMMA + src
		else
			outp "movss [esp]" + COMMA + src
		end if

		exit sub
	end if

	'' non-register operand: push it as one or two 32-bit words
	if( svreg->dtype = FB_DATATYPE_SINGLE ) then
		outp "push " + src
	else
		'' upper half first, then the lower half
		hPrepOperand( svreg, src, FB_DATATYPE_INTEGER, 4 )
		outp "push " + src

		hPrepOperand( svreg, src, FB_DATATYPE_INTEGER, 0 )
		outp "push " + src
	end if

end sub
2826 
2827 
2828 '':::::
'' Emits code popping a floating-point value from the CPU stack into dvreg.
''
'' dvreg : destination operand
'' unused: not referenced
sub _emitPOPF_SSE _
	( _
		byval dvreg as IRVREG ptr, _
		byval unused as integer _
	) static

	dim as string dst, movop
	dim as integer dsize

	hPrepOperand( dvreg, dst )

	dsize = typeGetSize( dvreg->dtype )

	'' register operand: load from the top of the stack, then release the slot
	if( dvreg->typ = IR_VREGTYPE_REG ) then
		if( dsize > 4 ) then
			movop = "movlpd "
		else
			movop = "movss "
		end if
		outp movop + dst + COMMA + dtypeTB(dvreg->dtype).mname + " [esp]"

		outp "add esp, " + str( dsize )
		exit sub
	end if

	'' non-register operand: pop it as one or two 32-bit words
	if( dvreg->dtype = FB_DATATYPE_SINGLE ) then
		outp "pop " + dst
	else
		'' lower half first, then the upper half
		hPrepOperand( dvreg, dst, FB_DATATYPE_INTEGER, 0 )
		outp "pop " + dst

		hPrepOperand( dvreg, dst, FB_DATATYPE_INTEGER, 4 )
		outp "pop " + dst
	end if

end sub
2867 
2868 
2869 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
2870 '' functions table
2871 ''::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
2872 
'' EMIT_CBENTRY(op) expands to the address of the emitter routine _emit<op>
#define EMIT_CBENTRY(op) @_emit##op##


'' Fills the slots of the given emitter callback table that have an SSE
'' implementation in this file.
''
'' _opFnTB_SSE: table of routine addresses, indexed by EMIT_OP_* value
'' returns    : always TRUE
function _init_opFnTB_SSE _
	( _
		byval _opFnTB_SSE as any ptr ptr _
	) as integer

	'' load
	_opFnTB_SSE[EMIT_OP_LOADF2I] = EMIT_CBENTRY(LOADF2I_SSE)
	_opFnTB_SSE[EMIT_OP_LOADI2F] = EMIT_CBENTRY(LOADI2F_SSE)
	_opFnTB_SSE[EMIT_OP_LOADF2L] = EMIT_CBENTRY(LOADF2L_SSE)
	_opFnTB_SSE[EMIT_OP_LOADL2F] = EMIT_CBENTRY(LOADL2F_SSE)
	_opFnTB_SSE[EMIT_OP_LOADF2F] = EMIT_CBENTRY(LOADF2F_SSE)

	'' store
	_opFnTB_SSE[EMIT_OP_STORF2I] = EMIT_CBENTRY(STORF2I_SSE)
	_opFnTB_SSE[EMIT_OP_STORI2F] = EMIT_CBENTRY(STORI2F_SSE)
	_opFnTB_SSE[EMIT_OP_STORF2L] = EMIT_CBENTRY(STORF2L_SSE)
	_opFnTB_SSE[EMIT_OP_STORL2F] = EMIT_CBENTRY(STORL2F_SSE)
	_opFnTB_SSE[EMIT_OP_STORF2F] = EMIT_CBENTRY(STORF2F_SSE)

	'' binary ops: arithmetic
	_opFnTB_SSE[EMIT_OP_MOVF]    = EMIT_CBENTRY(MOVF_SSE)
	_opFnTB_SSE[EMIT_OP_ADDF]    = EMIT_CBENTRY(ADDF_SSE)
	_opFnTB_SSE[EMIT_OP_SUBF]    = EMIT_CBENTRY(SUBF_SSE)
	_opFnTB_SSE[EMIT_OP_MULF]    = EMIT_CBENTRY(MULF_SSE)
	_opFnTB_SSE[EMIT_OP_DIVF]    = EMIT_CBENTRY(DIVF_SSE)

	'' binary ops: math functions
	_opFnTB_SSE[EMIT_OP_ATN2]    = EMIT_CBENTRY(ATN2_SSE)
	_opFnTB_SSE[EMIT_OP_POW]     = EMIT_CBENTRY(POW_SSE)

	'' relational
	_opFnTB_SSE[EMIT_OP_CGTF]    = EMIT_CBENTRY(CGTF_SSE)
	_opFnTB_SSE[EMIT_OP_CLTF]    = EMIT_CBENTRY(CLTF_SSE)
	_opFnTB_SSE[EMIT_OP_CEQF]    = EMIT_CBENTRY(CEQF_SSE)
	_opFnTB_SSE[EMIT_OP_CNEF]    = EMIT_CBENTRY(CNEF_SSE)
	_opFnTB_SSE[EMIT_OP_CGEF]    = EMIT_CBENTRY(CGEF_SSE)
	_opFnTB_SSE[EMIT_OP_CLEF]    = EMIT_CBENTRY(CLEF_SSE)

	'' unary ops: sign handling and horizontal add
	_opFnTB_SSE[EMIT_OP_NEGF]    = EMIT_CBENTRY(NEGF_SSE)
	_opFnTB_SSE[EMIT_OP_HADDF]   = EMIT_CBENTRY(HADDF_SSE)
	_opFnTB_SSE[EMIT_OP_ABSF]    = EMIT_CBENTRY(ABSF_SSE)
	_opFnTB_SSE[EMIT_OP_SGNF]    = EMIT_CBENTRY(SGNF_SSE)

	'' unary ops: integer / fractional part
	_opFnTB_SSE[EMIT_OP_FIX]     = EMIT_CBENTRY(FIX_SSE)
	_opFnTB_SSE[EMIT_OP_FRAC]    = EMIT_CBENTRY(FRAC_SSE)

	'' unary ops: trigonometry
	_opFnTB_SSE[EMIT_OP_SIN]     = EMIT_CBENTRY(SIN_SSE)
	_opFnTB_SSE[EMIT_OP_ASIN]    = EMIT_CBENTRY(ASIN_SSE)
	_opFnTB_SSE[EMIT_OP_COS]     = EMIT_CBENTRY(COS_SSE)
	_opFnTB_SSE[EMIT_OP_ACOS]    = EMIT_CBENTRY(ACOS_SSE)
	_opFnTB_SSE[EMIT_OP_TAN]     = EMIT_CBENTRY(TAN_SSE)
	_opFnTB_SSE[EMIT_OP_ATAN]    = EMIT_CBENTRY(ATAN_SSE)

	'' unary ops: roots and reciprocals
	_opFnTB_SSE[EMIT_OP_SQRT]    = EMIT_CBENTRY(SQRT_SSE)
	_opFnTB_SSE[EMIT_OP_RSQRT]   = EMIT_CBENTRY(RSQRT_SSE)
	_opFnTB_SSE[EMIT_OP_RCP]     = EMIT_CBENTRY(RCP_SSE)

	'' unary ops: logarithm and exponential
	_opFnTB_SSE[EMIT_OP_LOG]     = EMIT_CBENTRY(LOG_SSE)
	_opFnTB_SSE[EMIT_OP_EXP]     = EMIT_CBENTRY(EXP_SSE)

	'' unary ops: floor and swizzle/replicate
	_opFnTB_SSE[EMIT_OP_FLOOR]   = EMIT_CBENTRY(FLOOR_SSE)
	_opFnTB_SSE[EMIT_OP_SWZREP]  = EMIT_CBENTRY(SWZREPF_SSE)

	'' stack
	_opFnTB_SSE[EMIT_OP_PUSHF]   = EMIT_CBENTRY(PUSHF_SSE)
	_opFnTB_SSE[EMIT_OP_POPF]    = EMIT_CBENTRY(POPF_SSE)

	function = TRUE
end function
2944 
2945 
2946