/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* NOTE(review): the listing dropped many of this file's includes; the
   standard upstream set is restored here -- verify against the upstream
   tree before committing.  All includes present in the listing are kept.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"
/* Default stack-probe limit; -1 means no limit.  Targets may predefine
   this in their tm.h.  The listing dropped the closing #endif, which is
   restored here -- without it everything below is conditionally compiled.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  Indices
   0..3 select QI/HI/SI/DI; anything else maps to the "other" slot (4).
   The listing dropped the final ": 4)" arm, restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0			\
   : (mode) == HImode ? 1		\
   : (mode) == SImode ? 2		\
   : (mode) == DImode ? 3		\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES expresses a cost in bytes on the same scale, so that
   size-tuned and speed-tuned cost tables are comparable.  */
#define COSTS_N_BYTES(N) ((N) * 2)
72 struct processor_costs size_cost
= { /* costs for tuning for size */
73 COSTS_N_BYTES (2), /* cost of an add instruction */
74 COSTS_N_BYTES (3), /* cost of a lea instruction */
75 COSTS_N_BYTES (2), /* variable shift costs */
76 COSTS_N_BYTES (3), /* constant shift costs */
77 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
78 COSTS_N_BYTES (3), /* HI */
79 COSTS_N_BYTES (3), /* SI */
80 COSTS_N_BYTES (3), /* DI */
81 COSTS_N_BYTES (5)}, /* other */
82 0, /* cost of multiply per each bit set */
83 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 COSTS_N_BYTES (3), /* cost of movsx */
89 COSTS_N_BYTES (3), /* cost of movzx */
92 2, /* cost for loading QImode using movzbl */
93 {2, 2, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 2, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {2, 2, 2}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {2, 2, 2}, /* cost of storing fp registers
101 in SFmode, DFmode and XFmode */
102 3, /* cost of moving MMX register */
103 {3, 3}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {3, 3}, /* cost of storing MMX registers
106 in SImode and DImode */
107 3, /* cost of moving SSE register */
108 {3, 3, 3}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {3, 3, 3}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 0, /* size of prefetch block */
114 0, /* number of parallel prefetches */
116 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
117 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
118 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
119 COSTS_N_BYTES (2), /* cost of FABS instruction. */
120 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
121 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 /* Processor costs (relative to an add) */
126 struct processor_costs i386_cost
= { /* 386 specific costs */
127 COSTS_N_INSNS (1), /* cost of an add instruction */
128 COSTS_N_INSNS (1), /* cost of a lea instruction */
129 COSTS_N_INSNS (3), /* variable shift costs */
130 COSTS_N_INSNS (2), /* constant shift costs */
131 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
132 COSTS_N_INSNS (6), /* HI */
133 COSTS_N_INSNS (6), /* SI */
134 COSTS_N_INSNS (6), /* DI */
135 COSTS_N_INSNS (6)}, /* other */
136 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
137 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
138 COSTS_N_INSNS (23), /* HI */
139 COSTS_N_INSNS (23), /* SI */
140 COSTS_N_INSNS (23), /* DI */
141 COSTS_N_INSNS (23)}, /* other */
142 COSTS_N_INSNS (3), /* cost of movsx */
143 COSTS_N_INSNS (2), /* cost of movzx */
144 15, /* "large" insn */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, /* cost of moving SSE register */
162 {4, 8, 16}, /* cost of loading SSE registers
163 in SImode, DImode and TImode */
164 {4, 8, 16}, /* cost of storing SSE registers
165 in SImode, DImode and TImode */
166 3, /* MMX or SSE register to integer */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
170 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
171 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
172 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
173 COSTS_N_INSNS (22), /* cost of FABS instruction. */
174 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
175 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
179 struct processor_costs i486_cost
= { /* 486 specific costs */
180 COSTS_N_INSNS (1), /* cost of an add instruction */
181 COSTS_N_INSNS (1), /* cost of a lea instruction */
182 COSTS_N_INSNS (3), /* variable shift costs */
183 COSTS_N_INSNS (2), /* constant shift costs */
184 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
185 COSTS_N_INSNS (12), /* HI */
186 COSTS_N_INSNS (12), /* SI */
187 COSTS_N_INSNS (12), /* DI */
188 COSTS_N_INSNS (12)}, /* other */
189 1, /* cost of multiply per each bit set */
190 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
191 COSTS_N_INSNS (40), /* HI */
192 COSTS_N_INSNS (40), /* SI */
193 COSTS_N_INSNS (40), /* DI */
194 COSTS_N_INSNS (40)}, /* other */
195 COSTS_N_INSNS (3), /* cost of movsx */
196 COSTS_N_INSNS (2), /* cost of movzx */
197 15, /* "large" insn */
199 4, /* cost for loading QImode using movzbl */
200 {2, 4, 2}, /* cost of loading integer registers
201 in QImode, HImode and SImode.
202 Relative to reg-reg move (2). */
203 {2, 4, 2}, /* cost of storing integer registers */
204 2, /* cost of reg,reg fld/fst */
205 {8, 8, 8}, /* cost of loading fp registers
206 in SFmode, DFmode and XFmode */
207 {8, 8, 8}, /* cost of storing fp registers
208 in SFmode, DFmode and XFmode */
209 2, /* cost of moving MMX register */
210 {4, 8}, /* cost of loading MMX registers
211 in SImode and DImode */
212 {4, 8}, /* cost of storing MMX registers
213 in SImode and DImode */
214 2, /* cost of moving SSE register */
215 {4, 8, 16}, /* cost of loading SSE registers
216 in SImode, DImode and TImode */
217 {4, 8, 16}, /* cost of storing SSE registers
218 in SImode, DImode and TImode */
219 3, /* MMX or SSE register to integer */
220 0, /* size of prefetch block */
221 0, /* number of parallel prefetches */
223 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
224 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
225 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
226 COSTS_N_INSNS (3), /* cost of FABS instruction. */
227 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
228 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
232 struct processor_costs pentium_cost
= {
233 COSTS_N_INSNS (1), /* cost of an add instruction */
234 COSTS_N_INSNS (1), /* cost of a lea instruction */
235 COSTS_N_INSNS (4), /* variable shift costs */
236 COSTS_N_INSNS (1), /* constant shift costs */
237 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
238 COSTS_N_INSNS (11), /* HI */
239 COSTS_N_INSNS (11), /* SI */
240 COSTS_N_INSNS (11), /* DI */
241 COSTS_N_INSNS (11)}, /* other */
242 0, /* cost of multiply per each bit set */
243 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
244 COSTS_N_INSNS (25), /* HI */
245 COSTS_N_INSNS (25), /* SI */
246 COSTS_N_INSNS (25), /* DI */
247 COSTS_N_INSNS (25)}, /* other */
248 COSTS_N_INSNS (3), /* cost of movsx */
249 COSTS_N_INSNS (2), /* cost of movzx */
250 8, /* "large" insn */
252 6, /* cost for loading QImode using movzbl */
253 {2, 4, 2}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 4, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
261 in SFmode, DFmode and XFmode */
262 8, /* cost of moving MMX register */
263 {8, 8}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {8, 8}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {4, 8, 16}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {4, 8, 16}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (1), /* cost of FABS instruction. */
280 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
285 struct processor_costs pentiumpro_cost
= {
286 COSTS_N_INSNS (1), /* cost of an add instruction */
287 COSTS_N_INSNS (1), /* cost of a lea instruction */
288 COSTS_N_INSNS (1), /* variable shift costs */
289 COSTS_N_INSNS (1), /* constant shift costs */
290 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
291 COSTS_N_INSNS (4), /* HI */
292 COSTS_N_INSNS (4), /* SI */
293 COSTS_N_INSNS (4), /* DI */
294 COSTS_N_INSNS (4)}, /* other */
295 0, /* cost of multiply per each bit set */
296 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
297 COSTS_N_INSNS (17), /* HI */
298 COSTS_N_INSNS (17), /* SI */
299 COSTS_N_INSNS (17), /* DI */
300 COSTS_N_INSNS (17)}, /* other */
301 COSTS_N_INSNS (1), /* cost of movsx */
302 COSTS_N_INSNS (1), /* cost of movzx */
303 8, /* "large" insn */
305 2, /* cost for loading QImode using movzbl */
306 {4, 4, 4}, /* cost of loading integer registers
307 in QImode, HImode and SImode.
308 Relative to reg-reg move (2). */
309 {2, 2, 2}, /* cost of storing integer registers */
310 2, /* cost of reg,reg fld/fst */
311 {2, 2, 6}, /* cost of loading fp registers
312 in SFmode, DFmode and XFmode */
313 {4, 4, 6}, /* cost of storing fp registers
314 in SFmode, DFmode and XFmode */
315 2, /* cost of moving MMX register */
316 {2, 2}, /* cost of loading MMX registers
317 in SImode and DImode */
318 {2, 2}, /* cost of storing MMX registers
319 in SImode and DImode */
320 2, /* cost of moving SSE register */
321 {2, 2, 8}, /* cost of loading SSE registers
322 in SImode, DImode and TImode */
323 {2, 2, 8}, /* cost of storing SSE registers
324 in SImode, DImode and TImode */
325 3, /* MMX or SSE register to integer */
326 32, /* size of prefetch block */
327 6, /* number of parallel prefetches */
329 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
330 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
331 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
332 COSTS_N_INSNS (2), /* cost of FABS instruction. */
333 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
334 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
338 struct processor_costs geode_cost
= {
339 COSTS_N_INSNS (1), /* cost of an add instruction */
340 COSTS_N_INSNS (1), /* cost of a lea instruction */
341 COSTS_N_INSNS (2), /* variable shift costs */
342 COSTS_N_INSNS (1), /* constant shift costs */
343 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
344 COSTS_N_INSNS (4), /* HI */
345 COSTS_N_INSNS (7), /* SI */
346 COSTS_N_INSNS (7), /* DI */
347 COSTS_N_INSNS (7)}, /* other */
348 0, /* cost of multiply per each bit set */
349 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
350 COSTS_N_INSNS (23), /* HI */
351 COSTS_N_INSNS (39), /* SI */
352 COSTS_N_INSNS (39), /* DI */
353 COSTS_N_INSNS (39)}, /* other */
354 COSTS_N_INSNS (1), /* cost of movsx */
355 COSTS_N_INSNS (1), /* cost of movzx */
356 8, /* "large" insn */
358 1, /* cost for loading QImode using movzbl */
359 {1, 1, 1}, /* cost of loading integer registers
360 in QImode, HImode and SImode.
361 Relative to reg-reg move (2). */
362 {1, 1, 1}, /* cost of storing integer registers */
363 1, /* cost of reg,reg fld/fst */
364 {1, 1, 1}, /* cost of loading fp registers
365 in SFmode, DFmode and XFmode */
366 {4, 6, 6}, /* cost of storing fp registers
367 in SFmode, DFmode and XFmode */
369 1, /* cost of moving MMX register */
370 {1, 1}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {1, 1}, /* cost of storing MMX registers
373 in SImode and DImode */
374 1, /* cost of moving SSE register */
375 {1, 1, 1}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {1, 1, 1}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 1, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
383 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (1), /* cost of FABS instruction. */
387 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
392 struct processor_costs k6_cost
= {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (3), /* HI */
399 COSTS_N_INSNS (3), /* SI */
400 COSTS_N_INSNS (3), /* DI */
401 COSTS_N_INSNS (3)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (18), /* HI */
405 COSTS_N_INSNS (18), /* SI */
406 COSTS_N_INSNS (18), /* DI */
407 COSTS_N_INSNS (18)}, /* other */
408 COSTS_N_INSNS (2), /* cost of movsx */
409 COSTS_N_INSNS (2), /* cost of movzx */
410 8, /* "large" insn */
412 3, /* cost for loading QImode using movzbl */
413 {4, 5, 4}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {2, 3, 2}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {6, 6, 6}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {4, 4, 4}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {2, 2}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {2, 2}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {2, 2, 8}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {2, 2, 8}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 6, /* MMX or SSE register to integer */
433 32, /* size of prefetch block */
434 1, /* number of parallel prefetches */
436 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
445 struct processor_costs athlon_cost
= {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (5), /* HI */
452 COSTS_N_INSNS (5), /* SI */
453 COSTS_N_INSNS (5), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {4, 4}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 4, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
498 struct processor_costs k8_cost
= {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (2), /* cost of a lea instruction */
501 COSTS_N_INSNS (1), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (3), /* SI */
506 COSTS_N_INSNS (4), /* DI */
507 COSTS_N_INSNS (5)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (26), /* HI */
511 COSTS_N_INSNS (42), /* SI */
512 COSTS_N_INSNS (74), /* DI */
513 COSTS_N_INSNS (74)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
518 4, /* cost for loading QImode using movzbl */
519 {3, 4, 3}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {3, 4, 3}, /* cost of storing integer registers */
523 4, /* cost of reg,reg fld/fst */
524 {4, 4, 12}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {6, 6, 8}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {3, 3}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {4, 4}, /* cost of storing MMX registers
532 in SImode and DImode */
533 2, /* cost of moving SSE register */
534 {4, 3, 6}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {4, 4, 5}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 5, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 /* New AMD processors never drop prefetches; if they cannot be performed
541 immediately, they are queued. We set number of simultaneous prefetches
542 to a large constant to reflect this (it probably is not a good idea not
543 to limit number of prefetches at all, as their execution also takes some
545 100, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
556 struct processor_costs pentium4_cost
= {
557 COSTS_N_INSNS (1), /* cost of an add instruction */
558 COSTS_N_INSNS (3), /* cost of a lea instruction */
559 COSTS_N_INSNS (4), /* variable shift costs */
560 COSTS_N_INSNS (4), /* constant shift costs */
561 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
562 COSTS_N_INSNS (15), /* HI */
563 COSTS_N_INSNS (15), /* SI */
564 COSTS_N_INSNS (15), /* DI */
565 COSTS_N_INSNS (15)}, /* other */
566 0, /* cost of multiply per each bit set */
567 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
568 COSTS_N_INSNS (56), /* HI */
569 COSTS_N_INSNS (56), /* SI */
570 COSTS_N_INSNS (56), /* DI */
571 COSTS_N_INSNS (56)}, /* other */
572 COSTS_N_INSNS (1), /* cost of movsx */
573 COSTS_N_INSNS (1), /* cost of movzx */
574 16, /* "large" insn */
576 2, /* cost for loading QImode using movzbl */
577 {4, 5, 4}, /* cost of loading integer registers
578 in QImode, HImode and SImode.
579 Relative to reg-reg move (2). */
580 {2, 3, 2}, /* cost of storing integer registers */
581 2, /* cost of reg,reg fld/fst */
582 {2, 2, 6}, /* cost of loading fp registers
583 in SFmode, DFmode and XFmode */
584 {4, 4, 6}, /* cost of storing fp registers
585 in SFmode, DFmode and XFmode */
586 2, /* cost of moving MMX register */
587 {2, 2}, /* cost of loading MMX registers
588 in SImode and DImode */
589 {2, 2}, /* cost of storing MMX registers
590 in SImode and DImode */
591 12, /* cost of moving SSE register */
592 {12, 12, 12}, /* cost of loading SSE registers
593 in SImode, DImode and TImode */
594 {2, 2, 8}, /* cost of storing SSE registers
595 in SImode, DImode and TImode */
596 10, /* MMX or SSE register to integer */
597 64, /* size of prefetch block */
598 6, /* number of parallel prefetches */
600 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
601 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
602 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
603 COSTS_N_INSNS (2), /* cost of FABS instruction. */
604 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
605 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
609 struct processor_costs nocona_cost
= {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (1), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (10), /* HI */
616 COSTS_N_INSNS (10), /* SI */
617 COSTS_N_INSNS (10), /* DI */
618 COSTS_N_INSNS (10)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (66), /* HI */
622 COSTS_N_INSNS (66), /* SI */
623 COSTS_N_INSNS (66), /* DI */
624 COSTS_N_INSNS (66)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 16, /* "large" insn */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 3, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 6, /* cost of moving MMX register */
640 {12, 12}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {12, 12}, /* cost of storing MMX registers
643 in SImode and DImode */
644 6, /* cost of moving SSE register */
645 {12, 12, 12}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {12, 12, 12}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 8, /* MMX or SSE register to integer */
650 128, /* size of prefetch block */
651 8, /* number of parallel prefetches */
653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
662 struct processor_costs core2_cost
= {
663 COSTS_N_INSNS (1), /* cost of an add instruction */
664 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
665 COSTS_N_INSNS (1), /* variable shift costs */
666 COSTS_N_INSNS (1), /* constant shift costs */
667 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
668 COSTS_N_INSNS (3), /* HI */
669 COSTS_N_INSNS (3), /* SI */
670 COSTS_N_INSNS (3), /* DI */
671 COSTS_N_INSNS (3)}, /* other */
672 0, /* cost of multiply per each bit set */
673 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
674 COSTS_N_INSNS (22), /* HI */
675 COSTS_N_INSNS (22), /* SI */
676 COSTS_N_INSNS (22), /* DI */
677 COSTS_N_INSNS (22)}, /* other */
678 COSTS_N_INSNS (1), /* cost of movsx */
679 COSTS_N_INSNS (1), /* cost of movzx */
680 8, /* "large" insn */
682 2, /* cost for loading QImode using movzbl */
683 {6, 6, 6}, /* cost of loading integer registers
684 in QImode, HImode and SImode.
685 Relative to reg-reg move (2). */
686 {4, 4, 4}, /* cost of storing integer registers */
687 2, /* cost of reg,reg fld/fst */
688 {6, 6, 6}, /* cost of loading fp registers
689 in SFmode, DFmode and XFmode */
690 {4, 4, 4}, /* cost of loading integer registers */
691 2, /* cost of moving MMX register */
692 {6, 6}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {4, 4}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {6, 6, 6}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {4, 4, 4}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 2, /* MMX or SSE register to integer */
702 128, /* size of prefetch block */
703 8, /* number of parallel prefetches */
705 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (1), /* cost of FABS instruction. */
709 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
713 /* Generic64 should produce code tuned for Nocona and K8. */
715 struct processor_costs generic64_cost
= {
716 COSTS_N_INSNS (1), /* cost of an add instruction */
717 /* On all chips taken into consideration lea is 2 cycles and more. With
718 this cost however our current implementation of synth_mult results in
719 use of unnecessary temporary registers causing regression on several
720 SPECfp benchmarks. */
721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (2)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
739 4, /* cost for loading QImode using movzbl */
740 {4, 4, 4}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {4, 4, 4}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {12, 12, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {8, 8}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {8, 8}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {8, 8, 8}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {8, 8, 8}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of prefetch block */
761 6, /* number of parallel prefetches */
762 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
763 is increased to perhaps more appropriate value of 5. */
765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
773 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
775 struct processor_costs generic32_cost
= {
776 COSTS_N_INSNS (1), /* cost of an add instruction */
777 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
778 COSTS_N_INSNS (1), /* variable shift costs */
779 COSTS_N_INSNS (1), /* constant shift costs */
780 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
781 COSTS_N_INSNS (4), /* HI */
782 COSTS_N_INSNS (3), /* SI */
783 COSTS_N_INSNS (4), /* DI */
784 COSTS_N_INSNS (2)}, /* other */
785 0, /* cost of multiply per each bit set */
786 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
787 COSTS_N_INSNS (26), /* HI */
788 COSTS_N_INSNS (42), /* SI */
789 COSTS_N_INSNS (74), /* DI */
790 COSTS_N_INSNS (74)}, /* other */
791 COSTS_N_INSNS (1), /* cost of movsx */
792 COSTS_N_INSNS (1), /* cost of movzx */
793 8, /* "large" insn */
795 4, /* cost for loading QImode using movzbl */
796 {4, 4, 4}, /* cost of loading integer registers
797 in QImode, HImode and SImode.
798 Relative to reg-reg move (2). */
799 {4, 4, 4}, /* cost of storing integer registers */
800 4, /* cost of reg,reg fld/fst */
801 {12, 12, 12}, /* cost of loading fp registers
802 in SFmode, DFmode and XFmode */
803 {6, 6, 8}, /* cost of storing fp registers
804 in SFmode, DFmode and XFmode */
805 2, /* cost of moving MMX register */
806 {8, 8}, /* cost of loading MMX registers
807 in SImode and DImode */
808 {8, 8}, /* cost of storing MMX registers
809 in SImode and DImode */
810 2, /* cost of moving SSE register */
811 {8, 8, 8}, /* cost of loading SSE registers
812 in SImode, DImode and TImode */
813 {8, 8, 8}, /* cost of storing SSE registers
814 in SImode, DImode and TImode */
815 5, /* MMX or SSE register to integer */
816 64, /* size of prefetch block */
817 6, /* number of parallel prefetches */
819 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
820 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
821 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
822 COSTS_N_INSNS (8), /* cost of FABS instruction. */
823 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
824 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
827 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* value;
   the x86_* tuning variables below are unions of these masks.
   m_K6 is defined before m_K6_GEODE, which expands it (harmless for
   macros, but clearer in definition order).  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */

/* Each x86_* tuning flag below is a bitmask of the m_* processor bits;
   a set bit enables the corresponding transformation when tuning for
   that processor.  A ~-inverted mask enables the feature on every
   processor except the ones listed.  */

/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
   generic because it is not working well with PPro base chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have a conflict here in between PPro/Pentium4 based chips
   that treat 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful
   scheduling of FP computations.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers.  Which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
961 /* Array of the smallest class containing reg number REGNO, indexed by
962 REGNO. Used by REGNO_REG_CLASS in i386.h. */
964 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
967 AREG
, DREG
, CREG
, BREG
,
969 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
971 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
972 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
975 /* flags, fpsr, fpcr, dirflag, frame */
976 NO_REGS
, NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
977 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
979 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
981 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
982 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
983 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
987 /* The "default" register map used in 32bit mode. */
989 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
991 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
992 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
993 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
994 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
995 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
996 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
997 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC hard register numbers, in ABI argument order, used to pass the
   first six integer arguments in 64-bit mode: RDI, RSI, RDX, RCX, R8, R9.
   NOTE(review): the initializer's surrounding braces appear to have been
   lost in extraction -- restore from upstream before compiling.  */
static int const x86_64_int_parameter_registers[6] =
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC hard register numbers used for integer return values in 64-bit
   mode: RAX, RDX, RDI, RSI.  (Register 1 is DX in GCC's numbering --
   see the parameter table above -- so it is RDX here, not RDI.)
   NOTE(review): the initializer's surrounding braces appear to have been
   lost in extraction -- restore from upstream before compiling.  */
static int const x86_64_int_return_registers[4] =
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1011 /* The "default" register map used in 64bit mode. */
1012 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1014 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1015 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1016 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1017 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1018 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1019 8,9,10,11,12,13,14,15, /* extended integer registers */
1020 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1023 /* Define the register numbers to be used in Dwarf debugging information.
1024 The SVR4 reference port C compiler uses the following register numbers
1025 in its Dwarf output code:
1026 0 for %eax (gcc regno = 0)
1027 1 for %ecx (gcc regno = 2)
1028 2 for %edx (gcc regno = 1)
1029 3 for %ebx (gcc regno = 3)
1030 4 for %esp (gcc regno = 7)
1031 5 for %ebp (gcc regno = 6)
1032 6 for %esi (gcc regno = 4)
1033 7 for %edi (gcc regno = 5)
1034 The following three DWARF register numbers are never generated by
1035 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1036 believes these numbers have these meanings.
1037 8 for %eip (no gcc equivalent)
1038 9 for %eflags (gcc regno = 17)
1039 10 for %trapno (no gcc equivalent)
1040 It is not at all clear how we should number the FP stack registers
1041 for the x86 architecture. If the version of SDB on x86/svr4 were
1042 a bit less brain dead with respect to floating-point then we would
1043 have a precedent to follow with respect to DWARF register numbers
1044 for x86 FP registers, but the SDB on x86/svr4 is so completely
1045 broken with respect to FP registers that it is hardly worth thinking
1046 of it as something to strive for compatibility with.
1047 The version of x86/svr4 SDB I have at the moment does (partially)
1048 seem to believe that DWARF register number 11 is associated with
1049 the x86 register %st(0), but that's about all. Higher DWARF
1050 register numbers don't seem to be associated with anything in
1051 particular, and even for DWARF regno 11, SDB only seems to under-
1052 stand that it should say that a variable lives in %st(0) (when
1053 asked via an `=' command) if we said it was in DWARF regno 11,
1054 but SDB still prints garbage when asked for the value of the
1055 variable in question (via a `/' command).
1056 (Also note that the labels SDB prints for various FP stack regs
1057 when doing an `x' command are all wrong.)
1058 Note that these problems generally don't affect the native SVR4
1059 C compiler because it doesn't allow the use of -O with -g and
1060 because when it is *not* optimizing, it allocates a memory
1061 location for each floating-point variable, and the memory
1062 location is what gets described in the DWARF AT_location
1063 attribute for the variable in question.
1064 Regardless of the severe mental illness of the x86/svr4 SDB, we
1065 do something sensible here and we use the following DWARF
1066 register numbers. Note that these are all stack-top-relative
1068 11 for %st(0) (gcc regno = 8)
1069 12 for %st(1) (gcc regno = 9)
1070 13 for %st(2) (gcc regno = 10)
1071 14 for %st(3) (gcc regno = 11)
1072 15 for %st(4) (gcc regno = 12)
1073 16 for %st(5) (gcc regno = 13)
1074 17 for %st(6) (gcc regno = 14)
1075 18 for %st(7) (gcc regno = 15)
1077 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1079 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1080 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1081 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1082 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1083 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1084 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1085 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1088 /* Test and compare insns in i386.md store the information needed to
1089 generate branch and scc insns here. */
1091 rtx ix86_compare_op0
= NULL_RTX
;
1092 rtx ix86_compare_op1
= NULL_RTX
;
1093 rtx ix86_compare_emitted
= NULL_RTX
;
/* Size of the register save area: one word per integer argument
   register plus 16 bytes per SSE argument register.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1098 /* Define the structure for the machine field in struct function. */
1100 struct stack_local_entry
GTY(())
1102 unsigned short mode
;
1105 struct stack_local_entry
*next
;
1108 /* Structure describing stack frame layout.
1109 Stack grows downward:
1115 saved frame pointer if frame_pointer_needed
1116 <- HARD_FRAME_POINTER
1121 [va_arg registers] (
1122 > to_allocate <- FRAME_POINTER
1132 HOST_WIDE_INT frame
;
1134 int outgoing_arguments_size
;
1137 HOST_WIDE_INT to_allocate
;
1138 /* The offsets relative to ARG_POINTER. */
1139 HOST_WIDE_INT frame_pointer_offset
;
1140 HOST_WIDE_INT hard_frame_pointer_offset
;
1141 HOST_WIDE_INT stack_pointer_offset
;
1143 /* When save_regs_using_mov is set, emit prologue using
1144 move instead of push instructions. */
1145 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler dialect to emit; defaults to AT&T syntax.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect; defaults to the GNU sequences.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* -mstackrealign option.  */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
/* Length of internal_label_prefix.  */
int internal_label_prefix_len;
1188 static bool ix86_handle_option (size_t, const char *, int);
1189 static void output_pic_addr_const (FILE *, rtx
, int);
1190 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1192 static const char *get_some_local_dynamic_name (void);
1193 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1194 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1195 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1197 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1198 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1200 static rtx
get_thread_pointer (int);
1201 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1202 static void get_pc_thunk_name (char [32], unsigned int);
1203 static rtx
gen_push (rtx
);
1204 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1205 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1206 static struct machine_function
* ix86_init_machine_status (void);
1207 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1208 static int ix86_nsaved_regs (void);
1209 static void ix86_emit_save_regs (void);
1210 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1211 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1212 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1213 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1214 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1215 static rtx
ix86_expand_aligntest (rtx
, int);
1216 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1217 static int ix86_issue_rate (void);
1218 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1219 static int ia32_multipass_dfa_lookahead (void);
1220 static void ix86_init_mmx_sse_builtins (void);
1221 static rtx
x86_this_parameter (tree
);
1222 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1223 HOST_WIDE_INT
, tree
);
1224 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1225 static void x86_file_start (void);
1226 static void ix86_reorg (void);
1227 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1228 static tree
ix86_build_builtin_va_list (void);
1229 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1231 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1232 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1233 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1235 static int ix86_address_cost (rtx
);
1236 static bool ix86_cannot_force_const_mem (rtx
);
1237 static rtx
ix86_delegitimize_address (rtx
);
1239 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1241 struct builtin_description
;
1242 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1244 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1246 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1247 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1248 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1249 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1250 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1251 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1252 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1253 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1254 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1255 static int ix86_fp_comparison_cost (enum rtx_code code
);
1256 static unsigned int ix86_select_alt_pic_regnum (void);
1257 static int ix86_save_reg (unsigned int, int);
1258 static void ix86_compute_frame_layout (struct ix86_frame
*);
1259 static int ix86_comp_type_attributes (tree
, tree
);
1260 static int ix86_function_regparm (tree
, tree
);
1261 const struct attribute_spec ix86_attribute_table
[];
1262 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1263 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1264 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1265 static bool contains_128bit_aligned_vector_p (tree
);
1266 static rtx
ix86_struct_value_rtx (tree
, int);
1267 static bool ix86_ms_bitfield_layout_p (tree
);
1268 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1269 static int extended_reg_mentioned_1 (rtx
*, void *);
1270 static bool ix86_rtx_costs (rtx
, int, int, int *);
1271 static int min_insn_size (rtx
);
1272 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1273 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1274 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1276 static void ix86_init_builtins (void);
1277 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1278 static const char *ix86_mangle_fundamental_type (tree
);
1279 static tree
ix86_stack_protect_fail (void);
1280 static rtx
ix86_internal_arg_pointer (void);
1281 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1283 /* This function is only used on Solaris. */
1284 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1287 /* Register class used for passing given 64bit part of the argument.
1288 These represent classes as documented by the PS ABI, with the exception
1289 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1290 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1292 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1293 whenever possible (upper half does contain padding).
1295 enum x86_64_reg_class
1298 X86_64_INTEGER_CLASS
,
1299 X86_64_INTEGERSI_CLASS
,
1306 X86_64_COMPLEX_X87_CLASS
,
1309 static const char * const x86_64_reg_class_name
[] = {
1310 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1311 "sseup", "x87", "x87up", "cplx87", "no"
1314 #define MAX_CLASSES 4
1316 /* Table of constants used by fldpi, fldln2, etc.... */
1317 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1318 static bool ext_80387_constants_init
= 0;
1319 static void init_ext_80387_constants (void);
1320 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1321 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1322 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1323 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1324 unsigned HOST_WIDE_INT align
)
1327 /* Initialize the GCC target structure. */
1328 #undef TARGET_ATTRIBUTE_TABLE
1329 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1331 # undef TARGET_MERGE_DECL_ATTRIBUTES
1332 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1335 #undef TARGET_COMP_TYPE_ATTRIBUTES
1336 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1338 #undef TARGET_INIT_BUILTINS
1339 #define TARGET_INIT_BUILTINS ix86_init_builtins
1340 #undef TARGET_EXPAND_BUILTIN
1341 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1343 #undef TARGET_ASM_FUNCTION_EPILOGUE
1344 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1346 #undef TARGET_ENCODE_SECTION_INFO
1347 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1348 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1350 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1353 #undef TARGET_ASM_OPEN_PAREN
1354 #define TARGET_ASM_OPEN_PAREN ""
1355 #undef TARGET_ASM_CLOSE_PAREN
1356 #define TARGET_ASM_CLOSE_PAREN ""
1358 #undef TARGET_ASM_ALIGNED_HI_OP
1359 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1360 #undef TARGET_ASM_ALIGNED_SI_OP
1361 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1363 #undef TARGET_ASM_ALIGNED_DI_OP
1364 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1367 #undef TARGET_ASM_UNALIGNED_HI_OP
1368 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1369 #undef TARGET_ASM_UNALIGNED_SI_OP
1370 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1371 #undef TARGET_ASM_UNALIGNED_DI_OP
1372 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1374 #undef TARGET_SCHED_ADJUST_COST
1375 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1376 #undef TARGET_SCHED_ISSUE_RATE
1377 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1378 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1379 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1380 ia32_multipass_dfa_lookahead
1382 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1383 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1386 #undef TARGET_HAVE_TLS
1387 #define TARGET_HAVE_TLS true
1389 #undef TARGET_CANNOT_FORCE_CONST_MEM
1390 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1391 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1392 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1394 #undef TARGET_DELEGITIMIZE_ADDRESS
1395 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1397 #undef TARGET_MS_BITFIELD_LAYOUT_P
1398 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1401 #undef TARGET_BINDS_LOCAL_P
1402 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1405 #undef TARGET_ASM_OUTPUT_MI_THUNK
1406 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1407 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1408 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1410 #undef TARGET_ASM_FILE_START
1411 #define TARGET_ASM_FILE_START x86_file_start
1413 #undef TARGET_DEFAULT_TARGET_FLAGS
1414 #define TARGET_DEFAULT_TARGET_FLAGS \
1416 | TARGET_64BIT_DEFAULT \
1417 | TARGET_SUBTARGET_DEFAULT \
1418 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1420 #undef TARGET_HANDLE_OPTION
1421 #define TARGET_HANDLE_OPTION ix86_handle_option
1423 #undef TARGET_RTX_COSTS
1424 #define TARGET_RTX_COSTS ix86_rtx_costs
1425 #undef TARGET_ADDRESS_COST
1426 #define TARGET_ADDRESS_COST ix86_address_cost
1428 #undef TARGET_FIXED_CONDITION_CODE_REGS
1429 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1430 #undef TARGET_CC_MODES_COMPATIBLE
1431 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1433 #undef TARGET_MACHINE_DEPENDENT_REORG
1434 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1436 #undef TARGET_BUILD_BUILTIN_VA_LIST
1437 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1439 #undef TARGET_MD_ASM_CLOBBERS
1440 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1442 #undef TARGET_PROMOTE_PROTOTYPES
1443 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1444 #undef TARGET_STRUCT_VALUE_RTX
1445 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1446 #undef TARGET_SETUP_INCOMING_VARARGS
1447 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1448 #undef TARGET_MUST_PASS_IN_STACK
1449 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1450 #undef TARGET_PASS_BY_REFERENCE
1451 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1452 #undef TARGET_INTERNAL_ARG_POINTER
1453 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1454 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1455 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1457 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1458 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1460 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1461 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1467 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1468 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1471 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1472 #undef TARGET_INSERT_ATTRIBUTES
1473 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1476 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1477 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1479 #undef TARGET_STACK_PROTECT_FAIL
1480 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1482 #undef TARGET_FUNCTION_VALUE
1483 #define TARGET_FUNCTION_VALUE ix86_function_value
1485 struct gcc_target targetm
= TARGET_INITIALIZER
;
1488 /* The svr4 ABI for the i386 says that records and unions are returned
1490 #ifndef DEFAULT_PCC_STRUCT_RETURN
1491 #define DEFAULT_PCC_STRUCT_RETURN 1
1494 /* Implement TARGET_HANDLE_OPTION. */
1497 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1504 target_flags
&= ~MASK_3DNOW_A
;
1505 target_flags_explicit
|= MASK_3DNOW_A
;
1512 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1513 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1520 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
);
1521 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
;
1528 target_flags
&= ~MASK_SSE3
;
1529 target_flags_explicit
|= MASK_SSE3
;
1538 /* Sometimes certain combinations of command options do not make
1539 sense on a particular target machine. You can define a macro
1540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1541 defined, is executed once just after all the command options have
1544 Don't use this macro to turn on various extra optimizations for
1545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1548 override_options (void)
1551 int ix86_tune_defaulted
= 0;
1553 /* Comes from final.c -- no real reason to change it. */
1554 #define MAX_CODE_ALIGN 16
1558 const struct processor_costs
*cost
; /* Processor costs */
1559 const int target_enable
; /* Target flags to enable. */
1560 const int target_disable
; /* Target flags to disable. */
1561 const int align_loop
; /* Default alignments. */
1562 const int align_loop_max_skip
;
1563 const int align_jump
;
1564 const int align_jump_max_skip
;
1565 const int align_func
;
1567 const processor_target_table
[PROCESSOR_max
] =
1569 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1570 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1571 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1572 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1573 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1574 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1575 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1576 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1577 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1578 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1579 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1580 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1581 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16}
1584 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1587 const char *const name
; /* processor name or nickname. */
1588 const enum processor_type processor
;
1589 const enum pta_flags
1595 PTA_PREFETCH_SSE
= 16,
1602 const processor_alias_table
[] =
1604 {"i386", PROCESSOR_I386
, 0},
1605 {"i486", PROCESSOR_I486
, 0},
1606 {"i586", PROCESSOR_PENTIUM
, 0},
1607 {"pentium", PROCESSOR_PENTIUM
, 0},
1608 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1609 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1610 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1611 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1612 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1613 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1614 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1615 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1616 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1617 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1618 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1619 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1620 | PTA_MMX
| PTA_PREFETCH_SSE
},
1621 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1622 | PTA_MMX
| PTA_PREFETCH_SSE
},
1623 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1624 | PTA_MMX
| PTA_PREFETCH_SSE
},
1625 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1626 | PTA_MMX
| PTA_PREFETCH_SSE
},
1627 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1628 | PTA_64BIT
| PTA_MMX
1629 | PTA_PREFETCH_SSE
},
1630 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1632 {"k6", PROCESSOR_K6
, PTA_MMX
},
1633 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1634 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1635 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1637 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1638 | PTA_3DNOW
| PTA_3DNOW_A
},
1639 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1640 | PTA_3DNOW_A
| PTA_SSE
},
1641 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1642 | PTA_3DNOW_A
| PTA_SSE
},
1643 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1644 | PTA_3DNOW_A
| PTA_SSE
},
1645 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1646 | PTA_SSE
| PTA_SSE2
},
1647 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1648 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1649 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1650 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1651 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1652 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1653 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1654 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1655 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1656 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1659 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1661 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1662 SUBTARGET_OVERRIDE_OPTIONS
;
1665 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1666 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1669 /* -fPIC is the default for x86_64. */
1670 if (TARGET_MACHO
&& TARGET_64BIT
)
1673 /* Set the default values for switches whose default depends on TARGET_64BIT
1674 in case they weren't overwritten by command line options. */
1677 /* Mach-O doesn't support omitting the frame pointer for now. */
1678 if (flag_omit_frame_pointer
== 2)
1679 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1680 if (flag_asynchronous_unwind_tables
== 2)
1681 flag_asynchronous_unwind_tables
= 1;
1682 if (flag_pcc_struct_return
== 2)
1683 flag_pcc_struct_return
= 0;
1687 if (flag_omit_frame_pointer
== 2)
1688 flag_omit_frame_pointer
= 0;
1689 if (flag_asynchronous_unwind_tables
== 2)
1690 flag_asynchronous_unwind_tables
= 0;
1691 if (flag_pcc_struct_return
== 2)
1692 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1695 /* Need to check -mtune=generic first. */
1696 if (ix86_tune_string
)
1698 if (!strcmp (ix86_tune_string
, "generic")
1699 || !strcmp (ix86_tune_string
, "i686")
1700 /* As special support for cross compilers we read -mtune=native
1701 as -mtune=generic. With native compilers we won't see the
1702 -mtune=native, as it was changed by the driver. */
1703 || !strcmp (ix86_tune_string
, "native"))
1706 ix86_tune_string
= "generic64";
1708 ix86_tune_string
= "generic32";
1710 else if (!strncmp (ix86_tune_string
, "generic", 7))
1711 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1715 if (ix86_arch_string
)
1716 ix86_tune_string
= ix86_arch_string
;
1717 if (!ix86_tune_string
)
1719 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1720 ix86_tune_defaulted
= 1;
1723 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1724 need to use a sensible tune option. */
1725 if (!strcmp (ix86_tune_string
, "generic")
1726 || !strcmp (ix86_tune_string
, "x86-64")
1727 || !strcmp (ix86_tune_string
, "i686"))
1730 ix86_tune_string
= "generic64";
1732 ix86_tune_string
= "generic32";
1735 if (!strcmp (ix86_tune_string
, "x86-64"))
1736 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1737 "-mtune=generic instead as appropriate.");
1739 if (!ix86_arch_string
)
1740 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1741 if (!strcmp (ix86_arch_string
, "generic"))
1742 error ("generic CPU can be used only for -mtune= switch");
1743 if (!strncmp (ix86_arch_string
, "generic", 7))
1744 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1746 if (ix86_cmodel_string
!= 0)
1748 if (!strcmp (ix86_cmodel_string
, "small"))
1749 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1750 else if (!strcmp (ix86_cmodel_string
, "medium"))
1751 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1753 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1754 else if (!strcmp (ix86_cmodel_string
, "32"))
1755 ix86_cmodel
= CM_32
;
1756 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1757 ix86_cmodel
= CM_KERNEL
;
1758 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1759 ix86_cmodel
= CM_LARGE
;
1761 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1765 ix86_cmodel
= CM_32
;
1767 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1769 if (ix86_asm_string
!= 0)
1772 && !strcmp (ix86_asm_string
, "intel"))
1773 ix86_asm_dialect
= ASM_INTEL
;
1774 else if (!strcmp (ix86_asm_string
, "att"))
1775 ix86_asm_dialect
= ASM_ATT
;
1777 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1779 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1780 error ("code model %qs not supported in the %s bit mode",
1781 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1782 if (ix86_cmodel
== CM_LARGE
)
1783 sorry ("code model %<large%> not supported yet");
1784 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1785 sorry ("%i-bit mode not compiled in",
1786 (target_flags
& MASK_64BIT
) ? 64 : 32);
1788 for (i
= 0; i
< pta_size
; i
++)
1789 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1791 ix86_arch
= processor_alias_table
[i
].processor
;
1792 /* Default cpu tuning to the architecture. */
1793 ix86_tune
= ix86_arch
;
1794 if (processor_alias_table
[i
].flags
& PTA_MMX
1795 && !(target_flags_explicit
& MASK_MMX
))
1796 target_flags
|= MASK_MMX
;
1797 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1798 && !(target_flags_explicit
& MASK_3DNOW
))
1799 target_flags
|= MASK_3DNOW
;
1800 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1801 && !(target_flags_explicit
& MASK_3DNOW_A
))
1802 target_flags
|= MASK_3DNOW_A
;
1803 if (processor_alias_table
[i
].flags
& PTA_SSE
1804 && !(target_flags_explicit
& MASK_SSE
))
1805 target_flags
|= MASK_SSE
;
1806 if (processor_alias_table
[i
].flags
& PTA_SSE2
1807 && !(target_flags_explicit
& MASK_SSE2
))
1808 target_flags
|= MASK_SSE2
;
1809 if (processor_alias_table
[i
].flags
& PTA_SSE3
1810 && !(target_flags_explicit
& MASK_SSE3
))
1811 target_flags
|= MASK_SSE3
;
1812 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1813 && !(target_flags_explicit
& MASK_SSSE3
))
1814 target_flags
|= MASK_SSSE3
;
1815 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1816 x86_prefetch_sse
= true;
1817 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1818 error ("CPU you selected does not support x86-64 "
1824 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1826 for (i
= 0; i
< pta_size
; i
++)
1827 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1829 ix86_tune
= processor_alias_table
[i
].processor
;
1830 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1832 if (ix86_tune_defaulted
)
1834 ix86_tune_string
= "x86-64";
1835 for (i
= 0; i
< pta_size
; i
++)
1836 if (! strcmp (ix86_tune_string
,
1837 processor_alias_table
[i
].name
))
1839 ix86_tune
= processor_alias_table
[i
].processor
;
1842 error ("CPU you selected does not support x86-64 "
1845 /* Intel CPUs have always interpreted SSE prefetch instructions as
1846 NOPs; so, we can enable SSE prefetch instructions even when
1847 -mtune (rather than -march) points us to a processor that has them.
1848 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1849 higher processors. */
1850 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1851 x86_prefetch_sse
= true;
1855 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1858 ix86_cost
= &size_cost
;
1860 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1861 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1862 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1864 /* Arrange to set up i386_stack_locals for all functions. */
1865 init_machine_status
= ix86_init_machine_status
;
1867 /* Validate -mregparm= value. */
1868 if (ix86_regparm_string
)
1870 i
= atoi (ix86_regparm_string
);
1871 if (i
< 0 || i
> REGPARM_MAX
)
1872 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1878 ix86_regparm
= REGPARM_MAX
;
1880 /* If the user has provided any of the -malign-* options,
1881 warn and use that value only if -falign-* is not set.
1882 Remove this code in GCC 3.2 or later. */
1883 if (ix86_align_loops_string
)
1885 warning (0, "-malign-loops is obsolete, use -falign-loops");
1886 if (align_loops
== 0)
1888 i
= atoi (ix86_align_loops_string
);
1889 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1890 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1892 align_loops
= 1 << i
;
1896 if (ix86_align_jumps_string
)
1898 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1899 if (align_jumps
== 0)
1901 i
= atoi (ix86_align_jumps_string
);
1902 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1903 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1905 align_jumps
= 1 << i
;
1909 if (ix86_align_funcs_string
)
1911 warning (0, "-malign-functions is obsolete, use -falign-functions");
1912 if (align_functions
== 0)
1914 i
= atoi (ix86_align_funcs_string
);
1915 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1916 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1918 align_functions
= 1 << i
;
1922 /* Default align_* from the processor table. */
1923 if (align_loops
== 0)
1925 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1926 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1928 if (align_jumps
== 0)
1930 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1931 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1933 if (align_functions
== 0)
1935 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1938 /* Validate -mbranch-cost= value, or provide default. */
1939 ix86_branch_cost
= ix86_cost
->branch_cost
;
1940 if (ix86_branch_cost_string
)
1942 i
= atoi (ix86_branch_cost_string
);
1944 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1946 ix86_branch_cost
= i
;
1948 if (ix86_section_threshold_string
)
1950 i
= atoi (ix86_section_threshold_string
);
1952 error ("-mlarge-data-threshold=%d is negative", i
);
1954 ix86_section_threshold
= i
;
1957 if (ix86_tls_dialect_string
)
1959 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1960 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1961 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
1962 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
1963 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1964 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1966 error ("bad value (%s) for -mtls-dialect= switch",
1967 ix86_tls_dialect_string
);
1970 /* Keep nonleaf frame pointers. */
1971 if (flag_omit_frame_pointer
)
1972 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1973 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1974 flag_omit_frame_pointer
= 1;
1976 /* If we're doing fast math, we don't care about comparison order
1977 wrt NaNs. This lets us use a shorter comparison sequence. */
1978 if (flag_finite_math_only
)
1979 target_flags
&= ~MASK_IEEE_FP
;
1981 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1982 since the insns won't need emulation. */
1983 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1984 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1986 /* Likewise, if the target doesn't have a 387, or we've specified
1987 software floating point, don't use 387 inline intrinsics. */
1989 target_flags
|= MASK_NO_FANCY_MATH_387
;
1991 /* Turn on SSE3 builtins for -mssse3. */
1993 target_flags
|= MASK_SSE3
;
1995 /* Turn on SSE2 builtins for -msse3. */
1997 target_flags
|= MASK_SSE2
;
1999 /* Turn on SSE builtins for -msse2. */
2001 target_flags
|= MASK_SSE
;
2003 /* Turn on MMX builtins for -msse. */
2006 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2007 x86_prefetch_sse
= true;
2010 /* Turn on MMX builtins for 3Dnow. */
2012 target_flags
|= MASK_MMX
;
2016 if (TARGET_ALIGN_DOUBLE
)
2017 error ("-malign-double makes no sense in the 64bit mode");
2019 error ("-mrtd calling convention not supported in the 64bit mode");
2021 /* Enable by default the SSE and MMX builtins. Do allow the user to
2022 explicitly disable any of these. In particular, disabling SSE and
2023 MMX for kernel code is extremely useful. */
2025 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2026 & ~target_flags_explicit
);
2030 /* i386 ABI does not specify red zone. It still makes sense to use it
2031 when programmer takes care to stack from being destroyed. */
2032 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2033 target_flags
|= MASK_NO_RED_ZONE
;
2036 /* Validate -mpreferred-stack-boundary= value, or provide default.
2037 The default of 128 bits is for Pentium III's SSE __m128. We can't
2038 change it because of optimize_size. Otherwise, we can't mix object
2039 files compiled with -Os and -On. */
2040 ix86_preferred_stack_boundary
= 128;
2041 if (ix86_preferred_stack_boundary_string
)
2043 i
= atoi (ix86_preferred_stack_boundary_string
);
2044 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2045 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2046 TARGET_64BIT
? 4 : 2);
2048 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2051 /* Accept -mx87regparm only if 80387 support is enabled. */
2052 if (TARGET_X87REGPARM
2054 error ("-mx87regparm used without 80387 enabled");
2056 /* Accept -msseregparm only if at least SSE support is enabled. */
2057 if (TARGET_SSEREGPARM
2059 error ("-msseregparm used without SSE enabled");
2061 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2063 if (ix86_fpmath_string
!= 0)
2065 if (! strcmp (ix86_fpmath_string
, "387"))
2066 ix86_fpmath
= FPMATH_387
;
2067 else if (! strcmp (ix86_fpmath_string
, "sse"))
2071 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2072 ix86_fpmath
= FPMATH_387
;
2075 ix86_fpmath
= FPMATH_SSE
;
2077 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2078 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2082 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2083 ix86_fpmath
= FPMATH_387
;
2085 else if (!TARGET_80387
)
2087 warning (0, "387 instruction set disabled, using SSE arithmetics");
2088 ix86_fpmath
= FPMATH_SSE
;
2091 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2094 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2097 /* If the i387 is disabled, then do not return values in it. */
2099 target_flags
&= ~MASK_FLOAT_RETURNS
;
2101 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2102 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2104 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2106 /* ??? Unwind info is not correct around the CFG unless either a frame
2107 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2108 unwind info generation to be aware of the CFG and propagating states
2110 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2111 || flag_exceptions
|| flag_non_call_exceptions
)
2112 && flag_omit_frame_pointer
2113 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2115 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2116 warning (0, "unwind tables currently require either a frame pointer "
2117 "or -maccumulate-outgoing-args for correctness");
2118 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2121 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2124 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2125 p
= strchr (internal_label_prefix
, 'X');
2126 internal_label_prefix_len
= p
- internal_label_prefix
;
2130 /* When scheduling description is not available, disable scheduler pass
2131 so it won't slow down the compilation and make x87 code slower. */
2132 if (!TARGET_SCHEDULE
)
2133 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2135 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2136 set_param_value ("simultaneous-prefetches",
2137 ix86_cost
->simultaneous_prefetches
);
2138 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2139 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2142 /* switch to the appropriate section for output of DECL.
2143 DECL is either a `VAR_DECL' node or a constant of some sort.
2144 RELOC indicates whether forming the initial value of DECL requires
2145 link-time relocations. */
2148 x86_64_elf_select_section (tree decl
, int reloc
,
2149 unsigned HOST_WIDE_INT align
)
2151 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2152 && ix86_in_large_data_p (decl
))
2154 const char *sname
= NULL
;
2155 unsigned int flags
= SECTION_WRITE
;
2156 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2161 case SECCAT_DATA_REL
:
2162 sname
= ".ldata.rel";
2164 case SECCAT_DATA_REL_LOCAL
:
2165 sname
= ".ldata.rel.local";
2167 case SECCAT_DATA_REL_RO
:
2168 sname
= ".ldata.rel.ro";
2170 case SECCAT_DATA_REL_RO_LOCAL
:
2171 sname
= ".ldata.rel.ro.local";
2175 flags
|= SECTION_BSS
;
2178 case SECCAT_RODATA_MERGE_STR
:
2179 case SECCAT_RODATA_MERGE_STR_INIT
:
2180 case SECCAT_RODATA_MERGE_CONST
:
2184 case SECCAT_SRODATA
:
2191 /* We don't split these for medium model. Place them into
2192 default sections and hope for best. */
2197 /* We might get called with string constants, but get_named_section
2198 doesn't like them as they are not DECLs. Also, we need to set
2199 flags in that case. */
2201 return get_section (sname
, flags
, NULL
);
2202 return get_named_section (decl
, sname
, reloc
);
2205 return default_elf_select_section (decl
, reloc
, align
);
2208 /* Build up a unique section name, expressed as a
2209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210 RELOC indicates whether the initial value of EXP requires
2211 link-time relocations. */
2214 x86_64_elf_unique_section (tree decl
, int reloc
)
2216 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2217 && ix86_in_large_data_p (decl
))
2219 const char *prefix
= NULL
;
2220 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2221 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2223 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2226 case SECCAT_DATA_REL
:
2227 case SECCAT_DATA_REL_LOCAL
:
2228 case SECCAT_DATA_REL_RO
:
2229 case SECCAT_DATA_REL_RO_LOCAL
:
2230 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2233 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2236 case SECCAT_RODATA_MERGE_STR
:
2237 case SECCAT_RODATA_MERGE_STR_INIT
:
2238 case SECCAT_RODATA_MERGE_CONST
:
2239 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2241 case SECCAT_SRODATA
:
2248 /* We don't split these for medium model. Place them into
2249 default sections and hope for best. */
2257 plen
= strlen (prefix
);
2259 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2260 name
= targetm
.strip_name_encoding (name
);
2261 nlen
= strlen (name
);
2263 string
= alloca (nlen
+ plen
+ 1);
2264 memcpy (string
, prefix
, plen
);
2265 memcpy (string
+ plen
, name
, nlen
+ 1);
2267 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2271 default_unique_section (decl
, reloc
);
2274 #ifdef COMMON_ASM_OP
2275 /* This says how to output assembler code to declare an
2276 uninitialized external linkage data object.
2278 For medium model x86-64 we need to use .largecomm opcode for
2281 x86_elf_aligned_common (FILE *file
,
2282 const char *name
, unsigned HOST_WIDE_INT size
,
2285 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2286 && size
> (unsigned int)ix86_section_threshold
)
2287 fprintf (file
, ".largecomm\t");
2289 fprintf (file
, "%s", COMMON_ASM_OP
);
2290 assemble_name (file
, name
);
2291 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2292 size
, align
/ BITS_PER_UNIT
);
2295 /* Utility function for targets to use in implementing
2296 ASM_OUTPUT_ALIGNED_BSS. */
2299 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2300 const char *name
, unsigned HOST_WIDE_INT size
,
2303 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2304 && size
> (unsigned int)ix86_section_threshold
)
2305 switch_to_section (get_named_section (decl
, ".lbss", 0));
2307 switch_to_section (bss_section
);
2308 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2309 #ifdef ASM_DECLARE_OBJECT_NAME
2310 last_assemble_variable_decl
= decl
;
2311 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2313 /* Standard thing is just output label for the object. */
2314 ASM_OUTPUT_LABEL (file
, name
);
2315 #endif /* ASM_DECLARE_OBJECT_NAME */
2316 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2321 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2323 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2324 make the problem with not enough registers even worse. */
2325 #ifdef INSN_SCHEDULING
2327 flag_schedule_insns
= 0;
2331 /* The Darwin libraries never set errno, so we might as well
2332 avoid calling them when that's the only reason we would. */
2333 flag_errno_math
= 0;
2335 /* The default values of these switches depend on the TARGET_64BIT
2336 that is not known at this moment. Mark these values with 2 and
2337 let user the to override these. In case there is no command line option
2338 specifying them, we will set the defaults in override_options. */
2340 flag_omit_frame_pointer
= 2;
2341 flag_pcc_struct_return
= 2;
2342 flag_asynchronous_unwind_tables
= 2;
2343 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344 SUBTARGET_OPTIMIZATION_OPTIONS
;
2348 /* Table of valid machine attributes. */
2349 const struct attribute_spec ix86_attribute_table
[] =
2351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352 /* Stdcall attribute says callee is responsible for popping arguments
2353 if they are not variable. */
2354 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2355 /* Fastcall attribute says callee is responsible for popping arguments
2356 if they are not variable. */
2357 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2358 /* Cdecl attribute says the callee is a normal C declaration */
2359 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2360 /* Regparm attribute specifies how many integer arguments are to be
2361 passed in registers. */
2362 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2363 /* X87regparm attribute says we are passing floating point arguments
2364 in 80387 registers. */
2365 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2366 /* Sseregparm attribute says we are using x86_64 calling conventions
2367 for FP arguments. */
2368 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2369 /* force_align_arg_pointer says this function realigns the stack at entry. */
2370 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2371 false, true, true, ix86_handle_cconv_attribute
},
2372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2373 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2374 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2375 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2377 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2378 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2379 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2380 SUBTARGET_ATTRIBUTE_TABLE
,
2382 { NULL
, 0, 0, false, false, false, NULL
}
2385 /* Decide whether we can make a sibling call to a function. DECL is the
2386 declaration of the function being targeted by the call and EXP is the
2387 CALL_EXPR representing the call. */
2390 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2395 /* If we are generating position-independent code, we cannot sibcall
2396 optimize any indirect call, or a direct call to a global function,
2397 as the PLT requires %ebx be live. */
2398 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2405 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2406 if (POINTER_TYPE_P (func
))
2407 func
= TREE_TYPE (func
);
2410 /* Check that the return value locations are the same. Like
2411 if we are returning floats on the 80387 register stack, we cannot
2412 make a sibcall from a function that doesn't return a float to a
2413 function that does or, conversely, from a function that does return
2414 a float to a function that doesn't; the necessary stack adjustment
2415 would not be executed. This is also the place we notice
2416 differences in the return value ABI. Note that it is ok for one
2417 of the functions to have void return type as long as the return
2418 value of the other is passed in a register. */
2419 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2420 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2422 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2424 if (!rtx_equal_p (a
, b
))
2427 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2429 else if (!rtx_equal_p (a
, b
))
2432 /* If this call is indirect, we'll need to be able to use a call-clobbered
2433 register for the address of the target function. Make sure that all
2434 such registers are not used for passing parameters. */
2435 if (!decl
&& !TARGET_64BIT
)
2439 /* We're looking at the CALL_EXPR, we need the type of the function. */
2440 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2441 type
= TREE_TYPE (type
); /* pointer type */
2442 type
= TREE_TYPE (type
); /* function type */
2444 if (ix86_function_regparm (type
, NULL
) >= 3)
2446 /* ??? Need to count the actual number of registers to be used,
2447 not the possible number of registers. Fix later. */
2452 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2453 /* Dllimport'd functions are also called indirectly. */
2454 if (decl
&& DECL_DLLIMPORT_P (decl
)
2455 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2459 /* If we forced aligned the stack, then sibcalling would unalign the
2460 stack, which may break the called function. */
2461 if (cfun
->machine
->force_align_arg_pointer
)
2464 /* Otherwise okay. That also includes certain types of indirect calls. */
2468 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2469 and "sseregparm" calling convention attributes;
2470 arguments as in struct attribute_spec.handler. */
2473 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2475 int flags ATTRIBUTE_UNUSED
,
2478 if (TREE_CODE (*node
) != FUNCTION_TYPE
2479 && TREE_CODE (*node
) != METHOD_TYPE
2480 && TREE_CODE (*node
) != FIELD_DECL
2481 && TREE_CODE (*node
) != TYPE_DECL
)
2483 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2484 IDENTIFIER_POINTER (name
));
2485 *no_add_attrs
= true;
2489 /* Can combine regparm with all attributes but fastcall. */
2490 if (is_attribute_p ("regparm", name
))
2494 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2496 error ("fastcall and regparm attributes are not compatible");
2499 cst
= TREE_VALUE (args
);
2500 if (TREE_CODE (cst
) != INTEGER_CST
)
2502 warning (OPT_Wattributes
,
2503 "%qs attribute requires an integer constant argument",
2504 IDENTIFIER_POINTER (name
));
2505 *no_add_attrs
= true;
2507 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2509 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2510 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2511 *no_add_attrs
= true;
2515 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2516 TYPE_ATTRIBUTES (*node
))
2517 && compare_tree_int (cst
, REGPARM_MAX
-1))
2519 error ("%s functions limited to %d register parameters",
2520 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2528 warning (OPT_Wattributes
, "%qs attribute ignored",
2529 IDENTIFIER_POINTER (name
));
2530 *no_add_attrs
= true;
2534 /* Can combine fastcall with stdcall (redundant), x87regparm
2536 if (is_attribute_p ("fastcall", name
))
2538 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2540 error ("fastcall and cdecl attributes are not compatible");
2542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2544 error ("fastcall and stdcall attributes are not compatible");
2546 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2548 error ("fastcall and regparm attributes are not compatible");
2552 /* Can combine stdcall with fastcall (redundant), regparm,
2553 x87regparm and sseregparm. */
2554 else if (is_attribute_p ("stdcall", name
))
2556 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2558 error ("stdcall and cdecl attributes are not compatible");
2560 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2562 error ("stdcall and fastcall attributes are not compatible");
2566 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2567 else if (is_attribute_p ("cdecl", name
))
2569 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2571 error ("stdcall and cdecl attributes are not compatible");
2573 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2575 error ("fastcall and cdecl attributes are not compatible");
2579 /* Can combine x87regparm or sseregparm with all attributes. */
2584 /* Return 0 if the attributes for two types are incompatible, 1 if they
2585 are compatible, and 2 if they are nearly compatible (which causes a
2586 warning to be generated). */
2589 ix86_comp_type_attributes (tree type1
, tree type2
)
2591 /* Check for mismatch of non-default calling convention. */
2592 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2594 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2597 /* Check for mismatched fastcall/regparm types. */
2598 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2599 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2600 || (ix86_function_regparm (type1
, NULL
)
2601 != ix86_function_regparm (type2
, NULL
)))
2604 /* Check for mismatched x87regparm types. */
2605 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1
))
2606 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2
)))
2609 /* Check for mismatched sseregparm types. */
2610 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2611 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2614 /* Check for mismatched return types (cdecl vs stdcall). */
2615 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2616 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2622 /* Return the regparm value for a function with the indicated TYPE and DECL.
2623 DECL may be NULL when calling function indirectly
2624 or considering a libcall. */
2627 ix86_function_regparm (tree type
, tree decl
)
2630 int regparm
= ix86_regparm
;
2631 bool user_convention
= false;
2635 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2638 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2639 user_convention
= true;
2642 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2645 user_convention
= true;
2648 /* Use register calling convention for local functions when possible. */
2649 if (!TARGET_64BIT
&& !user_convention
&& decl
2650 && flag_unit_at_a_time
&& !profile_flag
)
2652 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2655 int local_regparm
, globals
= 0, regno
;
2657 /* Make sure no regparm register is taken by a global register
2659 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2660 if (global_regs
[local_regparm
])
2662 /* We can't use regparm(3) for nested functions as these use
2663 static chain pointer in third argument. */
2664 if (local_regparm
== 3
2665 && decl_function_context (decl
)
2666 && !DECL_NO_STATIC_CHAIN (decl
))
2668 /* If the function realigns its stackpointer, the
2669 prologue will clobber %ecx. If we've already
2670 generated code for the callee, the callee
2671 DECL_STRUCT_FUNCTION is gone, so we fall back to
2672 scanning the attributes for the self-realigning
2674 if ((DECL_STRUCT_FUNCTION (decl
)
2675 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2676 || (!DECL_STRUCT_FUNCTION (decl
)
2677 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2678 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2680 /* Each global register variable increases register preassure,
2681 so the more global reg vars there are, the smaller regparm
2682 optimization use, unless requested by the user explicitly. */
2683 for (regno
= 0; regno
< 6; regno
++)
2684 if (global_regs
[regno
])
2687 = globals
< local_regparm
? local_regparm
- globals
: 0;
2689 if (local_regparm
> regparm
)
2690 regparm
= local_regparm
;
2697 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2698 arguments in x87 registers for a function with the indicated
2699 TYPE and DECL. DECL may be NULL when calling function indirectly
2700 or considering a libcall. For local functions, return 2.
2701 Otherwise return 0. */
2704 ix86_function_x87regparm (tree type
, tree decl
)
2706 /* Use x87 registers to pass floating point arguments if requested
2707 by the x87regparm attribute. */
2708 if (TARGET_X87REGPARM
2710 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type
))))
2715 error ("Calling %qD with attribute x87regparm without "
2716 "80387 enabled", decl
);
2718 error ("Calling %qT with attribute x87regparm without "
2719 "80387 enabled", type
);
2726 /* For local functions, pass up to X87_REGPARM_MAX floating point
2727 arguments in x87 registers. */
2728 if (!TARGET_64BIT
&& decl
2729 && flag_unit_at_a_time
&& !profile_flag
)
2731 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2739 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2740 DFmode (2) arguments in SSE registers for a function with the
2741 indicated TYPE and DECL. DECL may be NULL when calling function
2742 indirectly or considering a libcall. Otherwise return 0. */
2745 ix86_function_sseregparm (tree type
, tree decl
)
2747 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2748 by the sseregparm attribute. */
2749 if (TARGET_SSEREGPARM
2751 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2756 error ("Calling %qD with attribute sseregparm without "
2757 "SSE/SSE2 enabled", decl
);
2759 error ("Calling %qT with attribute sseregparm without "
2760 "SSE/SSE2 enabled", type
);
2767 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2768 (and DFmode for SSE2) arguments in SSE registers,
2769 even for 32-bit targets. */
2770 if (!TARGET_64BIT
&& decl
2771 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2773 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2775 return TARGET_SSE2
? 2 : 1;
2781 /* Return true if EAX is live at the start of the function. Used by
2782 ix86_expand_prologue to determine if we need special help before
2783 calling allocate_stack_worker. */
2786 ix86_eax_live_at_start_p (void)
2788 /* Cheat. Don't bother working forward from ix86_function_regparm
2789 to the function type to whether an actual argument is located in
2790 eax. Instead just look at cfg info, which is still close enough
2791 to correct at this point. This gives false positives for broken
2792 functions that might use uninitialized data that happens to be
2793 allocated in eax, but who cares? */
2794 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2797 /* Value is the number of bytes of arguments automatically
2798 popped when returning from a subroutine call.
2799 FUNDECL is the declaration node of the function (as a tree),
2800 FUNTYPE is the data type of the function (as a tree),
2801 or for a library call it is an identifier node for the subroutine name.
2802 SIZE is the number of bytes of arguments passed on the stack.
2804 On the 80386, the RTD insn may be used to pop them if the number
2805 of args is fixed, but if the number is variable then the caller
2806 must pop them all. RTD can't be used for library calls now
2807 because the library is compiled with the Unix compiler.
2808 Use of RTD is a selectable option, since it is incompatible with
2809 standard Unix calling sequences. If the option is not selected,
2810 the caller must always pop the args.
2812 The attribute stdcall is equivalent to RTD on a per module basis. */
2815 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2817 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2819 /* Cdecl functions override -mrtd, and never pop the stack. */
2820 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2822 /* Stdcall and fastcall functions will pop the stack if not
2824 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2825 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
2829 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2830 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2831 == void_type_node
)))
2835 /* Lose any fake structure return argument if it is passed on the stack. */
2836 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2838 && !KEEP_AGGREGATE_RETURN_POINTER
)
2840 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2843 return GET_MODE_SIZE (Pmode
);
2849 /* Argument support functions. */
2851 /* Return true when register may be used to pass function parameters. */
2853 ix86_function_arg_regno_p (int regno
)
2857 return (regno
< REGPARM_MAX
2858 || (TARGET_80387
&& FP_REGNO_P (regno
)
2859 && (regno
< FIRST_FLOAT_REG
+ X87_REGPARM_MAX
))
2860 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2861 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2862 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2863 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2865 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2866 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2868 /* RAX is used as hidden argument to va_arg functions. */
2871 for (i
= 0; i
< REGPARM_MAX
; i
++)
2872 if (regno
== x86_64_int_parameter_registers
[i
])
2877 /* Return if we do not know how to pass TYPE solely in registers. */
2880 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2882 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2885 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2886 The layout_type routine is crafty and tries to trick us into passing
2887 currently unsupported vector types on the stack by using TImode. */
2888 return (!TARGET_64BIT
&& mode
== TImode
2889 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2892 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2893 for a call to a function whose data type is FNTYPE.
2894 For a library call, FNTYPE is 0. */
2897 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2898 tree fntype
, /* tree ptr for function decl */
2899 rtx libname
, /* SYMBOL_REF of library name or 0 */
2902 static CUMULATIVE_ARGS zero_cum
;
2903 tree param
, next_param
;
2905 if (TARGET_DEBUG_ARG
)
2907 fprintf (stderr
, "\ninit_cumulative_args (");
2909 fprintf (stderr
, "fntype code = %s, ret code = %s",
2910 tree_code_name
[(int) TREE_CODE (fntype
)],
2911 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2913 fprintf (stderr
, "no fntype");
2916 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2921 /* Set up the number of registers to use for passing arguments. */
2922 cum
->nregs
= ix86_regparm
;
2924 cum
->x87_nregs
= X87_REGPARM_MAX
;
2926 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2928 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2929 cum
->warn_sse
= true;
2930 cum
->warn_mmx
= true;
2931 cum
->maybe_vaarg
= false;
2933 /* Use ecx and edx registers if function has fastcall attribute,
2934 else look for regparm information. */
2935 if (fntype
&& !TARGET_64BIT
)
2937 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2943 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2946 /* Set up the number of 80387 registers used for passing
2947 floating point arguments. Warn for mismatching ABI. */
2948 cum
->float_in_x87
= ix86_function_x87regparm (fntype
, fndecl
);
2950 /* Set up the number of SSE registers used for passing SFmode
2951 and DFmode arguments. Warn for mismatching ABI. */
2952 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2954 /* Determine if this function has variable arguments. This is
2955 indicated by the last argument being 'void_type_mode' if there
2956 are no variable arguments. If there are variable arguments, then
2957 we won't pass anything in registers in 32-bit mode. */
2959 if (cum
->nregs
|| cum
->mmx_nregs
2960 || cum
->x87_nregs
|| cum
->sse_nregs
)
2962 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2963 param
!= 0; param
= next_param
)
2965 next_param
= TREE_CHAIN (param
);
2966 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2977 cum
->float_in_x87
= 0;
2978 cum
->float_in_sse
= 0;
2980 cum
->maybe_vaarg
= true;
2984 if ((!fntype
&& !libname
)
2985 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2986 cum
->maybe_vaarg
= true;
2988 if (TARGET_DEBUG_ARG
)
2989 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2994 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2995 But in the case of vector types, it is some vector mode.
2997 When we have only some of our vector isa extensions enabled, then there
2998 are some modes for which vector_mode_supported_p is false. For these
2999 modes, the generic vector support in gcc will choose some non-vector mode
3000 in order to implement the type. By computing the natural mode, we'll
3001 select the proper ABI location for the operand and not depend on whatever
3002 the middle-end decides to do with these vector types. */
3004 static enum machine_mode
3005 type_natural_mode (tree type
)
3007 enum machine_mode mode
= TYPE_MODE (type
);
3009 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3011 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3012 if ((size
== 8 || size
== 16)
3013 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3014 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3016 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3018 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3019 mode
= MIN_MODE_VECTOR_FLOAT
;
3021 mode
= MIN_MODE_VECTOR_INT
;
3023 /* Get the mode which has this inner mode and number of units. */
3024 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3025 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3026 && GET_MODE_INNER (mode
) == innermode
)
3036 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3037 this may not agree with the mode that the type system has chosen for the
3038 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3039 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3042 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3047 if (orig_mode
!= BLKmode
)
3048 tmp
= gen_rtx_REG (orig_mode
, regno
);
3051 tmp
= gen_rtx_REG (mode
, regno
);
3052 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3053 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3059 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3060 of this code is to classify each 8bytes of incoming argument by the register
3061 class and assign registers accordingly. */
3063 /* Return the union class of CLASS1 and CLASS2.
3064 See the x86-64 PS ABI for details. */
3066 static enum x86_64_reg_class
3067 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3069 /* Rule #1: If both classes are equal, this is the resulting class. */
3070 if (class1
== class2
)
3073 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3075 if (class1
== X86_64_NO_CLASS
)
3077 if (class2
== X86_64_NO_CLASS
)
3080 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3081 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3082 return X86_64_MEMORY_CLASS
;
3084 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3085 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3086 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3087 return X86_64_INTEGERSI_CLASS
;
3088 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3089 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3090 return X86_64_INTEGER_CLASS
;
3092 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3094 if (class1
== X86_64_X87_CLASS
3095 || class1
== X86_64_X87UP_CLASS
3096 || class1
== X86_64_COMPLEX_X87_CLASS
3097 || class2
== X86_64_X87_CLASS
3098 || class2
== X86_64_X87UP_CLASS
3099 || class2
== X86_64_COMPLEX_X87_CLASS
)
3100 return X86_64_MEMORY_CLASS
;
3102 /* Rule #6: Otherwise class SSE is used. */
3103 return X86_64_SSE_CLASS
;
3106 /* Classify the argument of type TYPE and mode MODE.
3107 CLASSES will be filled by the register class used to pass each word
3108 of the operand. The number of words is returned. In case the parameter
3109 should be passed in memory, 0 is returned. As a special case for zero
3110 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3112 BIT_OFFSET is used internally for handling records and specifies offset
3113 of the offset in bits modulo 256 to avoid overflow cases.
3115 See the x86-64 PS ABI for details.
3119 classify_argument (enum machine_mode mode
, tree type
,
3120 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3122 HOST_WIDE_INT bytes
=
3123 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3124 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3126 /* Variable sized entities are always passed/returned in memory. */
3130 if (mode
!= VOIDmode
3131 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3134 if (type
&& AGGREGATE_TYPE_P (type
))
3138 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3140 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3144 for (i
= 0; i
< words
; i
++)
3145 classes
[i
] = X86_64_NO_CLASS
;
3147 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3148 signalize memory class, so handle it as special case. */
3151 classes
[0] = X86_64_NO_CLASS
;
3155 /* Classify each field of record and merge classes. */
3156 switch (TREE_CODE (type
))
3159 /* And now merge the fields of structure. */
3160 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3162 if (TREE_CODE (field
) == FIELD_DECL
)
3166 if (TREE_TYPE (field
) == error_mark_node
)
3169 /* Bitfields are always classified as integer. Handle them
3170 early, since later code would consider them to be
3171 misaligned integers. */
3172 if (DECL_BIT_FIELD (field
))
3174 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3175 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3176 + tree_low_cst (DECL_SIZE (field
), 0)
3179 merge_classes (X86_64_INTEGER_CLASS
,
3184 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3185 TREE_TYPE (field
), subclasses
,
3186 (int_bit_position (field
)
3187 + bit_offset
) % 256);
3190 for (i
= 0; i
< num
; i
++)
3193 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3195 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3203 /* Arrays are handled as small records. */
3206 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3207 TREE_TYPE (type
), subclasses
, bit_offset
);
3211 /* The partial classes are now full classes. */
3212 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3213 subclasses
[0] = X86_64_SSE_CLASS
;
3214 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3215 subclasses
[0] = X86_64_INTEGER_CLASS
;
3217 for (i
= 0; i
< words
; i
++)
3218 classes
[i
] = subclasses
[i
% num
];
3223 case QUAL_UNION_TYPE
:
3224 /* Unions are similar to RECORD_TYPE but offset is always 0.
3226 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3228 if (TREE_CODE (field
) == FIELD_DECL
)
3232 if (TREE_TYPE (field
) == error_mark_node
)
3235 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3236 TREE_TYPE (field
), subclasses
,
3240 for (i
= 0; i
< num
; i
++)
3241 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3250 /* Final merger cleanup. */
3251 for (i
= 0; i
< words
; i
++)
3253 /* If one class is MEMORY, everything should be passed in
3255 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3258 /* The X86_64_SSEUP_CLASS should be always preceded by
3259 X86_64_SSE_CLASS. */
3260 if (classes
[i
] == X86_64_SSEUP_CLASS
3261 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3262 classes
[i
] = X86_64_SSE_CLASS
;
3264 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3265 if (classes
[i
] == X86_64_X87UP_CLASS
3266 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3267 classes
[i
] = X86_64_SSE_CLASS
;
3272 /* Compute alignment needed. We align all types to natural boundaries with
3273 exception of XFmode that is aligned to 64bits. */
3274 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3276 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3279 mode_alignment
= 128;
3280 else if (mode
== XCmode
)
3281 mode_alignment
= 256;
3282 if (COMPLEX_MODE_P (mode
))
3283 mode_alignment
/= 2;
3284 /* Misaligned fields are always returned in memory. */
3285 if (bit_offset
% mode_alignment
)
3289 /* for V1xx modes, just use the base mode */
3290 if (VECTOR_MODE_P (mode
)
3291 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3292 mode
= GET_MODE_INNER (mode
);
3294 /* Classification of atomic types. */
3299 classes
[0] = X86_64_SSE_CLASS
;
3302 classes
[0] = X86_64_SSE_CLASS
;
3303 classes
[1] = X86_64_SSEUP_CLASS
;
3312 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3313 classes
[0] = X86_64_INTEGERSI_CLASS
;
3315 classes
[0] = X86_64_INTEGER_CLASS
;
3319 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3324 if (!(bit_offset
% 64))
3325 classes
[0] = X86_64_SSESF_CLASS
;
3327 classes
[0] = X86_64_SSE_CLASS
;
3330 classes
[0] = X86_64_SSEDF_CLASS
;
3333 classes
[0] = X86_64_X87_CLASS
;
3334 classes
[1] = X86_64_X87UP_CLASS
;
3337 classes
[0] = X86_64_SSE_CLASS
;
3338 classes
[1] = X86_64_SSEUP_CLASS
;
3341 classes
[0] = X86_64_SSE_CLASS
;
3344 classes
[0] = X86_64_SSEDF_CLASS
;
3345 classes
[1] = X86_64_SSEDF_CLASS
;
3348 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3351 /* This modes is larger than 16 bytes. */
3359 classes
[0] = X86_64_SSE_CLASS
;
3360 classes
[1] = X86_64_SSEUP_CLASS
;
3366 classes
[0] = X86_64_SSE_CLASS
;
3372 gcc_assert (VECTOR_MODE_P (mode
));
3377 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3379 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3380 classes
[0] = X86_64_INTEGERSI_CLASS
;
3382 classes
[0] = X86_64_INTEGER_CLASS
;
3383 classes
[1] = X86_64_INTEGER_CLASS
;
3384 return 1 + (bytes
> 8);
3388 /* Examine the argument and return set number of register required in each
3389 class. Return 0 iff parameter should be passed in memory. */
3391 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3392 int *int_nregs
, int *sse_nregs
)
3394 enum x86_64_reg_class
class[MAX_CLASSES
];
3395 int n
= classify_argument (mode
, type
, class, 0);
3401 for (n
--; n
>= 0; n
--)
3404 case X86_64_INTEGER_CLASS
:
3405 case X86_64_INTEGERSI_CLASS
:
3408 case X86_64_SSE_CLASS
:
3409 case X86_64_SSESF_CLASS
:
3410 case X86_64_SSEDF_CLASS
:
3413 case X86_64_NO_CLASS
:
3414 case X86_64_SSEUP_CLASS
:
3416 case X86_64_X87_CLASS
:
3417 case X86_64_X87UP_CLASS
:
3421 case X86_64_COMPLEX_X87_CLASS
:
3422 return in_return
? 2 : 0;
3423 case X86_64_MEMORY_CLASS
:
3429 /* Construct container for the argument used by GCC interface. See
3430 FUNCTION_ARG for the detailed description. */
3433 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3434 tree type
, int in_return
, int nintregs
, int nsseregs
,
3435 const int *intreg
, int sse_regno
)
3437 /* The following variables hold the static issued_error state. */
3438 static bool issued_sse_arg_error
;
3439 static bool issued_sse_ret_error
;
3440 static bool issued_x87_ret_error
;
3442 enum machine_mode tmpmode
;
3444 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3445 enum x86_64_reg_class
class[MAX_CLASSES
];
3449 int needed_sseregs
, needed_intregs
;
3450 rtx exp
[MAX_CLASSES
];
3453 n
= classify_argument (mode
, type
, class, 0);
3454 if (TARGET_DEBUG_ARG
)
3457 fprintf (stderr
, "Memory class\n");
3460 fprintf (stderr
, "Classes:");
3461 for (i
= 0; i
< n
; i
++)
3463 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3465 fprintf (stderr
, "\n");
3470 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3473 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3476 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3477 some less clueful developer tries to use floating-point anyway. */
3478 if (needed_sseregs
&& !TARGET_SSE
)
3482 if (!issued_sse_ret_error
)
3484 error ("SSE register return with SSE disabled");
3485 issued_sse_ret_error
= true;
3488 else if (!issued_sse_arg_error
)
3490 error ("SSE register argument with SSE disabled");
3491 issued_sse_arg_error
= true;
3496 /* Likewise, error if the ABI requires us to return values in the
3497 x87 registers and the user specified -mno-80387. */
3498 if (!TARGET_80387
&& in_return
)
3499 for (i
= 0; i
< n
; i
++)
3500 if (class[i
] == X86_64_X87_CLASS
3501 || class[i
] == X86_64_X87UP_CLASS
3502 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3504 if (!issued_x87_ret_error
)
3506 error ("x87 register return with x87 disabled");
3507 issued_x87_ret_error
= true;
3512 /* First construct simple cases. Avoid SCmode, since we want to use
3513 single register to pass this type. */
3514 if (n
== 1 && mode
!= SCmode
)
3517 case X86_64_INTEGER_CLASS
:
3518 case X86_64_INTEGERSI_CLASS
:
3519 return gen_rtx_REG (mode
, intreg
[0]);
3520 case X86_64_SSE_CLASS
:
3521 case X86_64_SSESF_CLASS
:
3522 case X86_64_SSEDF_CLASS
:
3523 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3524 case X86_64_X87_CLASS
:
3525 case X86_64_COMPLEX_X87_CLASS
:
3526 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3527 case X86_64_NO_CLASS
:
3528 /* Zero sized array, struct or class. */
3533 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3535 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3537 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3538 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3539 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3540 && class[1] == X86_64_INTEGER_CLASS
3541 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3542 && intreg
[0] + 1 == intreg
[1])
3543 return gen_rtx_REG (mode
, intreg
[0]);
3545 /* Otherwise figure out the entries of the PARALLEL. */
3546 for (i
= 0; i
< n
; i
++)
3550 case X86_64_NO_CLASS
:
3552 case X86_64_INTEGER_CLASS
:
3553 case X86_64_INTEGERSI_CLASS
:
3554 /* Merge TImodes on aligned occasions here too. */
3555 if (i
* 8 + 8 > bytes
)
3556 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3557 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3561 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3562 if (tmpmode
== BLKmode
)
3564 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3565 gen_rtx_REG (tmpmode
, *intreg
),
3569 case X86_64_SSESF_CLASS
:
3570 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3571 gen_rtx_REG (SFmode
,
3572 SSE_REGNO (sse_regno
)),
3576 case X86_64_SSEDF_CLASS
:
3577 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3578 gen_rtx_REG (DFmode
,
3579 SSE_REGNO (sse_regno
)),
3583 case X86_64_SSE_CLASS
:
3584 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3588 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3589 gen_rtx_REG (tmpmode
,
3590 SSE_REGNO (sse_regno
)),
3592 if (tmpmode
== TImode
)
3601 /* Empty aligned struct, union or class. */
3605 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3606 for (i
= 0; i
< nexps
; i
++)
3607 XVECEXP (ret
, 0, i
) = exp
[i
];
3611 /* Update the data in CUM to advance over an argument
3612 of mode MODE and data type TYPE.
3613 (TYPE is null for libcalls where that information may not be available.) */
3616 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3617 tree type
, int named
)
3620 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3621 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3624 mode
= type_natural_mode (type
);
3626 if (TARGET_DEBUG_ARG
)
3627 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3628 "mode=%s, named=%d)\n\n",
3629 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3630 GET_MODE_NAME (mode
), named
);
3634 int int_nregs
, sse_nregs
;
3635 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3636 cum
->words
+= words
;
3637 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3639 cum
->nregs
-= int_nregs
;
3640 cum
->sse_nregs
-= sse_nregs
;
3641 cum
->regno
+= int_nregs
;
3642 cum
->sse_regno
+= sse_nregs
;
3645 cum
->words
+= words
;
3663 cum
->words
+= words
;
3664 cum
->nregs
-= words
;
3665 cum
->regno
+= words
;
3667 if (cum
->nregs
<= 0)
3675 if (cum
->float_in_sse
> 0)
3679 if (cum
->float_in_sse
> 1)
3682 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3683 rounding takes place when values are passed in x87
3684 registers, pass DFmode and SFmode types to local functions
3685 only when flag_unsafe_math_optimizations is set. */
3686 if (!cum
->float_in_x87
3687 || (cum
->float_in_x87
== 2
3688 && !flag_unsafe_math_optimizations
))
3692 if (!cum
->float_in_x87
)
3695 if (!type
|| !AGGREGATE_TYPE_P (type
))
3697 cum
->x87_nregs
-= 1;
3698 cum
->x87_regno
+= 1;
3699 if (cum
->x87_nregs
<= 0)
3716 if (!type
|| !AGGREGATE_TYPE_P (type
))
3718 cum
->sse_nregs
-= 1;
3719 cum
->sse_regno
+= 1;
3720 if (cum
->sse_nregs
<= 0)
3732 if (!type
|| !AGGREGATE_TYPE_P (type
))
3734 cum
->mmx_nregs
-= 1;
3735 cum
->mmx_regno
+= 1;
3736 if (cum
->mmx_nregs
<= 0)
3747 /* Define where to put the arguments to a function.
3748 Value is zero to push the argument on the stack,
3749 or a hard register in which to store the argument.
3751 MODE is the argument's machine mode.
3752 TYPE is the data type of the argument (as a tree).
3753 This is null for libcalls where that information may
3755 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3756 the preceding args and about the function being called.
3757 NAMED is nonzero if this argument is a named parameter
3758 (otherwise it is an extra parameter matching an ellipsis). */
3761 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3762 tree type
, int named
)
3764 enum machine_mode mode
= orig_mode
;
3767 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3768 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3769 static bool warnedsse
, warnedmmx
;
3771 /* To simplify the code below, represent vector types with a vector mode
3772 even if MMX/SSE are not active. */
3773 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3774 mode
= type_natural_mode (type
);
3776 /* Handle a hidden AL argument containing number of registers for varargs
3777 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3779 if (mode
== VOIDmode
)
3782 return GEN_INT (cum
->maybe_vaarg
3783 ? (cum
->sse_nregs
< 0
3791 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3793 &x86_64_int_parameter_registers
[cum
->regno
],
3809 if (words
<= cum
->nregs
)
3811 int regno
= cum
->regno
;
3813 /* Fastcall allocates the first two DWORD (SImode) or
3814 smaller arguments to ECX and EDX. */
3817 if (mode
== BLKmode
|| mode
== DImode
)
3820 /* ECX not EAX is the first allocated register. */
3824 ret
= gen_rtx_REG (mode
, regno
);
3829 if (cum
->float_in_sse
> 0)
3833 if (cum
->float_in_sse
> 1)
3836 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3837 rounding takes place when values are passed in x87
3838 registers, pass DFmode and SFmode types to local functions
3839 only when flag_unsafe_math_optimizations is set. */
3840 if (!cum
->float_in_x87
3841 || (cum
->float_in_x87
== 2
3842 && !flag_unsafe_math_optimizations
))
3846 if (!cum
->float_in_x87
)
3849 if (!type
|| !AGGREGATE_TYPE_P (type
))
3851 ret
= gen_rtx_REG (mode
, cum
->x87_regno
+ FIRST_FLOAT_REG
);
3863 if (!type
|| !AGGREGATE_TYPE_P (type
))
3865 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3868 warning (0, "SSE vector argument without SSE enabled "
3872 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3873 cum
->sse_regno
+ FIRST_SSE_REG
);
3880 if (!type
|| !AGGREGATE_TYPE_P (type
))
3882 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3885 warning (0, "MMX vector argument without MMX enabled "
3889 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3890 cum
->mmx_regno
+ FIRST_MMX_REG
);
3895 if (TARGET_DEBUG_ARG
)
3898 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3899 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3902 print_simple_rtl (stderr
, ret
);
3904 fprintf (stderr
, ", stack");
3906 fprintf (stderr
, " )\n");
3912 /* A C expression that indicates when an argument must be passed by
3913 reference. If nonzero for an argument, a copy of that argument is
3914 made in memory and a pointer to the argument is passed instead of
3915 the argument itself. The pointer is passed in whatever way is
3916 appropriate for passing a pointer to that type. */
3919 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3920 enum machine_mode mode ATTRIBUTE_UNUSED
,
3921 tree type
, bool named ATTRIBUTE_UNUSED
)
3926 if (type
&& int_size_in_bytes (type
) == -1)
3928 if (TARGET_DEBUG_ARG
)
3929 fprintf (stderr
, "function_arg_pass_by_reference\n");
3936 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3937 ABI. Only called if TARGET_SSE. */
3939 contains_128bit_aligned_vector_p (tree type
)
3941 enum machine_mode mode
= TYPE_MODE (type
);
3942 if (SSE_REG_MODE_P (mode
)
3943 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3945 if (TYPE_ALIGN (type
) < 128)
3948 if (AGGREGATE_TYPE_P (type
))
3950 /* Walk the aggregates recursively. */
3951 switch (TREE_CODE (type
))
3955 case QUAL_UNION_TYPE
:
3959 /* Walk all the structure fields. */
3960 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3962 if (TREE_CODE (field
) == FIELD_DECL
3963 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3970 /* Just for use if some languages passes arrays by value. */
3971 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3982 /* Gives the alignment boundary, in bits, of an argument with the
3983 specified mode and type. */
3986 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3990 align
= TYPE_ALIGN (type
);
3992 align
= GET_MODE_ALIGNMENT (mode
);
3993 if (align
< PARM_BOUNDARY
)
3994 align
= PARM_BOUNDARY
;
3997 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3998 make an exception for SSE modes since these require 128bit
4001 The handling here differs from field_alignment. ICC aligns MMX
4002 arguments to 4 byte boundaries, while structure fields are aligned
4003 to 8 byte boundaries. */
4005 align
= PARM_BOUNDARY
;
4008 if (!SSE_REG_MODE_P (mode
))
4009 align
= PARM_BOUNDARY
;
4013 if (!contains_128bit_aligned_vector_p (type
))
4014 align
= PARM_BOUNDARY
;
4022 /* Return true if N is a possible register number of function value. */
4024 ix86_function_value_regno_p (int regno
)
4027 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
4028 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
4032 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
4038 /* Define how to find the value returned by a function.
4039 VALTYPE is the data type of the value (as a tree).
4040 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4041 otherwise, FUNC is 0. */
4043 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4044 bool outgoing ATTRIBUTE_UNUSED
)
4046 enum machine_mode natmode
= type_natural_mode (valtype
);
4050 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
4051 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
4052 x86_64_int_return_registers
, 0);
4053 /* For zero sized structures, construct_container return NULL, but we
4054 need to keep rest of compiler happy by returning meaningful value. */
4056 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
4061 tree fn
= NULL_TREE
, fntype
;
4063 && DECL_P (fntype_or_decl
))
4064 fn
= fntype_or_decl
;
4065 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4066 return gen_rtx_REG (TYPE_MODE (valtype
),
4067 ix86_value_regno (natmode
, fn
, fntype
));
4071 /* Return true iff type is returned in memory. */
4073 ix86_return_in_memory (tree type
)
4075 int needed_intregs
, needed_sseregs
, size
;
4076 enum machine_mode mode
= type_natural_mode (type
);
4079 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4081 if (mode
== BLKmode
)
4084 size
= int_size_in_bytes (type
);
4086 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4089 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4091 /* User-created vectors small enough to fit in EAX. */
4095 /* MMX/3dNow values are returned in MM0,
4096 except when it doesn't exits. */
4098 return (TARGET_MMX
? 0 : 1);
4100 /* SSE values are returned in XMM0, except when it doesn't exist. */
4102 return (TARGET_SSE
? 0 : 1);
4116 /* When returning SSE vector types, we have a choice of either
4117 (1) being abi incompatible with a -march switch, or
4118 (2) generating an error.
4119 Given no good solution, I think the safest thing is one warning.
4120 The user won't be able to use -Werror, but....
4122 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4123 called in response to actually generating a caller or callee that
4124 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4125 via aggregate_value_p for general type probing from tree-ssa. */
4128 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4130 static bool warnedsse
, warnedmmx
;
4134 /* Look at the return type of the function, not the function type. */
4135 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4137 if (!TARGET_SSE
&& !warnedsse
)
4140 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4143 warning (0, "SSE vector return without SSE enabled "
4148 if (!TARGET_MMX
&& !warnedmmx
)
4150 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4153 warning (0, "MMX vector return without MMX enabled "
4162 /* Define how to find the value returned by a library function
4163 assuming the value has mode MODE. */
4165 ix86_libcall_value (enum machine_mode mode
)
4179 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4182 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4186 return gen_rtx_REG (mode
, 0);
4190 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4193 /* Given a mode, return the register to use for a return value. */
4196 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4198 gcc_assert (!TARGET_64BIT
);
4200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4201 we normally prevent this case when mmx is not available. However
4202 some ABIs may require the result to be returned like DImode. */
4203 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4204 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4207 we prevent this case when sse is not available. However some ABIs
4208 may require the result to be returned like integer TImode. */
4209 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4210 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4212 /* Decimal floating point values can go in %eax, unlike other float modes. */
4213 if (DECIMAL_FLOAT_MODE_P (mode
))
4216 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4217 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4220 /* Floating point return values in %st(0), except for local functions when
4221 SSE math is enabled or for functions with sseregparm attribute. */
4222 if ((func
|| fntype
)
4223 && (mode
== SFmode
|| mode
== DFmode
))
4225 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4226 if ((sse_level
>= 1 && mode
== SFmode
)
4227 || (sse_level
== 2 && mode
== DFmode
))
4228 return FIRST_SSE_REG
;
4231 return FIRST_FLOAT_REG
;
4234 /* Create the va_list data type. */
4237 ix86_build_builtin_va_list (void)
4239 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4241 /* For i386 we use plain pointer to argument area. */
4243 return build_pointer_type (char_type_node
);
4245 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4246 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4248 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4249 unsigned_type_node
);
4250 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4251 unsigned_type_node
);
4252 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4254 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4257 va_list_gpr_counter_field
= f_gpr
;
4258 va_list_fpr_counter_field
= f_fpr
;
4260 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4261 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4262 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4263 DECL_FIELD_CONTEXT (f_sav
) = record
;
4265 TREE_CHAIN (record
) = type_decl
;
4266 TYPE_NAME (record
) = type_decl
;
4267 TYPE_FIELDS (record
) = f_gpr
;
4268 TREE_CHAIN (f_gpr
) = f_fpr
;
4269 TREE_CHAIN (f_fpr
) = f_ovf
;
4270 TREE_CHAIN (f_ovf
) = f_sav
;
4272 layout_type (record
);
4274 /* The correct type is an array type of one element. */
4275 return build_array_type (record
, build_index_type (size_zero_node
));
4278 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4281 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4282 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4285 CUMULATIVE_ARGS next_cum
;
4286 rtx save_area
= NULL_RTX
, mem
;
4299 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4302 /* Indicate to allocate space on the stack for varargs save area. */
4303 ix86_save_varrargs_registers
= 1;
4305 cfun
->stack_alignment_needed
= 128;
4307 fntype
= TREE_TYPE (current_function_decl
);
4308 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4309 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4310 != void_type_node
));
4312 /* For varargs, we do not want to skip the dummy va_dcl argument.
4313 For stdargs, we do want to skip the last named argument. */
4316 function_arg_advance (&next_cum
, mode
, type
, 1);
4319 save_area
= frame_pointer_rtx
;
4321 set
= get_varargs_alias_set ();
4323 for (i
= next_cum
.regno
;
4325 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4328 mem
= gen_rtx_MEM (Pmode
,
4329 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4330 MEM_NOTRAP_P (mem
) = 1;
4331 set_mem_alias_set (mem
, set
);
4332 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4333 x86_64_int_parameter_registers
[i
]));
4336 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4338 /* Now emit code to save SSE registers. The AX parameter contains number
4339 of SSE parameter registers used to call this function. We use
4340 sse_prologue_save insn template that produces computed jump across
4341 SSE saves. We need some preparation work to get this working. */
4343 label
= gen_label_rtx ();
4344 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4346 /* Compute address to jump to :
4347 label - 5*eax + nnamed_sse_arguments*5 */
4348 tmp_reg
= gen_reg_rtx (Pmode
);
4349 nsse_reg
= gen_reg_rtx (Pmode
);
4350 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4351 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4352 gen_rtx_MULT (Pmode
, nsse_reg
,
4354 if (next_cum
.sse_regno
)
4357 gen_rtx_CONST (DImode
,
4358 gen_rtx_PLUS (DImode
,
4360 GEN_INT (next_cum
.sse_regno
* 4))));
4362 emit_move_insn (nsse_reg
, label_ref
);
4363 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4365 /* Compute address of memory block we save into. We always use pointer
4366 pointing 127 bytes after first byte to store - this is needed to keep
4367 instruction size limited by 4 bytes. */
4368 tmp_reg
= gen_reg_rtx (Pmode
);
4369 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4370 plus_constant (save_area
,
4371 8 * REGPARM_MAX
+ 127)));
4372 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4373 MEM_NOTRAP_P (mem
) = 1;
4374 set_mem_alias_set (mem
, set
);
4375 set_mem_align (mem
, BITS_PER_WORD
);
4377 /* And finally do the dirty job! */
4378 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4379 GEN_INT (next_cum
.sse_regno
), label
));
4384 /* Implement va_start. */
4387 ix86_va_start (tree valist
, rtx nextarg
)
4389 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4390 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4391 tree gpr
, fpr
, ovf
, sav
, t
;
4394 /* Only 64bit target needs something special. */
4397 std_expand_builtin_va_start (valist
, nextarg
);
4401 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4402 f_fpr
= TREE_CHAIN (f_gpr
);
4403 f_ovf
= TREE_CHAIN (f_fpr
);
4404 f_sav
= TREE_CHAIN (f_ovf
);
4406 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4407 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4408 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4409 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4410 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4412 /* Count number of gp and fp argument registers used. */
4413 words
= current_function_args_info
.words
;
4414 n_gpr
= current_function_args_info
.regno
;
4415 n_fpr
= current_function_args_info
.sse_regno
;
4417 if (TARGET_DEBUG_ARG
)
4418 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4419 (int) words
, (int) n_gpr
, (int) n_fpr
);
4421 if (cfun
->va_list_gpr_size
)
4423 type
= TREE_TYPE (gpr
);
4424 t
= build2 (MODIFY_EXPR
, type
, gpr
,
4425 build_int_cst (type
, n_gpr
* 8));
4426 TREE_SIDE_EFFECTS (t
) = 1;
4427 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4430 if (cfun
->va_list_fpr_size
)
4432 type
= TREE_TYPE (fpr
);
4433 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4434 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4435 TREE_SIDE_EFFECTS (t
) = 1;
4436 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4439 /* Find the overflow area. */
4440 type
= TREE_TYPE (ovf
);
4441 t
= make_tree (type
, virtual_incoming_args_rtx
);
4443 t
= build2 (PLUS_EXPR
, type
, t
,
4444 build_int_cst (type
, words
* UNITS_PER_WORD
));
4445 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4446 TREE_SIDE_EFFECTS (t
) = 1;
4447 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4449 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4451 /* Find the register save area.
4452 Prologue of the function save it right above stack frame. */
4453 type
= TREE_TYPE (sav
);
4454 t
= make_tree (type
, frame_pointer_rtx
);
4455 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4456 TREE_SIDE_EFFECTS (t
) = 1;
4457 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4461 /* Implement va_arg. */
4464 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4466 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4467 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4468 tree gpr
, fpr
, ovf
, sav
, t
;
4470 tree lab_false
, lab_over
= NULL_TREE
;
4475 enum machine_mode nat_mode
;
4477 /* Only 64bit target needs something special. */
4479 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4481 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4482 f_fpr
= TREE_CHAIN (f_gpr
);
4483 f_ovf
= TREE_CHAIN (f_fpr
);
4484 f_sav
= TREE_CHAIN (f_ovf
);
4486 valist
= build_va_arg_indirect_ref (valist
);
4487 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4488 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4489 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4490 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4492 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4494 type
= build_pointer_type (type
);
4495 size
= int_size_in_bytes (type
);
4496 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4498 nat_mode
= type_natural_mode (type
);
4499 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4500 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4502 /* Pull the value out of the saved registers. */
4504 addr
= create_tmp_var (ptr_type_node
, "addr");
4505 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4509 int needed_intregs
, needed_sseregs
;
4511 tree int_addr
, sse_addr
;
4513 lab_false
= create_artificial_label ();
4514 lab_over
= create_artificial_label ();
4516 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4518 need_temp
= (!REG_P (container
)
4519 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4520 || TYPE_ALIGN (type
) > 128));
4522 /* In case we are passing structure, verify that it is consecutive block
4523 on the register save area. If not we need to do moves. */
4524 if (!need_temp
&& !REG_P (container
))
4526 /* Verify that all registers are strictly consecutive */
4527 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4531 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4533 rtx slot
= XVECEXP (container
, 0, i
);
4534 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4535 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4543 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4545 rtx slot
= XVECEXP (container
, 0, i
);
4546 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4547 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4559 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4560 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4561 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4562 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4565 /* First ensure that we fit completely in registers. */
4568 t
= build_int_cst (TREE_TYPE (gpr
),
4569 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4570 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4571 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4572 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4573 gimplify_and_add (t
, pre_p
);
4577 t
= build_int_cst (TREE_TYPE (fpr
),
4578 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4580 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4581 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4582 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4583 gimplify_and_add (t
, pre_p
);
4586 /* Compute index to start of area used for integer regs. */
4589 /* int_addr = gpr + sav; */
4590 t
= fold_convert (ptr_type_node
, gpr
);
4591 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4592 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
4593 gimplify_and_add (t
, pre_p
);
4597 /* sse_addr = fpr + sav; */
4598 t
= fold_convert (ptr_type_node
, fpr
);
4599 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4600 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
4601 gimplify_and_add (t
, pre_p
);
4606 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4609 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4610 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4611 gimplify_and_add (t
, pre_p
);
4613 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4615 rtx slot
= XVECEXP (container
, 0, i
);
4616 rtx reg
= XEXP (slot
, 0);
4617 enum machine_mode mode
= GET_MODE (reg
);
4618 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4619 tree addr_type
= build_pointer_type (piece_type
);
4622 tree dest_addr
, dest
;
4624 if (SSE_REGNO_P (REGNO (reg
)))
4626 src_addr
= sse_addr
;
4627 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4631 src_addr
= int_addr
;
4632 src_offset
= REGNO (reg
) * 8;
4634 src_addr
= fold_convert (addr_type
, src_addr
);
4635 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4636 size_int (src_offset
)));
4637 src
= build_va_arg_indirect_ref (src_addr
);
4639 dest_addr
= fold_convert (addr_type
, addr
);
4640 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4641 size_int (INTVAL (XEXP (slot
, 1)))));
4642 dest
= build_va_arg_indirect_ref (dest_addr
);
4644 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
4645 gimplify_and_add (t
, pre_p
);
4651 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4652 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4653 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
4654 gimplify_and_add (t
, pre_p
);
4658 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4659 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4660 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
4661 gimplify_and_add (t
, pre_p
);
4664 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4665 gimplify_and_add (t
, pre_p
);
4667 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4668 append_to_statement_list (t
, pre_p
);
4671 /* ... otherwise out of the overflow area. */
4673 /* Care for on-stack alignment if needed. */
4674 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4675 || integer_zerop (TYPE_SIZE (type
)))
4679 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4680 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4681 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4682 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4683 build_int_cst (TREE_TYPE (t
), -align
));
4685 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4687 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4688 gimplify_and_add (t2
, pre_p
);
4690 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4691 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4692 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
4693 gimplify_and_add (t
, pre_p
);
4697 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4698 append_to_statement_list (t
, pre_p
);
4701 ptrtype
= build_pointer_type (type
);
4702 addr
= fold_convert (ptrtype
, addr
);
4705 addr
= build_va_arg_indirect_ref (addr
);
4706 return build_va_arg_indirect_ref (addr
);
4709 /* Return nonzero if OPNUM's MEM should be matched
4710 in movabs* patterns. */
4713 ix86_check_movabs (rtx insn
, int opnum
)
4717 set
= PATTERN (insn
);
4718 if (GET_CODE (set
) == PARALLEL
)
4719 set
= XVECEXP (set
, 0, 0);
4720 gcc_assert (GET_CODE (set
) == SET
);
4721 mem
= XEXP (set
, opnum
);
4722 while (GET_CODE (mem
) == SUBREG
)
4723 mem
= SUBREG_REG (mem
);
4724 gcc_assert (GET_CODE (mem
) == MEM
);
4725 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4728 /* Initialize the table of extra 80387 mathematical constants. */
4731 init_ext_80387_constants (void)
4733 static const char * cst
[5] =
4735 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4736 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4737 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4738 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4739 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4743 for (i
= 0; i
< 5; i
++)
4745 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4746 /* Ensure each constant is rounded to XFmode precision. */
4747 real_convert (&ext_80387_constants_table
[i
],
4748 XFmode
, &ext_80387_constants_table
[i
]);
4751 ext_80387_constants_init
= 1;
4754 /* Return true if the constant is something that can be loaded with
4755 a special instruction. */
4758 standard_80387_constant_p (rtx x
)
4762 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4765 if (x
== CONST0_RTX (GET_MODE (x
)))
4767 if (x
== CONST1_RTX (GET_MODE (x
)))
4770 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4772 /* For XFmode constants, try to find a special 80387 instruction when
4773 optimizing for size or on those CPUs that benefit from them. */
4774 if (GET_MODE (x
) == XFmode
4775 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4779 if (! ext_80387_constants_init
)
4780 init_ext_80387_constants ();
4782 for (i
= 0; i
< 5; i
++)
4783 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4787 /* Load of the constant -0.0 or -1.0 will be split as
4788 fldz;fchs or fld1;fchs sequence. */
4789 if (real_isnegzero (&r
))
4791 if (real_identical (&r
, &dconstm1
))
4797 /* Return the opcode of the special instruction to be used to load
4801 standard_80387_constant_opcode (rtx x
)
4803 switch (standard_80387_constant_p (x
))
4827 /* Return the CONST_DOUBLE representing the 80387 constant that is
4828 loaded by the specified special instruction. The argument IDX
4829 matches the return value from standard_80387_constant_p. */
4832 standard_80387_constant_rtx (int idx
)
4836 if (! ext_80387_constants_init
)
4837 init_ext_80387_constants ();
4853 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4857 /* Return 1 if mode is a valid mode for sse. */
4859 standard_sse_mode_p (enum machine_mode mode
)
4876 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4879 standard_sse_constant_p (rtx x
)
4881 enum machine_mode mode
= GET_MODE (x
);
4883 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4885 if (vector_all_ones_operand (x
, mode
)
4886 && standard_sse_mode_p (mode
))
4887 return TARGET_SSE2
? 2 : -1;
4892 /* Return the opcode of the special instruction to be used to load
4896 standard_sse_constant_opcode (rtx insn
, rtx x
)
4898 switch (standard_sse_constant_p (x
))
4901 if (get_attr_mode (insn
) == MODE_V4SF
)
4902 return "xorps\t%0, %0";
4903 else if (get_attr_mode (insn
) == MODE_V2DF
)
4904 return "xorpd\t%0, %0";
4906 return "pxor\t%0, %0";
4908 return "pcmpeqd\t%0, %0";
4913 /* Returns 1 if OP contains a symbol reference */
4916 symbolic_reference_mentioned_p (rtx op
)
4921 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4924 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4925 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4931 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4932 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4936 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4943 /* Return 1 if it is appropriate to emit `ret' instructions in the
4944 body of a function. Do this only if the epilogue is simple, needing a
4945 couple of insns. Prior to reloading, we can't tell how many registers
4946 must be saved, so return 0 then. Return 0 if there is no frame
4947 marker to de-allocate. */
4950 ix86_can_use_return_insn_p (void)
4952 struct ix86_frame frame
;
4954 if (! reload_completed
|| frame_pointer_needed
)
4957 /* Don't allow more than 32 pop, since that's all we can do
4958 with one instruction. */
4959 if (current_function_pops_args
4960 && current_function_args_size
>= 32768)
4963 ix86_compute_frame_layout (&frame
);
4964 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4967 /* Value should be nonzero if functions must have frame pointers.
4968 Zero means the frame pointer need not be set up (and parms may
4969 be accessed via the stack pointer) in functions that seem suitable. */
4972 ix86_frame_pointer_required (void)
4974 /* If we accessed previous frames, then the generated code expects
4975 to be able to access the saved ebp value in our frame. */
4976 if (cfun
->machine
->accesses_prev_frame
)
4979 /* Several x86 os'es need a frame pointer for other reasons,
4980 usually pertaining to setjmp. */
4981 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4984 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4985 the frame pointer by default. Turn it back on now if we've not
4986 got a leaf function. */
4987 if (TARGET_OMIT_LEAF_FRAME_POINTER
4988 && (!current_function_is_leaf
4989 || ix86_current_function_calls_tls_descriptor
))
4992 if (current_function_profile
)
4998 /* Record that the current function accesses previous call frames. */
5001 ix86_setup_frame_addresses (void)
5003 cfun
->machine
->accesses_prev_frame
= 1;
5006 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5007 # define USE_HIDDEN_LINKONCE 1
5009 # define USE_HIDDEN_LINKONCE 0
5012 static int pic_labels_used
;
5014 /* Fills in the label name that should be used for a pc thunk for
5015 the given register. */
5018 get_pc_thunk_name (char name
[32], unsigned int regno
)
5020 gcc_assert (!TARGET_64BIT
);
5022 if (USE_HIDDEN_LINKONCE
)
5023 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5025 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5029 /* This function generates code for -fpic that loads %ebx with
5030 the return address of the caller and then returns. */
5033 ix86_file_end (void)
5038 for (regno
= 0; regno
< 8; ++regno
)
5042 if (! ((pic_labels_used
>> regno
) & 1))
5045 get_pc_thunk_name (name
, regno
);
5050 switch_to_section (darwin_sections
[text_coal_section
]);
5051 fputs ("\t.weak_definition\t", asm_out_file
);
5052 assemble_name (asm_out_file
, name
);
5053 fputs ("\n\t.private_extern\t", asm_out_file
);
5054 assemble_name (asm_out_file
, name
);
5055 fputs ("\n", asm_out_file
);
5056 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5060 if (USE_HIDDEN_LINKONCE
)
5064 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5066 TREE_PUBLIC (decl
) = 1;
5067 TREE_STATIC (decl
) = 1;
5068 DECL_ONE_ONLY (decl
) = 1;
5070 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5071 switch_to_section (get_named_section (decl
, NULL
, 0));
5073 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5074 fputs ("\t.hidden\t", asm_out_file
);
5075 assemble_name (asm_out_file
, name
);
5076 fputc ('\n', asm_out_file
);
5077 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5081 switch_to_section (text_section
);
5082 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5085 xops
[0] = gen_rtx_REG (SImode
, regno
);
5086 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5087 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5088 output_asm_insn ("ret", xops
);
5091 if (NEED_INDICATE_EXEC_STACK
)
5092 file_end_indicate_exec_stack ();
5095 /* Emit code for the SET_GOT patterns. */
5098 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5103 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5105 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5107 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5110 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5112 output_asm_insn ("call\t%a2", xops
);
5115 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5116 is what will be referenced by the Mach-O PIC subsystem. */
5118 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5121 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5122 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5125 output_asm_insn ("pop{l}\t%0", xops
);
5130 get_pc_thunk_name (name
, REGNO (dest
));
5131 pic_labels_used
|= 1 << REGNO (dest
);
5133 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5134 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5135 output_asm_insn ("call\t%X2", xops
);
5136 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5137 is what will be referenced by the Mach-O PIC subsystem. */
5140 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5142 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5143 CODE_LABEL_NUMBER (label
));
5150 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5151 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5153 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5158 /* Generate an "push" pattern for input ARG. */
5163 return gen_rtx_SET (VOIDmode
,
5165 gen_rtx_PRE_DEC (Pmode
,
5166 stack_pointer_rtx
)),
5170 /* Return >= 0 if there is an unused call-clobbered register available
5171 for the entire function. */
5174 ix86_select_alt_pic_regnum (void)
5176 if (current_function_is_leaf
&& !current_function_profile
5177 && !ix86_current_function_calls_tls_descriptor
)
5180 for (i
= 2; i
>= 0; --i
)
5181 if (!regs_ever_live
[i
])
5185 return INVALID_REGNUM
;
5188 /* Return 1 if we need to save REGNO. */
5190 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5192 if (pic_offset_table_rtx
5193 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5194 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5195 || current_function_profile
5196 || current_function_calls_eh_return
5197 || current_function_uses_const_pool
))
5199 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5204 if (current_function_calls_eh_return
&& maybe_eh_return
)
5209 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5210 if (test
== INVALID_REGNUM
)
5217 if (cfun
->machine
->force_align_arg_pointer
5218 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5221 return (regs_ever_live
[regno
]
5222 && !call_used_regs
[regno
]
5223 && !fixed_regs
[regno
]
5224 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5227 /* Return number of registers to be saved on the stack. */
5230 ix86_nsaved_regs (void)
5235 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5236 if (ix86_save_reg (regno
, true))
5241 /* Return the offset between two registers, one to be eliminated, and the other
5242 its replacement, at the start of a routine. */
5245 ix86_initial_elimination_offset (int from
, int to
)
5247 struct ix86_frame frame
;
5248 ix86_compute_frame_layout (&frame
);
5250 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5251 return frame
.hard_frame_pointer_offset
;
5252 else if (from
== FRAME_POINTER_REGNUM
5253 && to
== HARD_FRAME_POINTER_REGNUM
)
5254 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5257 gcc_assert (to
== STACK_POINTER_REGNUM
);
5259 if (from
== ARG_POINTER_REGNUM
)
5260 return frame
.stack_pointer_offset
;
5262 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5263 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5267 /* Fill structure ix86_frame about frame of currently computed function. */
5270 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5272 HOST_WIDE_INT total_size
;
5273 unsigned int stack_alignment_needed
;
5274 HOST_WIDE_INT offset
;
5275 unsigned int preferred_alignment
;
5276 HOST_WIDE_INT size
= get_frame_size ();
5278 frame
->nregs
= ix86_nsaved_regs ();
5281 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5282 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5284 /* During reload iteration the amount of registers saved can change.
5285 Recompute the value as needed. Do not recompute when amount of registers
5286 didn't change as reload does multiple calls to the function and does not
5287 expect the decision to change within single iteration. */
5289 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5291 int count
= frame
->nregs
;
5293 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5294 /* The fast prologue uses move instead of push to save registers. This
5295 is significantly longer, but also executes faster as modern hardware
5296 can execute the moves in parallel, but can't do that for push/pop.
5298 Be careful about choosing what prologue to emit: When function takes
5299 many instructions to execute we may use slow version as well as in
5300 case function is known to be outside hot spot (this is known with
5301 feedback only). Weight the size of function by number of registers
5302 to save as it is cheap to use one or two push instructions but very
5303 slow to use many of them. */
5305 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5306 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5307 || (flag_branch_probabilities
5308 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5309 cfun
->machine
->use_fast_prologue_epilogue
= false;
5311 cfun
->machine
->use_fast_prologue_epilogue
5312 = !expensive_function_p (count
);
5314 if (TARGET_PROLOGUE_USING_MOVE
5315 && cfun
->machine
->use_fast_prologue_epilogue
)
5316 frame
->save_regs_using_mov
= true;
5318 frame
->save_regs_using_mov
= false;
5321 /* Skip return address and saved base pointer. */
5322 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5324 frame
->hard_frame_pointer_offset
= offset
;
5326 /* Do some sanity checking of stack_alignment_needed and
5327 preferred_alignment, since i386 port is the only using those features
5328 that may break easily. */
5330 gcc_assert (!size
|| stack_alignment_needed
);
5331 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5332 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5333 gcc_assert (stack_alignment_needed
5334 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5336 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5337 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5339 /* Register save area */
5340 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5343 if (ix86_save_varrargs_registers
)
5345 offset
+= X86_64_VARARGS_SIZE
;
5346 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5349 frame
->va_arg_size
= 0;
5351 /* Align start of frame for local function. */
5352 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5353 & -stack_alignment_needed
) - offset
;
5355 offset
+= frame
->padding1
;
5357 /* Frame pointer points here. */
5358 frame
->frame_pointer_offset
= offset
;
5362 /* Add outgoing arguments area. Can be skipped if we eliminated
5363 all the function calls as dead code.
5364 Skipping is however impossible when function calls alloca. Alloca
5365 expander assumes that last current_function_outgoing_args_size
5366 of stack frame are unused. */
5367 if (ACCUMULATE_OUTGOING_ARGS
5368 && (!current_function_is_leaf
|| current_function_calls_alloca
5369 || ix86_current_function_calls_tls_descriptor
))
5371 offset
+= current_function_outgoing_args_size
;
5372 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5375 frame
->outgoing_arguments_size
= 0;
5377 /* Align stack boundary. Only needed if we're calling another function
5379 if (!current_function_is_leaf
|| current_function_calls_alloca
5380 || ix86_current_function_calls_tls_descriptor
)
5381 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5382 & -preferred_alignment
) - offset
;
5384 frame
->padding2
= 0;
5386 offset
+= frame
->padding2
;
5388 /* We've reached end of stack frame. */
5389 frame
->stack_pointer_offset
= offset
;
5391 /* Size prologue needs to allocate. */
5392 frame
->to_allocate
=
5393 (size
+ frame
->padding1
+ frame
->padding2
5394 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5396 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5397 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5398 frame
->save_regs_using_mov
= false;
5400 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5401 && current_function_is_leaf
5402 && !ix86_current_function_calls_tls_descriptor
)
5404 frame
->red_zone_size
= frame
->to_allocate
;
5405 if (frame
->save_regs_using_mov
)
5406 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5407 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5408 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5411 frame
->red_zone_size
= 0;
5412 frame
->to_allocate
-= frame
->red_zone_size
;
5413 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5415 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5416 fprintf (stderr
, "size: %i\n", size
);
5417 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5418 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5419 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5420 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5421 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5422 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5423 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5424 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5425 frame
->hard_frame_pointer_offset
);
5426 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5430 /* Emit code to save registers in the prologue. */
5433 ix86_emit_save_regs (void)
5438 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5439 if (ix86_save_reg (regno
, true))
5441 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5442 RTX_FRAME_RELATED_P (insn
) = 1;
5446 /* Emit code to save registers using MOV insns. First register
5447 is restored from POINTER + OFFSET. */
5449 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5454 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5455 if (ix86_save_reg (regno
, true))
5457 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5459 gen_rtx_REG (Pmode
, regno
));
5460 RTX_FRAME_RELATED_P (insn
) = 1;
5461 offset
+= UNITS_PER_WORD
;
5465 /* Expand prologue or epilogue stack adjustment.
5466 The pattern exist to put a dependency on all ebp-based memory accesses.
5467 STYLE should be negative if instructions should be marked as frame related,
5468 zero if %r11 register is live and cannot be freely used and positive
5472 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5477 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5478 else if (x86_64_immediate_operand (offset
, DImode
))
5479 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5483 /* r11 is used by indirect sibcall return as well, set before the
5484 epilogue and used after the epilogue. ATM indirect sibcall
5485 shouldn't be used together with huge frame sizes in one
5486 function because of the frame_size check in sibcall.c. */
5488 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5489 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5491 RTX_FRAME_RELATED_P (insn
) = 1;
5492 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5496 RTX_FRAME_RELATED_P (insn
) = 1;
5499 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5502 ix86_internal_arg_pointer (void)
5504 bool has_force_align_arg_pointer
=
5505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5508 && DECL_NAME (current_function_decl
)
5509 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5510 && DECL_FILE_SCOPE_P (current_function_decl
))
5511 || ix86_force_align_arg_pointer
5512 || has_force_align_arg_pointer
)
5514 /* Nested functions can't realign the stack due to a register
5516 if (DECL_CONTEXT (current_function_decl
)
5517 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5519 if (ix86_force_align_arg_pointer
)
5520 warning (0, "-mstackrealign ignored for nested functions");
5521 if (has_force_align_arg_pointer
)
5522 error ("%s not supported for nested functions",
5523 ix86_force_align_arg_pointer_string
);
5524 return virtual_incoming_args_rtx
;
5526 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5527 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5530 return virtual_incoming_args_rtx
;
5533 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5534 This is called from dwarf2out.c to emit call frame instructions
5535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5537 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5539 rtx unspec
= SET_SRC (pattern
);
5540 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5544 case UNSPEC_REG_SAVE
:
5545 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5546 SET_DEST (pattern
));
5548 case UNSPEC_DEF_CFA
:
5549 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5550 INTVAL (XVECEXP (unspec
, 0, 0)));
5557 /* Expand the prologue into a bunch of separate insns. */
5560 ix86_expand_prologue (void)
5564 struct ix86_frame frame
;
5565 HOST_WIDE_INT allocate
;
5567 ix86_compute_frame_layout (&frame
);
5569 if (cfun
->machine
->force_align_arg_pointer
)
5573 /* Grab the argument pointer. */
5574 x
= plus_constant (stack_pointer_rtx
, 4);
5575 y
= cfun
->machine
->force_align_arg_pointer
;
5576 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5577 RTX_FRAME_RELATED_P (insn
) = 1;
5579 /* The unwind info consists of two parts: install the fafp as the cfa,
5580 and record the fafp as the "save register" of the stack pointer.
5581 The later is there in order that the unwinder can see where it
5582 should restore the stack pointer across the and insn. */
5583 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5584 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5585 RTX_FRAME_RELATED_P (x
) = 1;
5586 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5588 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5589 RTX_FRAME_RELATED_P (y
) = 1;
5590 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5591 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5592 REG_NOTES (insn
) = x
;
5594 /* Align the stack. */
5595 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5598 /* And here we cheat like madmen with the unwind info. We force the
5599 cfa register back to sp+4, which is exactly what it was at the
5600 start of the function. Re-pushing the return address results in
5601 the return at the same spot relative to the cfa, and thus is
5602 correct wrt the unwind info. */
5603 x
= cfun
->machine
->force_align_arg_pointer
;
5604 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5605 insn
= emit_insn (gen_push (x
));
5606 RTX_FRAME_RELATED_P (insn
) = 1;
5609 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5610 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5611 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5612 REG_NOTES (insn
) = x
;
5615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5616 slower on all targets. Also sdb doesn't like it. */
5618 if (frame_pointer_needed
)
5620 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5621 RTX_FRAME_RELATED_P (insn
) = 1;
5623 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5624 RTX_FRAME_RELATED_P (insn
) = 1;
5627 allocate
= frame
.to_allocate
;
5629 if (!frame
.save_regs_using_mov
)
5630 ix86_emit_save_regs ();
5632 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5634 /* When using red zone we may start register saving before allocating
5635 the stack frame saving one cycle of the prologue. */
5636 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5637 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5638 : stack_pointer_rtx
,
5639 -frame
.nregs
* UNITS_PER_WORD
);
5643 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5644 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5645 GEN_INT (-allocate
), -1);
5648 /* Only valid for Win32. */
5649 rtx eax
= gen_rtx_REG (SImode
, 0);
5650 bool eax_live
= ix86_eax_live_at_start_p ();
5653 gcc_assert (!TARGET_64BIT
);
5657 emit_insn (gen_push (eax
));
5661 emit_move_insn (eax
, GEN_INT (allocate
));
5663 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5664 RTX_FRAME_RELATED_P (insn
) = 1;
5665 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5666 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5667 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5668 t
, REG_NOTES (insn
));
5672 if (frame_pointer_needed
)
5673 t
= plus_constant (hard_frame_pointer_rtx
,
5676 - frame
.nregs
* UNITS_PER_WORD
);
5678 t
= plus_constant (stack_pointer_rtx
, allocate
);
5679 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5683 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5685 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5686 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5688 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5689 -frame
.nregs
* UNITS_PER_WORD
);
5692 pic_reg_used
= false;
5693 if (pic_offset_table_rtx
5694 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5695 || current_function_profile
))
5697 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5699 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5700 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5702 pic_reg_used
= true;
5708 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5710 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5712 /* Even with accurate pre-reload life analysis, we can wind up
5713 deleting all references to the pic register after reload.
5714 Consider if cross-jumping unifies two sides of a branch
5715 controlled by a comparison vs the only read from a global.
5716 In which case, allow the set_got to be deleted, though we're
5717 too late to do anything about the ebx save in the prologue. */
5718 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5721 /* Prevent function calls from be scheduled before the call to mcount.
5722 In the pic_reg_used case, make sure that the got load isn't deleted. */
5723 if (current_function_profile
)
5724 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5727 /* Emit code to restore saved registers using MOV insns. First register
5728 is restored from POINTER + OFFSET. */
5730 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5731 int maybe_eh_return
)
5734 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5736 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5737 if (ix86_save_reg (regno
, maybe_eh_return
))
5739 /* Ensure that adjust_address won't be forced to produce pointer
5740 out of range allowed by x86-64 instruction set. */
5741 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5745 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5746 emit_move_insn (r11
, GEN_INT (offset
));
5747 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5748 base_address
= gen_rtx_MEM (Pmode
, r11
);
5751 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5752 adjust_address (base_address
, Pmode
, offset
));
5753 offset
+= UNITS_PER_WORD
;
5757 /* Restore function stack, frame, and registers. */
5760 ix86_expand_epilogue (int style
)
5763 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5764 struct ix86_frame frame
;
5765 HOST_WIDE_INT offset
;
5767 ix86_compute_frame_layout (&frame
);
5769 /* Calculate start of saved registers relative to ebp. Special care
5770 must be taken for the normal return case of a function using
5771 eh_return: the eax and edx registers are marked as saved, but not
5772 restored along this path. */
5773 offset
= frame
.nregs
;
5774 if (current_function_calls_eh_return
&& style
!= 2)
5776 offset
*= -UNITS_PER_WORD
;
5778 /* If we're only restoring one register and sp is not valid then
5779 using a move instruction to restore the register since it's
5780 less work than reloading sp and popping the register.
5782 The default code result in stack adjustment using add/lea instruction,
5783 while this code results in LEAVE instruction (or discrete equivalent),
5784 so it is profitable in some other cases as well. Especially when there
5785 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5786 and there is exactly one register to pop. This heuristic may need some
5787 tuning in future. */
5788 if ((!sp_valid
&& frame
.nregs
<= 1)
5789 || (TARGET_EPILOGUE_USING_MOVE
5790 && cfun
->machine
->use_fast_prologue_epilogue
5791 && (frame
.nregs
> 1 || frame
.to_allocate
))
5792 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5793 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5794 && cfun
->machine
->use_fast_prologue_epilogue
5795 && frame
.nregs
== 1)
5796 || current_function_calls_eh_return
)
5798 /* Restore registers. We can use ebp or esp to address the memory
5799 locations. If both are available, default to ebp, since offsets
5800 are known to be small. Only exception is esp pointing directly to the
5801 end of block of saved registers, where we may simplify addressing
5804 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5805 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5806 frame
.to_allocate
, style
== 2);
5808 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5809 offset
, style
== 2);
5811 /* eh_return epilogues need %ecx added to the stack pointer. */
5814 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5816 if (frame_pointer_needed
)
5818 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5819 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5820 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5822 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5823 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5825 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5830 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5831 tmp
= plus_constant (tmp
, (frame
.to_allocate
5832 + frame
.nregs
* UNITS_PER_WORD
));
5833 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5836 else if (!frame_pointer_needed
)
5837 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5838 GEN_INT (frame
.to_allocate
5839 + frame
.nregs
* UNITS_PER_WORD
),
5841 /* If not an i386, mov & pop is faster than "leave". */
5842 else if (TARGET_USE_LEAVE
|| optimize_size
5843 || !cfun
->machine
->use_fast_prologue_epilogue
)
5844 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5847 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5848 hard_frame_pointer_rtx
,
5851 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5853 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5858 /* First step is to deallocate the stack frame so that we can
5859 pop the registers. */
5862 gcc_assert (frame_pointer_needed
);
5863 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5864 hard_frame_pointer_rtx
,
5865 GEN_INT (offset
), style
);
5867 else if (frame
.to_allocate
)
5868 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5869 GEN_INT (frame
.to_allocate
), style
);
5871 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5872 if (ix86_save_reg (regno
, false))
5875 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5877 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5879 if (frame_pointer_needed
)
5881 /* Leave results in shorter dependency chains on CPUs that are
5882 able to grok it fast. */
5883 if (TARGET_USE_LEAVE
)
5884 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5885 else if (TARGET_64BIT
)
5886 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5888 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5892 if (cfun
->machine
->force_align_arg_pointer
)
5894 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5895 cfun
->machine
->force_align_arg_pointer
,
5899 /* Sibcall epilogues don't want a return instruction. */
5903 if (current_function_pops_args
&& current_function_args_size
)
5905 rtx popc
= GEN_INT (current_function_pops_args
);
5907 /* i386 can only pop 64K bytes. If asked to pop more, pop
5908 return address, do explicit add, and jump indirectly to the
5911 if (current_function_pops_args
>= 65536)
5913 rtx ecx
= gen_rtx_REG (SImode
, 2);
5915 /* There is no "pascal" calling convention in 64bit ABI. */
5916 gcc_assert (!TARGET_64BIT
);
5918 emit_insn (gen_popsi1 (ecx
));
5919 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5920 emit_jump_insn (gen_return_indirect_internal (ecx
));
5923 emit_jump_insn (gen_return_pop_internal (popc
));
5926 emit_jump_insn (gen_return_internal ());
5929 /* Reset from the function's potential modifications. */
5932 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5933 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5935 if (pic_offset_table_rtx
)
5936 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5938 /* Mach-O doesn't support labels at the end of objects, so if
5939 it looks like we might want one, insert a NOP. */
5941 rtx insn
= get_last_insn ();
5944 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
5945 insn
= PREV_INSN (insn
);
5949 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
5950 fputs ("\tnop\n", file
);
5956 /* Extract the parts of an RTL expression that is a valid memory address
5957 for an instruction. Return 0 if the structure of the address is
5958 grossly off. Return -1 if the address contains ASHIFT, so it is not
5959 strictly valid, but still used for computing length of lea instruction. */
5962 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5964 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5965 rtx base_reg
, index_reg
;
5966 HOST_WIDE_INT scale
= 1;
5967 rtx scale_rtx
= NULL_RTX
;
5969 enum ix86_address_seg seg
= SEG_DEFAULT
;
5971 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5973 else if (GET_CODE (addr
) == PLUS
)
5983 addends
[n
++] = XEXP (op
, 1);
5986 while (GET_CODE (op
) == PLUS
);
5991 for (i
= n
; i
>= 0; --i
)
5994 switch (GET_CODE (op
))
5999 index
= XEXP (op
, 0);
6000 scale_rtx
= XEXP (op
, 1);
6004 if (XINT (op
, 1) == UNSPEC_TP
6005 && TARGET_TLS_DIRECT_SEG_REFS
6006 && seg
== SEG_DEFAULT
)
6007 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6036 else if (GET_CODE (addr
) == MULT
)
6038 index
= XEXP (addr
, 0); /* index*scale */
6039 scale_rtx
= XEXP (addr
, 1);
6041 else if (GET_CODE (addr
) == ASHIFT
)
6045 /* We're called for lea too, which implements ashift on occasion. */
6046 index
= XEXP (addr
, 0);
6047 tmp
= XEXP (addr
, 1);
6048 if (GET_CODE (tmp
) != CONST_INT
)
6050 scale
= INTVAL (tmp
);
6051 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6057 disp
= addr
; /* displacement */
6059 /* Extract the integral value of scale. */
6062 if (GET_CODE (scale_rtx
) != CONST_INT
)
6064 scale
= INTVAL (scale_rtx
);
6067 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6068 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6070 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6071 if (base_reg
&& index_reg
&& scale
== 1
6072 && (index_reg
== arg_pointer_rtx
6073 || index_reg
== frame_pointer_rtx
6074 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6077 tmp
= base
, base
= index
, index
= tmp
;
6078 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6081 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6082 if ((base_reg
== hard_frame_pointer_rtx
6083 || base_reg
== frame_pointer_rtx
6084 || base_reg
== arg_pointer_rtx
) && !disp
)
6087 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6088 Avoid this by transforming to [%esi+0]. */
6089 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6090 && base_reg
&& !index_reg
&& !disp
6092 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6095 /* Special case: encode reg+reg instead of reg*2. */
6096 if (!base
&& index
&& scale
&& scale
== 2)
6097 base
= index
, base_reg
= index_reg
, scale
= 1;
6099 /* Special case: scaling cannot be encoded without base or displacement. */
6100 if (!base
&& !disp
&& index
&& scale
!= 1)
6112 /* Return cost of the memory address x.
6113 For i386, it is better to use a complex address than let gcc copy
6114 the address into a reg and make a new pseudo. But not if the address
6115 requires to two regs - that would mean more pseudos with longer
6118 ix86_address_cost (rtx x
)
6120 struct ix86_address parts
;
6122 int ok
= ix86_decompose_address (x
, &parts
);
6126 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6127 parts
.base
= SUBREG_REG (parts
.base
);
6128 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6129 parts
.index
= SUBREG_REG (parts
.index
);
6131 /* More complex memory references are better. */
6132 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6134 if (parts
.seg
!= SEG_DEFAULT
)
6137 /* Attempt to minimize number of registers in the address. */
6139 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6141 && (!REG_P (parts
.index
)
6142 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6146 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6148 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6149 && parts
.base
!= parts
.index
)
6152 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6153 since it's predecode logic can't detect the length of instructions
6154 and it degenerates to vector decoded. Increase cost of such
6155 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6156 to split such addresses or even refuse such addresses at all.
6158 Following addressing modes are affected:
6163 The first and last case may be avoidable by explicitly coding the zero in
6164 memory address, but I don't have AMD-K6 machine handy to check this
6168 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6169 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6170 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6176 /* If X is a machine specific address (i.e. a symbol or label being
6177 referenced as a displacement from the GOT implemented using an
6178 UNSPEC), then return the base term. Otherwise return X. */
6181 ix86_find_base_term (rtx x
)
6187 if (GET_CODE (x
) != CONST
)
6190 if (GET_CODE (term
) == PLUS
6191 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
6192 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6193 term
= XEXP (term
, 0);
6194 if (GET_CODE (term
) != UNSPEC
6195 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6198 term
= XVECEXP (term
, 0, 0);
6200 if (GET_CODE (term
) != SYMBOL_REF
6201 && GET_CODE (term
) != LABEL_REF
)
6207 term
= ix86_delegitimize_address (x
);
6209 if (GET_CODE (term
) != SYMBOL_REF
6210 && GET_CODE (term
) != LABEL_REF
)
6216 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6217 this is used for to form addresses to local data when -fPIC is in
6221 darwin_local_data_pic (rtx disp
)
6223 if (GET_CODE (disp
) == MINUS
)
6225 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6226 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6227 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6229 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6230 if (! strcmp (sym_name
, "<pic base>"))
6238 /* Determine if a given RTX is a valid constant. We already know this
6239 satisfies CONSTANT_P. */
6242 legitimate_constant_p (rtx x
)
6244 switch (GET_CODE (x
))
6249 if (GET_CODE (x
) == PLUS
)
6251 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6256 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6259 /* Only some unspecs are valid as "constants". */
6260 if (GET_CODE (x
) == UNSPEC
)
6261 switch (XINT (x
, 1))
6264 return TARGET_64BIT
;
6267 x
= XVECEXP (x
, 0, 0);
6268 return (GET_CODE (x
) == SYMBOL_REF
6269 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6271 x
= XVECEXP (x
, 0, 0);
6272 return (GET_CODE (x
) == SYMBOL_REF
6273 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6278 /* We must have drilled down to a symbol. */
6279 if (GET_CODE (x
) == LABEL_REF
)
6281 if (GET_CODE (x
) != SYMBOL_REF
)
6286 /* TLS symbols are never valid. */
6287 if (SYMBOL_REF_TLS_MODEL (x
))
6292 if (GET_MODE (x
) == TImode
6293 && x
!= CONST0_RTX (TImode
)
6299 if (x
== CONST0_RTX (GET_MODE (x
)))
6307 /* Otherwise we handle everything else in the move patterns. */
6311 /* Determine if it's legal to put X into the constant pool. This
6312 is not possible for the address of thread-local symbols, which
6313 is checked above. */
6316 ix86_cannot_force_const_mem (rtx x
)
6318 /* We can always put integral constants and vectors in memory. */
6319 switch (GET_CODE (x
))
6329 return !legitimate_constant_p (x
);
6332 /* Determine if a given RTX is a valid constant address. */
6335 constant_address_p (rtx x
)
6337 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6340 /* Nonzero if the constant value X is a legitimate general operand
6341 when generating PIC code. It is given that flag_pic is on and
6342 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6345 legitimate_pic_operand_p (rtx x
)
6349 switch (GET_CODE (x
))
6352 inner
= XEXP (x
, 0);
6353 if (GET_CODE (inner
) == PLUS
6354 && GET_CODE (XEXP (inner
, 1)) == CONST_INT
)
6355 inner
= XEXP (inner
, 0);
6357 /* Only some unspecs are valid as "constants". */
6358 if (GET_CODE (inner
) == UNSPEC
)
6359 switch (XINT (inner
, 1))
6362 return TARGET_64BIT
;
6364 x
= XVECEXP (inner
, 0, 0);
6365 return (GET_CODE (x
) == SYMBOL_REF
6366 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6374 return legitimate_pic_address_disp_p (x
);
6381 /* Determine if a given CONST RTX is a valid memory displacement
6385 legitimate_pic_address_disp_p (rtx disp
)
6389 /* In 64bit mode we can allow direct addresses of symbols and labels
6390 when they are not dynamic symbols. */
6393 rtx op0
= disp
, op1
;
6395 switch (GET_CODE (disp
))
6401 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6403 op0
= XEXP (XEXP (disp
, 0), 0);
6404 op1
= XEXP (XEXP (disp
, 0), 1);
6405 if (GET_CODE (op1
) != CONST_INT
6406 || INTVAL (op1
) >= 16*1024*1024
6407 || INTVAL (op1
) < -16*1024*1024)
6409 if (GET_CODE (op0
) == LABEL_REF
)
6411 if (GET_CODE (op0
) != SYMBOL_REF
)
6416 /* TLS references should always be enclosed in UNSPEC. */
6417 if (SYMBOL_REF_TLS_MODEL (op0
))
6419 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6427 if (GET_CODE (disp
) != CONST
)
6429 disp
= XEXP (disp
, 0);
6433 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6434 of GOT tables. We should not need these anyway. */
6435 if (GET_CODE (disp
) != UNSPEC
6436 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6437 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6440 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6441 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6447 if (GET_CODE (disp
) == PLUS
)
6449 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6451 disp
= XEXP (disp
, 0);
6455 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6458 if (GET_CODE (disp
) != UNSPEC
)
6461 switch (XINT (disp
, 1))
6466 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6468 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6469 While ABI specify also 32bit relocation but we don't produce it in
6470 small PIC model at all. */
6471 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6472 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6474 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6476 case UNSPEC_GOTTPOFF
:
6477 case UNSPEC_GOTNTPOFF
:
6478 case UNSPEC_INDNTPOFF
:
6481 disp
= XVECEXP (disp
, 0, 0);
6482 return (GET_CODE (disp
) == SYMBOL_REF
6483 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6485 disp
= XVECEXP (disp
, 0, 0);
6486 return (GET_CODE (disp
) == SYMBOL_REF
6487 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6489 disp
= XVECEXP (disp
, 0, 0);
6490 return (GET_CODE (disp
) == SYMBOL_REF
6491 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6497 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6498 memory address for an instruction. The MODE argument is the machine mode
6499 for the MEM expression that wants to use this address.
6501 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6502 convert common non-canonical forms to canonical form so that they will
6506 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6508 struct ix86_address parts
;
6509 rtx base
, index
, disp
;
6510 HOST_WIDE_INT scale
;
6511 const char *reason
= NULL
;
6512 rtx reason_rtx
= NULL_RTX
;
6514 if (TARGET_DEBUG_ADDR
)
6517 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6518 GET_MODE_NAME (mode
), strict
);
6522 if (ix86_decompose_address (addr
, &parts
) <= 0)
6524 reason
= "decomposition failed";
6529 index
= parts
.index
;
6531 scale
= parts
.scale
;
6533 /* Validate base register.
6535 Don't allow SUBREG's that span more than a word here. It can lead to spill
6536 failures when the base is one word out of a two word structure, which is
6537 represented internally as a DImode int. */
6546 else if (GET_CODE (base
) == SUBREG
6547 && REG_P (SUBREG_REG (base
))
6548 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6550 reg
= SUBREG_REG (base
);
6553 reason
= "base is not a register";
6557 if (GET_MODE (base
) != Pmode
)
6559 reason
= "base is not in Pmode";
6563 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6564 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6566 reason
= "base is not valid";
6571 /* Validate index register.
6573 Don't allow SUBREG's that span more than a word here -- same as above. */
6582 else if (GET_CODE (index
) == SUBREG
6583 && REG_P (SUBREG_REG (index
))
6584 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6586 reg
= SUBREG_REG (index
);
6589 reason
= "index is not a register";
6593 if (GET_MODE (index
) != Pmode
)
6595 reason
= "index is not in Pmode";
6599 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6600 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6602 reason
= "index is not valid";
6607 /* Validate scale factor. */
6610 reason_rtx
= GEN_INT (scale
);
6613 reason
= "scale without index";
6617 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6619 reason
= "scale is not a valid multiplier";
6624 /* Validate displacement. */
6629 if (GET_CODE (disp
) == CONST
6630 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6631 switch (XINT (XEXP (disp
, 0), 1))
6633 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6634 used. While ABI specify also 32bit relocations, we don't produce
6635 them at all and use IP relative instead. */
6638 gcc_assert (flag_pic
);
6640 goto is_legitimate_pic
;
6641 reason
= "64bit address unspec";
6644 case UNSPEC_GOTPCREL
:
6645 gcc_assert (flag_pic
);
6646 goto is_legitimate_pic
;
6648 case UNSPEC_GOTTPOFF
:
6649 case UNSPEC_GOTNTPOFF
:
6650 case UNSPEC_INDNTPOFF
:
6656 reason
= "invalid address unspec";
6660 else if (SYMBOLIC_CONST (disp
)
6664 && MACHOPIC_INDIRECT
6665 && !machopic_operand_p (disp
)
6671 if (TARGET_64BIT
&& (index
|| base
))
6673 /* foo@dtpoff(%rX) is ok. */
6674 if (GET_CODE (disp
) != CONST
6675 || GET_CODE (XEXP (disp
, 0)) != PLUS
6676 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6677 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6678 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6679 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6681 reason
= "non-constant pic memory reference";
6685 else if (! legitimate_pic_address_disp_p (disp
))
6687 reason
= "displacement is an invalid pic construct";
6691 /* This code used to verify that a symbolic pic displacement
6692 includes the pic_offset_table_rtx register.
6694 While this is good idea, unfortunately these constructs may
6695 be created by "adds using lea" optimization for incorrect
6704 This code is nonsensical, but results in addressing
6705 GOT table with pic_offset_table_rtx base. We can't
6706 just refuse it easily, since it gets matched by
6707 "addsi3" pattern, that later gets split to lea in the
6708 case output register differs from input. While this
6709 can be handled by separate addsi pattern for this case
6710 that never results in lea, this seems to be easier and
6711 correct fix for crash to disable this test. */
6713 else if (GET_CODE (disp
) != LABEL_REF
6714 && GET_CODE (disp
) != CONST_INT
6715 && (GET_CODE (disp
) != CONST
6716 || !legitimate_constant_p (disp
))
6717 && (GET_CODE (disp
) != SYMBOL_REF
6718 || !legitimate_constant_p (disp
)))
6720 reason
= "displacement is not constant";
6723 else if (TARGET_64BIT
6724 && !x86_64_immediate_operand (disp
, VOIDmode
))
6726 reason
= "displacement is out of range";
6731 /* Everything looks valid. */
6732 if (TARGET_DEBUG_ADDR
)
6733 fprintf (stderr
, "Success.\n");
6737 if (TARGET_DEBUG_ADDR
)
6739 fprintf (stderr
, "Error: %s\n", reason
);
6740 debug_rtx (reason_rtx
);
6745 /* Return a unique alias set for the GOT. */
6747 static HOST_WIDE_INT
6748 ix86_GOT_alias_set (void)
6750 static HOST_WIDE_INT set
= -1;
6752 set
= new_alias_set ();
6756 /* Return a legitimate reference for ORIG (an address) using the
6757 register REG. If REG is 0, a new pseudo is generated.
6759 There are two types of references that must be handled:
6761 1. Global data references must load the address from the GOT, via
6762 the PIC reg. An insn is emitted to do this load, and the reg is
6765 2. Static data references, constant pool addresses, and code labels
6766 compute the address as an offset from the GOT, whose base is in
6767 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6768 differentiate them from global data objects. The returned
6769 address is the PIC reg + an unspec constant.
6771 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6772 reg also appears in the address. */
6775 legitimize_pic_address (rtx orig
, rtx reg
)
6782 if (TARGET_MACHO
&& !TARGET_64BIT
)
6785 reg
= gen_reg_rtx (Pmode
);
6786 /* Use the generic Mach-O PIC machinery. */
6787 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6791 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6793 else if (TARGET_64BIT
6794 && ix86_cmodel
!= CM_SMALL_PIC
6795 && local_symbolic_operand (addr
, Pmode
))
6798 /* This symbol may be referenced via a displacement from the PIC
6799 base address (@GOTOFF). */
6801 if (reload_in_progress
)
6802 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6803 if (GET_CODE (addr
) == CONST
)
6804 addr
= XEXP (addr
, 0);
6805 if (GET_CODE (addr
) == PLUS
)
6807 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6808 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6811 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6812 new = gen_rtx_CONST (Pmode
, new);
6814 tmpreg
= gen_reg_rtx (Pmode
);
6817 emit_move_insn (tmpreg
, new);
6821 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6822 tmpreg
, 1, OPTAB_DIRECT
);
6825 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6827 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6829 /* This symbol may be referenced via a displacement from the PIC
6830 base address (@GOTOFF). */
6832 if (reload_in_progress
)
6833 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6834 if (GET_CODE (addr
) == CONST
)
6835 addr
= XEXP (addr
, 0);
6836 if (GET_CODE (addr
) == PLUS
)
6838 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6839 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6842 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6843 new = gen_rtx_CONST (Pmode
, new);
6844 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6848 emit_move_insn (reg
, new);
6852 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6856 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6857 new = gen_rtx_CONST (Pmode
, new);
6858 new = gen_const_mem (Pmode
, new);
6859 set_mem_alias_set (new, ix86_GOT_alias_set ());
6862 reg
= gen_reg_rtx (Pmode
);
6863 /* Use directly gen_movsi, otherwise the address is loaded
6864 into register for CSE. We don't want to CSE this addresses,
6865 instead we CSE addresses from the GOT table, so skip this. */
6866 emit_insn (gen_movsi (reg
, new));
6871 /* This symbol must be referenced via a load from the
6872 Global Offset Table (@GOT). */
6874 if (reload_in_progress
)
6875 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6876 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6877 new = gen_rtx_CONST (Pmode
, new);
6878 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6879 new = gen_const_mem (Pmode
, new);
6880 set_mem_alias_set (new, ix86_GOT_alias_set ());
6883 reg
= gen_reg_rtx (Pmode
);
6884 emit_move_insn (reg
, new);
6890 if (GET_CODE (addr
) == CONST_INT
6891 && !x86_64_immediate_operand (addr
, VOIDmode
))
6895 emit_move_insn (reg
, addr
);
6899 new = force_reg (Pmode
, addr
);
6901 else if (GET_CODE (addr
) == CONST
)
6903 addr
= XEXP (addr
, 0);
6905 /* We must match stuff we generate before. Assume the only
6906 unspecs that can get here are ours. Not that we could do
6907 anything with them anyway.... */
6908 if (GET_CODE (addr
) == UNSPEC
6909 || (GET_CODE (addr
) == PLUS
6910 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6912 gcc_assert (GET_CODE (addr
) == PLUS
);
6914 if (GET_CODE (addr
) == PLUS
)
6916 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6918 /* Check first to see if this is a constant offset from a @GOTOFF
6919 symbol reference. */
6920 if (local_symbolic_operand (op0
, Pmode
)
6921 && GET_CODE (op1
) == CONST_INT
)
6925 if (reload_in_progress
)
6926 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6927 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6929 new = gen_rtx_PLUS (Pmode
, new, op1
);
6930 new = gen_rtx_CONST (Pmode
, new);
6931 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6935 emit_move_insn (reg
, new);
6941 if (INTVAL (op1
) < -16*1024*1024
6942 || INTVAL (op1
) >= 16*1024*1024)
6944 if (!x86_64_immediate_operand (op1
, Pmode
))
6945 op1
= force_reg (Pmode
, op1
);
6946 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6952 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6953 new = legitimize_pic_address (XEXP (addr
, 1),
6954 base
== reg
? NULL_RTX
: reg
);
6956 if (GET_CODE (new) == CONST_INT
)
6957 new = plus_constant (base
, INTVAL (new));
6960 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6962 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6963 new = XEXP (new, 1);
6965 new = gen_rtx_PLUS (Pmode
, base
, new);
6973 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6976 get_thread_pointer (int to_reg
)
6980 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6984 reg
= gen_reg_rtx (Pmode
);
6985 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6986 insn
= emit_insn (insn
);
6991 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6992 false if we expect this to be used for a memory address and true if
6993 we expect to load the address into a register. */
6996 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6998 rtx dest
, base
, off
, pic
, tp
;
7003 case TLS_MODEL_GLOBAL_DYNAMIC
:
7004 dest
= gen_reg_rtx (Pmode
);
7005 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7007 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7009 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7012 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7013 insns
= get_insns ();
7016 emit_libcall_block (insns
, dest
, rax
, x
);
7018 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7019 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7021 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7023 if (TARGET_GNU2_TLS
)
7025 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7027 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7031 case TLS_MODEL_LOCAL_DYNAMIC
:
7032 base
= gen_reg_rtx (Pmode
);
7033 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7035 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7037 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7040 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7041 insns
= get_insns ();
7044 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7045 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7046 emit_libcall_block (insns
, base
, rax
, note
);
7048 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7049 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7051 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7053 if (TARGET_GNU2_TLS
)
7055 rtx x
= ix86_tls_module_base ();
7057 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7058 gen_rtx_MINUS (Pmode
, x
, tp
));
7061 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7062 off
= gen_rtx_CONST (Pmode
, off
);
7064 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7066 if (TARGET_GNU2_TLS
)
7068 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7070 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7075 case TLS_MODEL_INITIAL_EXEC
:
7079 type
= UNSPEC_GOTNTPOFF
;
7083 if (reload_in_progress
)
7084 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7085 pic
= pic_offset_table_rtx
;
7086 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7088 else if (!TARGET_ANY_GNU_TLS
)
7090 pic
= gen_reg_rtx (Pmode
);
7091 emit_insn (gen_set_got (pic
));
7092 type
= UNSPEC_GOTTPOFF
;
7097 type
= UNSPEC_INDNTPOFF
;
7100 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7101 off
= gen_rtx_CONST (Pmode
, off
);
7103 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7104 off
= gen_const_mem (Pmode
, off
);
7105 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7107 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7109 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7110 off
= force_reg (Pmode
, off
);
7111 return gen_rtx_PLUS (Pmode
, base
, off
);
7115 base
= get_thread_pointer (true);
7116 dest
= gen_reg_rtx (Pmode
);
7117 emit_insn (gen_subsi3 (dest
, base
, off
));
7121 case TLS_MODEL_LOCAL_EXEC
:
7122 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7123 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7124 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7125 off
= gen_rtx_CONST (Pmode
, off
);
7127 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7129 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7130 return gen_rtx_PLUS (Pmode
, base
, off
);
7134 base
= get_thread_pointer (true);
7135 dest
= gen_reg_rtx (Pmode
);
7136 emit_insn (gen_subsi3 (dest
, base
, off
));
7147 /* Try machine-dependent ways of modifying an illegitimate address
7148 to be legitimate. If we find one, return the new, valid address.
7149 This macro is used in only one place: `memory_address' in explow.c.
7151 OLDX is the address as it was before break_out_memory_refs was called.
7152 In some cases it is useful to look at this to decide what needs to be done.
7154 MODE and WIN are passed so that this macro can use
7155 GO_IF_LEGITIMATE_ADDRESS.
7157 It is always safe for this macro to do nothing. It exists to recognize
7158 opportunities to optimize the output.
7160 For the 80386, we handle X+REG by loading X into a register R and
7161 using R+REG. R will go in a general reg and indexing will be used.
7162 However, if REG is a broken-out memory address or multiplication,
7163 nothing needs to be done because REG can certainly go in a general reg.
7165 When -fpic is used, special handling is needed for symbolic references.
7166 See comments by legitimize_pic_address in i386.c for details. */
7169 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7174 if (TARGET_DEBUG_ADDR
)
7176 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7177 GET_MODE_NAME (mode
));
7181 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7183 return legitimize_tls_address (x
, log
, false);
7184 if (GET_CODE (x
) == CONST
7185 && GET_CODE (XEXP (x
, 0)) == PLUS
7186 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7187 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7189 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7190 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7193 if (flag_pic
&& SYMBOLIC_CONST (x
))
7194 return legitimize_pic_address (x
, 0);
7196 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7197 if (GET_CODE (x
) == ASHIFT
7198 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7199 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7202 log
= INTVAL (XEXP (x
, 1));
7203 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7204 GEN_INT (1 << log
));
7207 if (GET_CODE (x
) == PLUS
)
7209 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7211 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7212 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
7213 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7216 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7217 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7218 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7219 GEN_INT (1 << log
));
7222 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7223 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
7224 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7227 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7228 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7229 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7230 GEN_INT (1 << log
));
7233 /* Put multiply first if it isn't already. */
7234 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7236 rtx tmp
= XEXP (x
, 0);
7237 XEXP (x
, 0) = XEXP (x
, 1);
7242 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7243 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7244 created by virtual register instantiation, register elimination, and
7245 similar optimizations. */
7246 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7249 x
= gen_rtx_PLUS (Pmode
,
7250 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7251 XEXP (XEXP (x
, 1), 0)),
7252 XEXP (XEXP (x
, 1), 1));
7256 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7257 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7258 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7259 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7260 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7261 && CONSTANT_P (XEXP (x
, 1)))
7264 rtx other
= NULL_RTX
;
7266 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7268 constant
= XEXP (x
, 1);
7269 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7271 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
7273 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7274 other
= XEXP (x
, 1);
7282 x
= gen_rtx_PLUS (Pmode
,
7283 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7284 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7285 plus_constant (other
, INTVAL (constant
)));
7289 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7292 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7295 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7298 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7301 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7305 && GET_CODE (XEXP (x
, 1)) == REG
7306 && GET_CODE (XEXP (x
, 0)) == REG
)
7309 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7312 x
= legitimize_pic_address (x
, 0);
7315 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7318 if (GET_CODE (XEXP (x
, 0)) == REG
)
7320 rtx temp
= gen_reg_rtx (Pmode
);
7321 rtx val
= force_operand (XEXP (x
, 1), temp
);
7323 emit_move_insn (temp
, val
);
7329 else if (GET_CODE (XEXP (x
, 1)) == REG
)
7331 rtx temp
= gen_reg_rtx (Pmode
);
7332 rtx val
= force_operand (XEXP (x
, 0), temp
);
7334 emit_move_insn (temp
, val
);
7344 /* Print an integer constant expression in assembler syntax. Addition
7345 and subtraction are the only arithmetic that may appear in these
7346 expressions. FILE is the stdio stream to write to, X is the rtx, and
7347 CODE is the operand print code from the output string. */
7350 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7354 switch (GET_CODE (x
))
7357 gcc_assert (flag_pic
);
7362 output_addr_const (file
, x
);
7363 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7364 fputs ("@PLT", file
);
7371 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7372 assemble_name (asm_out_file
, buf
);
7376 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7380 /* This used to output parentheses around the expression,
7381 but that does not work on the 386 (either ATT or BSD assembler). */
7382 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7386 if (GET_MODE (x
) == VOIDmode
)
7388 /* We can use %d if the number is <32 bits and positive. */
7389 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7390 fprintf (file
, "0x%lx%08lx",
7391 (unsigned long) CONST_DOUBLE_HIGH (x
),
7392 (unsigned long) CONST_DOUBLE_LOW (x
));
7394 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7397 /* We can't handle floating point constants;
7398 PRINT_OPERAND must handle them. */
7399 output_operand_lossage ("floating constant misused");
7403 /* Some assemblers need integer constants to appear first. */
7404 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
7406 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7408 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7412 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
7413 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7415 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7421 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7422 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7424 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7426 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7430 gcc_assert (XVECLEN (x
, 0) == 1);
7431 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7432 switch (XINT (x
, 1))
7435 fputs ("@GOT", file
);
7438 fputs ("@GOTOFF", file
);
7440 case UNSPEC_GOTPCREL
:
7441 fputs ("@GOTPCREL(%rip)", file
);
7443 case UNSPEC_GOTTPOFF
:
7444 /* FIXME: This might be @TPOFF in Sun ld too. */
7445 fputs ("@GOTTPOFF", file
);
7448 fputs ("@TPOFF", file
);
7452 fputs ("@TPOFF", file
);
7454 fputs ("@NTPOFF", file
);
7457 fputs ("@DTPOFF", file
);
7459 case UNSPEC_GOTNTPOFF
:
7461 fputs ("@GOTTPOFF(%rip)", file
);
7463 fputs ("@GOTNTPOFF", file
);
7465 case UNSPEC_INDNTPOFF
:
7466 fputs ("@INDNTPOFF", file
);
7469 output_operand_lossage ("invalid UNSPEC as operand");
7475 output_operand_lossage ("invalid expression as operand");
7479 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7480 We need to emit DTP-relative relocations. */
7483 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7485 fputs (ASM_LONG
, file
);
7486 output_addr_const (file
, x
);
7487 fputs ("@DTPOFF", file
);
7493 fputs (", 0", file
);
7500 /* In the name of slightly smaller debug output, and to cater to
7501 general assembler lossage, recognize PIC+GOTOFF and turn it back
7502 into a direct symbol reference.
7504 On Darwin, this is necessary to avoid a crash, because Darwin
7505 has a different PIC label for each routine but the DWARF debugging
7506 information is not associated with any particular routine, so it's
7507 necessary to remove references to the PIC label from RTL stored by
7508 the DWARF output code. */
7511 ix86_delegitimize_address (rtx orig_x
)
7514 /* reg_addend is NULL or a multiple of some register. */
7515 rtx reg_addend
= NULL_RTX
;
7516 /* const_addend is NULL or a const_int. */
7517 rtx const_addend
= NULL_RTX
;
7518 /* This is the result, or NULL. */
7519 rtx result
= NULL_RTX
;
7521 if (GET_CODE (x
) == MEM
)
7526 if (GET_CODE (x
) != CONST
7527 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7528 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7529 || GET_CODE (orig_x
) != MEM
)
7531 return XVECEXP (XEXP (x
, 0), 0, 0);
7534 if (GET_CODE (x
) != PLUS
7535 || GET_CODE (XEXP (x
, 1)) != CONST
)
7538 if (GET_CODE (XEXP (x
, 0)) == REG
7539 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7540 /* %ebx + GOT/GOTOFF */
7542 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7544 /* %ebx + %reg * scale + GOT/GOTOFF */
7545 reg_addend
= XEXP (x
, 0);
7546 if (GET_CODE (XEXP (reg_addend
, 0)) == REG
7547 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7548 reg_addend
= XEXP (reg_addend
, 1);
7549 else if (GET_CODE (XEXP (reg_addend
, 1)) == REG
7550 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7551 reg_addend
= XEXP (reg_addend
, 0);
7554 if (GET_CODE (reg_addend
) != REG
7555 && GET_CODE (reg_addend
) != MULT
7556 && GET_CODE (reg_addend
) != ASHIFT
)
7562 x
= XEXP (XEXP (x
, 1), 0);
7563 if (GET_CODE (x
) == PLUS
7564 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7566 const_addend
= XEXP (x
, 1);
7570 if (GET_CODE (x
) == UNSPEC
7571 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7572 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7573 result
= XVECEXP (x
, 0, 0);
7575 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7576 && GET_CODE (orig_x
) != MEM
)
7577 result
= XEXP (x
, 0);
7583 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7585 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7590 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7595 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7597 enum rtx_code second_code
, bypass_code
;
7598 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7599 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7600 code
= ix86_fp_compare_code_to_integer (code
);
7604 code
= reverse_condition (code
);
7615 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7619 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7620 Those same assemblers have the same but opposite lossage on cmov. */
7621 gcc_assert (mode
== CCmode
);
7622 suffix
= fp
? "nbe" : "a";
7642 gcc_assert (mode
== CCmode
);
7664 gcc_assert (mode
== CCmode
);
7665 suffix
= fp
? "nb" : "ae";
7668 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7672 gcc_assert (mode
== CCmode
);
7676 suffix
= fp
? "u" : "p";
7679 suffix
= fp
? "nu" : "np";
7684 fputs (suffix
, file
);
7687 /* Print the name of register X to FILE based on its machine mode and number.
7688 If CODE is 'w', pretend the mode is HImode.
7689 If CODE is 'b', pretend the mode is QImode.
7690 If CODE is 'k', pretend the mode is SImode.
7691 If CODE is 'q', pretend the mode is DImode.
7692 If CODE is 'h', pretend the reg is the 'high' byte register.
7693 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7696 print_reg (rtx x
, int code
, FILE *file
)
7698 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7699 && REGNO (x
) != FRAME_POINTER_REGNUM
7700 && REGNO (x
) != FLAGS_REG
7701 && REGNO (x
) != FPSR_REG
7702 && REGNO (x
) != FPCR_REG
);
7704 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7707 if (code
== 'w' || MMX_REG_P (x
))
7709 else if (code
== 'b')
7711 else if (code
== 'k')
7713 else if (code
== 'q')
7715 else if (code
== 'y')
7717 else if (code
== 'h')
7720 code
= GET_MODE_SIZE (GET_MODE (x
));
7722 /* Irritatingly, AMD extended registers use different naming convention
7723 from the normal registers. */
7724 if (REX_INT_REG_P (x
))
7726 gcc_assert (TARGET_64BIT
);
7730 error ("extended registers have no high halves");
7733 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7736 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7739 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7742 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7745 error ("unsupported operand size for extended register");
7753 if (STACK_TOP_P (x
))
7755 fputs ("st(0)", file
);
7762 if (! ANY_FP_REG_P (x
))
7763 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7768 fputs (hi_reg_name
[REGNO (x
)], file
);
7771 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7773 fputs (qi_reg_name
[REGNO (x
)], file
);
7776 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7778 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7785 /* Locate some local-dynamic symbol still in use by this function
7786 so that we can print its name in some tls_local_dynamic_base
7790 get_some_local_dynamic_name (void)
7794 if (cfun
->machine
->some_ld_name
)
7795 return cfun
->machine
->some_ld_name
;
7797 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7799 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7800 return cfun
->machine
->some_ld_name
;
7806 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7810 if (GET_CODE (x
) == SYMBOL_REF
7811 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7813 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7821 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7822 C -- print opcode suffix for set/cmov insn.
7823 c -- like C, but print reversed condition
7824 F,f -- likewise, but for floating-point.
7825 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7827 R -- print the prefix for register names.
7828 z -- print the opcode suffix for the size of the current operand.
7829 * -- print a star (in certain assembler syntax)
7830 A -- print an absolute memory reference.
7831 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7832 s -- print a shift double count, followed by the assemblers argument
7834 b -- print the QImode name of the register for the indicated operand.
7835 %b0 would print %al if operands[0] is reg 0.
7836 w -- likewise, print the HImode name of the register.
7837 k -- likewise, print the SImode name of the register.
7838 q -- likewise, print the DImode name of the register.
7839 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7840 y -- print "st(0)" instead of "st" as a register.
7841 D -- print condition for SSE cmp instruction.
7842 P -- if PIC, print an @PLT suffix.
7843 X -- don't print any sort of PIC '@' suffix for a symbol.
7844 & -- print some in-use local-dynamic symbol name.
7845 H -- print a memory address offset by 8; used for sse high-parts
7849 print_operand (FILE *file
, rtx x
, int code
)
7856 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7861 assemble_name (file
, get_some_local_dynamic_name ());
7865 switch (ASSEMBLER_DIALECT
)
7872 /* Intel syntax. For absolute addresses, registers should not
7873 be surrounded by braces. */
7874 if (GET_CODE (x
) != REG
)
7877 PRINT_OPERAND (file
, x
, 0);
7887 PRINT_OPERAND (file
, x
, 0);
7892 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7897 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7902 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7907 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7912 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7917 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7922 /* 387 opcodes don't get size suffixes if the operands are
7924 if (STACK_REG_P (x
))
7927 /* Likewise if using Intel opcodes. */
7928 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7931 /* This is the size of op from size of operand. */
7932 switch (GET_MODE_SIZE (GET_MODE (x
)))
7935 #ifdef HAVE_GAS_FILDS_FISTS
7941 if (GET_MODE (x
) == SFmode
)
7956 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7958 #ifdef GAS_MNEMONICS
7984 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7986 PRINT_OPERAND (file
, x
, 0);
7992 /* Little bit of braindamage here. The SSE compare instructions
7993 does use completely different names for the comparisons that the
7994 fp conditional moves. */
7995 switch (GET_CODE (x
))
8010 fputs ("unord", file
);
8014 fputs ("neq", file
);
8018 fputs ("nlt", file
);
8022 fputs ("nle", file
);
8025 fputs ("ord", file
);
8032 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8033 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8035 switch (GET_MODE (x
))
8037 case HImode
: putc ('w', file
); break;
8039 case SFmode
: putc ('l', file
); break;
8041 case DFmode
: putc ('q', file
); break;
8042 default: gcc_unreachable ();
8049 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8052 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8053 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8056 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8059 /* Like above, but reverse condition */
8061 /* Check to see if argument to %c is really a constant
8062 and not a condition code which needs to be reversed. */
8063 if (!COMPARISON_P (x
))
8065 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8068 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8071 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8072 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8075 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8079 /* It doesn't actually matter what mode we use here, as we're
8080 only going to use this for printing. */
8081 x
= adjust_address_nv (x
, DImode
, 8);
8088 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8091 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8094 int pred_val
= INTVAL (XEXP (x
, 0));
8096 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8097 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8099 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8100 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8102 /* Emit hints only in the case default branch prediction
8103 heuristics would fail. */
8104 if (taken
!= cputaken
)
8106 /* We use 3e (DS) prefix for taken branches and
8107 2e (CS) prefix for not taken branches. */
8109 fputs ("ds ; ", file
);
8111 fputs ("cs ; ", file
);
8118 output_operand_lossage ("invalid operand code '%c'", code
);
8122 if (GET_CODE (x
) == REG
)
8123 print_reg (x
, code
, file
);
8125 else if (GET_CODE (x
) == MEM
)
8127 /* No `byte ptr' prefix for call instructions. */
8128 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8131 switch (GET_MODE_SIZE (GET_MODE (x
)))
8133 case 1: size
= "BYTE"; break;
8134 case 2: size
= "WORD"; break;
8135 case 4: size
= "DWORD"; break;
8136 case 8: size
= "QWORD"; break;
8137 case 12: size
= "XWORD"; break;
8138 case 16: size
= "XMMWORD"; break;
8143 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8146 else if (code
== 'w')
8148 else if (code
== 'k')
8152 fputs (" PTR ", file
);
8156 /* Avoid (%rip) for call operands. */
8157 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8158 && GET_CODE (x
) != CONST_INT
)
8159 output_addr_const (file
, x
);
8160 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8161 output_operand_lossage ("invalid constraints for operand");
8166 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8171 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8172 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8174 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8176 fprintf (file
, "0x%08lx", l
);
8179 /* These float cases don't actually occur as immediate operands. */
8180 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8184 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8185 fprintf (file
, "%s", dstr
);
8188 else if (GET_CODE (x
) == CONST_DOUBLE
8189 && GET_MODE (x
) == XFmode
)
8193 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8194 fprintf (file
, "%s", dstr
);
8199 /* We have patterns that allow zero sets of memory, for instance.
8200 In 64-bit mode, we should probably support all 8-byte vectors,
8201 since we can in fact encode that into an immediate. */
8202 if (GET_CODE (x
) == CONST_VECTOR
)
8204 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8210 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8212 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8215 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8216 || GET_CODE (x
) == LABEL_REF
)
8218 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8221 fputs ("OFFSET FLAT:", file
);
8224 if (GET_CODE (x
) == CONST_INT
)
8225 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8227 output_pic_addr_const (file
, x
, code
);
8229 output_addr_const (file
, x
);
8233 /* Print a memory operand whose address is ADDR. */
8236 print_operand_address (FILE *file
, rtx addr
)
8238 struct ix86_address parts
;
8239 rtx base
, index
, disp
;
8241 int ok
= ix86_decompose_address (addr
, &parts
);
8246 index
= parts
.index
;
8248 scale
= parts
.scale
;
8256 if (USER_LABEL_PREFIX
[0] == 0)
8258 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8264 if (!base
&& !index
)
8266 /* Displacement only requires special attention. */
8268 if (GET_CODE (disp
) == CONST_INT
)
8270 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8272 if (USER_LABEL_PREFIX
[0] == 0)
8274 fputs ("ds:", file
);
8276 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8279 output_pic_addr_const (file
, disp
, 0);
8281 output_addr_const (file
, disp
);
8283 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8286 if (GET_CODE (disp
) == CONST
8287 && GET_CODE (XEXP (disp
, 0)) == PLUS
8288 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8289 disp
= XEXP (XEXP (disp
, 0), 0);
8290 if (GET_CODE (disp
) == LABEL_REF
8291 || (GET_CODE (disp
) == SYMBOL_REF
8292 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8293 fputs ("(%rip)", file
);
8298 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8303 output_pic_addr_const (file
, disp
, 0);
8304 else if (GET_CODE (disp
) == LABEL_REF
)
8305 output_asm_label (disp
);
8307 output_addr_const (file
, disp
);
8312 print_reg (base
, 0, file
);
8316 print_reg (index
, 0, file
);
8318 fprintf (file
, ",%d", scale
);
8324 rtx offset
= NULL_RTX
;
8328 /* Pull out the offset of a symbol; print any symbol itself. */
8329 if (GET_CODE (disp
) == CONST
8330 && GET_CODE (XEXP (disp
, 0)) == PLUS
8331 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8333 offset
= XEXP (XEXP (disp
, 0), 1);
8334 disp
= gen_rtx_CONST (VOIDmode
,
8335 XEXP (XEXP (disp
, 0), 0));
8339 output_pic_addr_const (file
, disp
, 0);
8340 else if (GET_CODE (disp
) == LABEL_REF
)
8341 output_asm_label (disp
);
8342 else if (GET_CODE (disp
) == CONST_INT
)
8345 output_addr_const (file
, disp
);
8351 print_reg (base
, 0, file
);
8354 if (INTVAL (offset
) >= 0)
8356 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8360 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8367 print_reg (index
, 0, file
);
8369 fprintf (file
, "*%d", scale
);
8377 output_addr_const_extra (FILE *file
, rtx x
)
8381 if (GET_CODE (x
) != UNSPEC
)
8384 op
= XVECEXP (x
, 0, 0);
8385 switch (XINT (x
, 1))
8387 case UNSPEC_GOTTPOFF
:
8388 output_addr_const (file
, op
);
8389 /* FIXME: This might be @TPOFF in Sun ld. */
8390 fputs ("@GOTTPOFF", file
);
8393 output_addr_const (file
, op
);
8394 fputs ("@TPOFF", file
);
8397 output_addr_const (file
, op
);
8399 fputs ("@TPOFF", file
);
8401 fputs ("@NTPOFF", file
);
8404 output_addr_const (file
, op
);
8405 fputs ("@DTPOFF", file
);
8407 case UNSPEC_GOTNTPOFF
:
8408 output_addr_const (file
, op
);
8410 fputs ("@GOTTPOFF(%rip)", file
);
8412 fputs ("@GOTNTPOFF", file
);
8414 case UNSPEC_INDNTPOFF
:
8415 output_addr_const (file
, op
);
8416 fputs ("@INDNTPOFF", file
);
8426 /* Split one or more DImode RTL references into pairs of SImode
8427 references. The RTL can be REG, offsettable MEM, integer constant, or
8428 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8429 split and "num" is its length. lo_half and hi_half are output arrays
8430 that parallel "operands". */
8433 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8437 rtx op
= operands
[num
];
8439 /* simplify_subreg refuse to split volatile memory addresses,
8440 but we still have to handle it. */
8441 if (GET_CODE (op
) == MEM
)
8443 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8444 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8448 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8449 GET_MODE (op
) == VOIDmode
8450 ? DImode
: GET_MODE (op
), 0);
8451 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8452 GET_MODE (op
) == VOIDmode
8453 ? DImode
: GET_MODE (op
), 4);
8457 /* Split one or more TImode RTL references into pairs of DImode
8458 references. The RTL can be REG, offsettable MEM, integer constant, or
8459 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8460 split and "num" is its length. lo_half and hi_half are output arrays
8461 that parallel "operands". */
8464 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8468 rtx op
= operands
[num
];
8470 /* simplify_subreg refuse to split volatile memory addresses, but we
8471 still have to handle it. */
8472 if (GET_CODE (op
) == MEM
)
8474 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8475 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8479 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8480 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8485 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8486 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8487 is the expression of the binary operation. The output may either be
8488 emitted here, or returned to the caller, like all output_* functions.
8490 There is no guarantee that the operands are the same mode, as they
8491 might be within FLOAT or FLOAT_EXTEND expressions. */
8493 #ifndef SYSV386_COMPAT
8494 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8495 wants to fix the assemblers because that causes incompatibility
8496 with gcc. No-one wants to fix gcc because that causes
8497 incompatibility with assemblers... You can use the option of
8498 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8499 #define SYSV386_COMPAT 1
8503 output_387_binary_op (rtx insn
, rtx
*operands
)
8505 static char buf
[30];
8508 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8510 #ifdef ENABLE_CHECKING
8511 /* Even if we do not want to check the inputs, this documents input
8512 constraints. Which helps in understanding the following code. */
8513 if (STACK_REG_P (operands
[0])
8514 && ((REG_P (operands
[1])
8515 && REGNO (operands
[0]) == REGNO (operands
[1])
8516 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8517 || (REG_P (operands
[2])
8518 && REGNO (operands
[0]) == REGNO (operands
[2])
8519 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8520 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8523 gcc_assert (is_sse
);
8526 switch (GET_CODE (operands
[3]))
8529 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8530 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8538 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8539 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8547 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8548 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8556 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8557 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8571 if (GET_MODE (operands
[0]) == SFmode
)
8572 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8574 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8579 switch (GET_CODE (operands
[3]))
8583 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8585 rtx temp
= operands
[2];
8586 operands
[2] = operands
[1];
8590 /* know operands[0] == operands[1]. */
8592 if (GET_CODE (operands
[2]) == MEM
)
8598 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8600 if (STACK_TOP_P (operands
[0]))
8601 /* How is it that we are storing to a dead operand[2]?
8602 Well, presumably operands[1] is dead too. We can't
8603 store the result to st(0) as st(0) gets popped on this
8604 instruction. Instead store to operands[2] (which I
8605 think has to be st(1)). st(1) will be popped later.
8606 gcc <= 2.8.1 didn't have this check and generated
8607 assembly code that the Unixware assembler rejected. */
8608 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8610 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8614 if (STACK_TOP_P (operands
[0]))
8615 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8617 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8622 if (GET_CODE (operands
[1]) == MEM
)
8628 if (GET_CODE (operands
[2]) == MEM
)
8634 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8637 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8638 derived assemblers, confusingly reverse the direction of
8639 the operation for fsub{r} and fdiv{r} when the
8640 destination register is not st(0). The Intel assembler
8641 doesn't have this brain damage. Read !SYSV386_COMPAT to
8642 figure out what the hardware really does. */
8643 if (STACK_TOP_P (operands
[0]))
8644 p
= "{p\t%0, %2|rp\t%2, %0}";
8646 p
= "{rp\t%2, %0|p\t%0, %2}";
8648 if (STACK_TOP_P (operands
[0]))
8649 /* As above for fmul/fadd, we can't store to st(0). */
8650 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8652 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8657 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8660 if (STACK_TOP_P (operands
[0]))
8661 p
= "{rp\t%0, %1|p\t%1, %0}";
8663 p
= "{p\t%1, %0|rp\t%0, %1}";
8665 if (STACK_TOP_P (operands
[0]))
8666 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8668 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8673 if (STACK_TOP_P (operands
[0]))
8675 if (STACK_TOP_P (operands
[1]))
8676 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8678 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8681 else if (STACK_TOP_P (operands
[1]))
8684 p
= "{\t%1, %0|r\t%0, %1}";
8686 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8692 p
= "{r\t%2, %0|\t%0, %2}";
8694 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8707 /* Return needed mode for entity in optimize_mode_switching pass. */
8710 ix86_mode_needed (int entity
, rtx insn
)
8712 enum attr_i387_cw mode
;
8714 /* The mode UNINITIALIZED is used to store control word after a
8715 function call or ASM pattern. The mode ANY specify that function
8716 has no requirements on the control word and make no changes in the
8717 bits we are interested in. */
8720 || (NONJUMP_INSN_P (insn
)
8721 && (asm_noperands (PATTERN (insn
)) >= 0
8722 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8723 return I387_CW_UNINITIALIZED
;
8725 if (recog_memoized (insn
) < 0)
8728 mode
= get_attr_i387_cw (insn
);
8733 if (mode
== I387_CW_TRUNC
)
8738 if (mode
== I387_CW_FLOOR
)
8743 if (mode
== I387_CW_CEIL
)
8748 if (mode
== I387_CW_MASK_PM
)
8759 /* Output code to initialize control word copies used by trunc?f?i and
8760 rounding patterns. CURRENT_MODE is set to current control word,
8761 while NEW_MODE is set to new control word. */
8764 emit_i387_cw_initialization (int mode
)
8766 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8771 rtx reg
= gen_reg_rtx (HImode
);
8773 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8774 emit_move_insn (reg
, copy_rtx (stored_mode
));
8776 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8781 /* round toward zero (truncate) */
8782 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8783 slot
= SLOT_CW_TRUNC
;
8787 /* round down toward -oo */
8788 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8789 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8790 slot
= SLOT_CW_FLOOR
;
8794 /* round up toward +oo */
8795 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8796 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8797 slot
= SLOT_CW_CEIL
;
8800 case I387_CW_MASK_PM
:
8801 /* mask precision exception for nearbyint() */
8802 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8803 slot
= SLOT_CW_MASK_PM
;
8815 /* round toward zero (truncate) */
8816 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8817 slot
= SLOT_CW_TRUNC
;
8821 /* round down toward -oo */
8822 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8823 slot
= SLOT_CW_FLOOR
;
8827 /* round up toward +oo */
8828 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8829 slot
= SLOT_CW_CEIL
;
8832 case I387_CW_MASK_PM
:
8833 /* mask precision exception for nearbyint() */
8834 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8835 slot
= SLOT_CW_MASK_PM
;
8843 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8845 new_mode
= assign_386_stack_local (HImode
, slot
);
8846 emit_move_insn (new_mode
, reg
);
8849 /* Output code for INSN to convert a float to a signed int. OPERANDS
8850 are the insn operands. The output may be [HSD]Imode and the input
8851 operand may be [SDX]Fmode. */
8854 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8856 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8857 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8858 int round_mode
= get_attr_i387_cw (insn
);
8860 /* Jump through a hoop or two for DImode, since the hardware has no
8861 non-popping instruction. We used to do this a different way, but
8862 that was somewhat fragile and broke with post-reload splitters. */
8863 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8864 output_asm_insn ("fld\t%y1", operands
);
8866 gcc_assert (STACK_TOP_P (operands
[1]));
8867 gcc_assert (GET_CODE (operands
[0]) == MEM
);
8870 output_asm_insn ("fisttp%z0\t%0", operands
);
8873 if (round_mode
!= I387_CW_ANY
)
8874 output_asm_insn ("fldcw\t%3", operands
);
8875 if (stack_top_dies
|| dimode_p
)
8876 output_asm_insn ("fistp%z0\t%0", operands
);
8878 output_asm_insn ("fist%z0\t%0", operands
);
8879 if (round_mode
!= I387_CW_ANY
)
8880 output_asm_insn ("fldcw\t%2", operands
);
8886 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8887 have the values zero or one, indicates the ffreep insn's operand
8888 from the OPERANDS array. */
8891 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8893 if (TARGET_USE_FFREEP
)
8894 #if HAVE_AS_IX86_FFREEP
8895 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8898 static char retval
[] = ".word\t0xc_df";
8899 int regno
= REGNO (operands
[opno
]);
8901 gcc_assert (FP_REGNO_P (regno
));
8903 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
8908 return opno
? "fstp\t%y1" : "fstp\t%y0";
8912 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8913 should be used. UNORDERED_P is true when fucom should be used. */
8916 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8919 rtx cmp_op0
, cmp_op1
;
8920 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8924 cmp_op0
= operands
[0];
8925 cmp_op1
= operands
[1];
8929 cmp_op0
= operands
[1];
8930 cmp_op1
= operands
[2];
8935 if (GET_MODE (operands
[0]) == SFmode
)
8937 return "ucomiss\t{%1, %0|%0, %1}";
8939 return "comiss\t{%1, %0|%0, %1}";
8942 return "ucomisd\t{%1, %0|%0, %1}";
8944 return "comisd\t{%1, %0|%0, %1}";
8947 gcc_assert (STACK_TOP_P (cmp_op0
));
8949 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8951 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8955 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8956 return output_387_ffreep (operands
, 1);
8959 return "ftst\n\tfnstsw\t%0";
8962 if (STACK_REG_P (cmp_op1
)
8964 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8965 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8967 /* If both the top of the 387 stack dies, and the other operand
8968 is also a stack register that dies, then this must be a
8969 `fcompp' float compare */
8973 /* There is no double popping fcomi variant. Fortunately,
8974 eflags is immune from the fstp's cc clobbering. */
8976 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8978 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8979 return output_387_ffreep (operands
, 0);
8984 return "fucompp\n\tfnstsw\t%0";
8986 return "fcompp\n\tfnstsw\t%0";
8991 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8993 static const char * const alt
[16] =
8995 "fcom%z2\t%y2\n\tfnstsw\t%0",
8996 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8997 "fucom%z2\t%y2\n\tfnstsw\t%0",
8998 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9000 "ficom%z2\t%y2\n\tfnstsw\t%0",
9001 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9005 "fcomi\t{%y1, %0|%0, %y1}",
9006 "fcomip\t{%y1, %0|%0, %y1}",
9007 "fucomi\t{%y1, %0|%0, %y1}",
9008 "fucomip\t{%y1, %0|%0, %y1}",
9019 mask
= eflags_p
<< 3;
9020 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9021 mask
|= unordered_p
<< 1;
9022 mask
|= stack_top_dies
;
9024 gcc_assert (mask
< 16);
9033 ix86_output_addr_vec_elt (FILE *file
, int value
)
9035 const char *directive
= ASM_LONG
;
9039 directive
= ASM_QUAD
;
9041 gcc_assert (!TARGET_64BIT
);
9044 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9048 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9051 fprintf (file
, "%s%s%d-%s%d\n",
9052 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
9053 else if (HAVE_AS_GOTOFF_IN_DATA
)
9054 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9056 else if (TARGET_MACHO
)
9058 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9059 machopic_output_function_base_name (file
);
9060 fprintf(file
, "\n");
9064 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9065 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9068 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9072 ix86_expand_clear (rtx dest
)
9076 /* We play register width games, which are only valid after reload. */
9077 gcc_assert (reload_completed
);
9079 /* Avoid HImode and its attendant prefix byte. */
9080 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9081 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9083 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9085 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9086 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9088 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9089 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9095 /* X is an unchanging MEM. If it is a constant pool reference, return
9096 the constant pool rtx, else NULL. */
9099 maybe_get_pool_constant (rtx x
)
9101 x
= ix86_delegitimize_address (XEXP (x
, 0));
9103 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9104 return get_pool_constant (x
);
9110 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9112 int strict
= (reload_in_progress
|| reload_completed
);
9114 enum tls_model model
;
9119 if (GET_CODE (op1
) == SYMBOL_REF
)
9121 model
= SYMBOL_REF_TLS_MODEL (op1
);
9124 op1
= legitimize_tls_address (op1
, model
, true);
9125 op1
= force_operand (op1
, op0
);
9130 else if (GET_CODE (op1
) == CONST
9131 && GET_CODE (XEXP (op1
, 0)) == PLUS
9132 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9134 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9137 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9138 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9139 op1
= force_operand (op1
, NULL
);
9140 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9141 op0
, 1, OPTAB_DIRECT
);
9147 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9149 if (TARGET_MACHO
&& !TARGET_64BIT
)
9154 rtx temp
= ((reload_in_progress
9155 || ((op0
&& GET_CODE (op0
) == REG
)
9157 ? op0
: gen_reg_rtx (Pmode
));
9158 op1
= machopic_indirect_data_reference (op1
, temp
);
9159 op1
= machopic_legitimize_pic_address (op1
, mode
,
9160 temp
== op1
? 0 : temp
);
9162 else if (MACHOPIC_INDIRECT
)
9163 op1
= machopic_indirect_data_reference (op1
, 0);
9170 if (GET_CODE (op0
) == MEM
)
9171 op1
= force_reg (Pmode
, op1
);
9173 op1
= legitimize_address (op1
, op1
, Pmode
);
9178 if (GET_CODE (op0
) == MEM
9179 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9180 || !push_operand (op0
, mode
))
9181 && GET_CODE (op1
) == MEM
)
9182 op1
= force_reg (mode
, op1
);
9184 if (push_operand (op0
, mode
)
9185 && ! general_no_elim_operand (op1
, mode
))
9186 op1
= copy_to_mode_reg (mode
, op1
);
9188 /* Force large constants in 64bit compilation into register
9189 to get them CSEed. */
9190 if (TARGET_64BIT
&& mode
== DImode
9191 && immediate_operand (op1
, mode
)
9192 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9193 && !register_operand (op0
, mode
)
9194 && optimize
&& !reload_completed
&& !reload_in_progress
)
9195 op1
= copy_to_mode_reg (mode
, op1
);
9197 if (FLOAT_MODE_P (mode
))
9199 /* If we are loading a floating point constant to a register,
9200 force the value to memory now, since we'll get better code
9201 out the back end. */
9205 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9207 op1
= validize_mem (force_const_mem (mode
, op1
));
9208 if (!register_operand (op0
, mode
))
9210 rtx temp
= gen_reg_rtx (mode
);
9211 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9212 emit_move_insn (op0
, temp
);
9219 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9223 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9225 rtx op0
= operands
[0], op1
= operands
[1];
9227 /* Force constants other than zero into memory. We do not know how
9228 the instructions used to build constants modify the upper 64 bits
9229 of the register, once we have that information we may be able
9230 to handle some of them more efficiently. */
9231 if ((reload_in_progress
| reload_completed
) == 0
9232 && register_operand (op0
, mode
)
9234 && standard_sse_constant_p (op1
) <= 0)
9235 op1
= validize_mem (force_const_mem (mode
, op1
));
9237 /* Make operand1 a register if it isn't already. */
9239 && !register_operand (op0
, mode
)
9240 && !register_operand (op1
, mode
))
9242 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9246 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9249 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9250 straight to ix86_expand_vector_move. */
9253 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9262 /* If we're optimizing for size, movups is the smallest. */
9265 op0
= gen_lowpart (V4SFmode
, op0
);
9266 op1
= gen_lowpart (V4SFmode
, op1
);
9267 emit_insn (gen_sse_movups (op0
, op1
));
9271 /* ??? If we have typed data, then it would appear that using
9272 movdqu is the only way to get unaligned data loaded with
9274 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9276 op0
= gen_lowpart (V16QImode
, op0
);
9277 op1
= gen_lowpart (V16QImode
, op1
);
9278 emit_insn (gen_sse2_movdqu (op0
, op1
));
9282 if (TARGET_SSE2
&& mode
== V2DFmode
)
9286 /* When SSE registers are split into halves, we can avoid
9287 writing to the top half twice. */
9288 if (TARGET_SSE_SPLIT_REGS
)
9290 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9295 /* ??? Not sure about the best option for the Intel chips.
9296 The following would seem to satisfy; the register is
9297 entirely cleared, breaking the dependency chain. We
9298 then store to the upper half, with a dependency depth
9299 of one. A rumor has it that Intel recommends two movsd
9300 followed by an unpacklpd, but this is unconfirmed. And
9301 given that the dependency depth of the unpacklpd would
9302 still be one, I'm not sure why this would be better. */
9303 zero
= CONST0_RTX (V2DFmode
);
9306 m
= adjust_address (op1
, DFmode
, 0);
9307 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9308 m
= adjust_address (op1
, DFmode
, 8);
9309 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9313 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9314 emit_move_insn (op0
, CONST0_RTX (mode
));
9316 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9318 if (mode
!= V4SFmode
)
9319 op0
= gen_lowpart (V4SFmode
, op0
);
9320 m
= adjust_address (op1
, V2SFmode
, 0);
9321 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9322 m
= adjust_address (op1
, V2SFmode
, 8);
9323 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9326 else if (MEM_P (op0
))
9328 /* If we're optimizing for size, movups is the smallest. */
9331 op0
= gen_lowpart (V4SFmode
, op0
);
9332 op1
= gen_lowpart (V4SFmode
, op1
);
9333 emit_insn (gen_sse_movups (op0
, op1
));
9337 /* ??? Similar to above, only less clear because of quote
9338 typeless stores unquote. */
9339 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9340 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9342 op0
= gen_lowpart (V16QImode
, op0
);
9343 op1
= gen_lowpart (V16QImode
, op1
);
9344 emit_insn (gen_sse2_movdqu (op0
, op1
));
9348 if (TARGET_SSE2
&& mode
== V2DFmode
)
9350 m
= adjust_address (op0
, DFmode
, 0);
9351 emit_insn (gen_sse2_storelpd (m
, op1
));
9352 m
= adjust_address (op0
, DFmode
, 8);
9353 emit_insn (gen_sse2_storehpd (m
, op1
));
9357 if (mode
!= V4SFmode
)
9358 op1
= gen_lowpart (V4SFmode
, op1
);
9359 m
= adjust_address (op0
, V2SFmode
, 0);
9360 emit_insn (gen_sse_storelps (m
, op1
));
9361 m
= adjust_address (op0
, V2SFmode
, 8);
9362 emit_insn (gen_sse_storehps (m
, op1
));
9369 /* Expand a push in MODE. This is some mode for which we do not support
9370 proper push instructions, at least from the registers that we expect
9371 the value to live in. */
9374 ix86_expand_push (enum machine_mode mode
, rtx x
)
9378 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9379 GEN_INT (-GET_MODE_SIZE (mode
)),
9380 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9381 if (tmp
!= stack_pointer_rtx
)
9382 emit_move_insn (stack_pointer_rtx
, tmp
);
9384 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9385 emit_move_insn (tmp
, x
);
9388 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9389 destination to use for the operation. If different from the true
9390 destination in operands[0], a copy operation will be required. */
9393 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9396 int matching_memory
;
9397 rtx src1
, src2
, dst
;
9403 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9404 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9405 && (rtx_equal_p (dst
, src2
)
9406 || immediate_operand (src1
, mode
)))
9413 /* If the destination is memory, and we do not have matching source
9414 operands, do things in registers. */
9415 matching_memory
= 0;
9416 if (GET_CODE (dst
) == MEM
)
9418 if (rtx_equal_p (dst
, src1
))
9419 matching_memory
= 1;
9420 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9421 && rtx_equal_p (dst
, src2
))
9422 matching_memory
= 2;
9424 dst
= gen_reg_rtx (mode
);
9427 /* Both source operands cannot be in memory. */
9428 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
9430 if (matching_memory
!= 2)
9431 src2
= force_reg (mode
, src2
);
9433 src1
= force_reg (mode
, src1
);
9436 /* If the operation is not commutable, source 1 cannot be a constant
9437 or non-matching memory. */
9438 if ((CONSTANT_P (src1
)
9439 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
9440 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9441 src1
= force_reg (mode
, src1
);
9443 src1
= operands
[1] = src1
;
9444 src2
= operands
[2] = src2
;
9448 /* Similarly, but assume that the destination has already been
9452 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9453 enum machine_mode mode
, rtx operands
[])
9455 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9456 gcc_assert (dst
== operands
[0]);
9459 /* Attempt to expand a binary operator. Make the expansion closer to the
9460 actual machine, then just general_operand, which will allow 3 separate
9461 memory references (one output, two input) in a single insn. */
9464 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9467 rtx src1
, src2
, dst
, op
, clob
;
9469 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9473 /* Emit the instruction. */
9475 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9476 if (reload_in_progress
)
9478 /* Reload doesn't know about the flags register, and doesn't know that
9479 it doesn't want to clobber it. We can only do this with PLUS. */
9480 gcc_assert (code
== PLUS
);
9485 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9486 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9489 /* Fix up the destination if needed. */
9490 if (dst
!= operands
[0])
9491 emit_move_insn (operands
[0], dst
);
9494 /* Return TRUE or FALSE depending on whether the binary operator meets the
9495 appropriate constraints. */
9498 ix86_binary_operator_ok (enum rtx_code code
,
9499 enum machine_mode mode ATTRIBUTE_UNUSED
,
9502 /* Both source operands cannot be in memory. */
9503 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
9505 /* If the operation is not commutable, source 1 cannot be a constant. */
9506 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9508 /* If the destination is memory, we must have a matching source operand. */
9509 if (GET_CODE (operands
[0]) == MEM
9510 && ! (rtx_equal_p (operands
[0], operands
[1])
9511 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9512 && rtx_equal_p (operands
[0], operands
[2]))))
9514 /* If the operation is not commutable and the source 1 is memory, we must
9515 have a matching destination. */
9516 if (GET_CODE (operands
[1]) == MEM
9517 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
9518 && ! rtx_equal_p (operands
[0], operands
[1]))
9523 /* Attempt to expand a unary operator. Make the expansion closer to the
9524 actual machine, then just general_operand, which will allow 2 separate
9525 memory references (one output, one input) in a single insn. */
9528 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9531 int matching_memory
;
9532 rtx src
, dst
, op
, clob
;
9537 /* If the destination is memory, and we do not have matching source
9538 operands, do things in registers. */
9539 matching_memory
= 0;
9542 if (rtx_equal_p (dst
, src
))
9543 matching_memory
= 1;
9545 dst
= gen_reg_rtx (mode
);
9548 /* When source operand is memory, destination must match. */
9549 if (MEM_P (src
) && !matching_memory
)
9550 src
= force_reg (mode
, src
);
9552 /* Emit the instruction. */
9554 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9555 if (reload_in_progress
|| code
== NOT
)
9557 /* Reload doesn't know about the flags register, and doesn't know that
9558 it doesn't want to clobber it. */
9559 gcc_assert (code
== NOT
);
9564 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9565 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9568 /* Fix up the destination if needed. */
9569 if (dst
!= operands
[0])
9570 emit_move_insn (operands
[0], dst
);
9573 /* Return TRUE or FALSE depending on whether the unary operator meets the
9574 appropriate constraints. */
9577 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9578 enum machine_mode mode ATTRIBUTE_UNUSED
,
9579 rtx operands
[2] ATTRIBUTE_UNUSED
)
9581 /* If one of operands is memory, source and destination must match. */
9582 if ((GET_CODE (operands
[0]) == MEM
9583 || GET_CODE (operands
[1]) == MEM
)
9584 && ! rtx_equal_p (operands
[0], operands
[1]))
9589 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9590 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9591 true, then replicate the mask for all elements of the vector register.
9592 If INVERT is true, then create a mask excluding the sign bit. */
9595 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9597 enum machine_mode vec_mode
;
9598 HOST_WIDE_INT hi
, lo
;
9603 /* Find the sign bit, sign extended to 2*HWI. */
9605 lo
= 0x80000000, hi
= lo
< 0;
9606 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9607 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9609 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9614 /* Force this value into the low part of a fp vector constant. */
9615 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9616 mask
= gen_lowpart (mode
, mask
);
9621 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9623 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9624 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9625 vec_mode
= V4SFmode
;
9630 v
= gen_rtvec (2, mask
, mask
);
9632 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9633 vec_mode
= V2DFmode
;
9636 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9639 /* Generate code for floating point ABS or NEG. */
9642 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9645 rtx mask
, set
, use
, clob
, dst
, src
;
9646 bool matching_memory
;
9647 bool use_sse
= false;
9648 bool vector_mode
= VECTOR_MODE_P (mode
);
9649 enum machine_mode elt_mode
= mode
;
9653 elt_mode
= GET_MODE_INNER (mode
);
9656 else if (TARGET_SSE_MATH
)
9657 use_sse
= SSE_FLOAT_MODE_P (mode
);
9659 /* NEG and ABS performed with SSE use bitwise mask operations.
9660 Create the appropriate mask now. */
9662 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9669 /* If the destination is memory, and we don't have matching source
9670 operands or we're using the x87, do things in registers. */
9671 matching_memory
= false;
9674 if (use_sse
&& rtx_equal_p (dst
, src
))
9675 matching_memory
= true;
9677 dst
= gen_reg_rtx (mode
);
9679 if (MEM_P (src
) && !matching_memory
)
9680 src
= force_reg (mode
, src
);
9684 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9685 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9690 set
= gen_rtx_fmt_e (code
, mode
, src
);
9691 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9694 use
= gen_rtx_USE (VOIDmode
, mask
);
9695 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9696 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9697 gen_rtvec (3, set
, use
, clob
)));
9703 if (dst
!= operands
[0])
9704 emit_move_insn (operands
[0], dst
);
9707 /* Expand a copysign operation. Special case operand 0 being a constant. */
9710 ix86_expand_copysign (rtx operands
[])
9712 enum machine_mode mode
, vmode
;
9713 rtx dest
, op0
, op1
, mask
, nmask
;
9719 mode
= GET_MODE (dest
);
9720 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9722 if (GET_CODE (op0
) == CONST_DOUBLE
)
9726 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9727 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9729 if (op0
== CONST0_RTX (mode
))
9730 op0
= CONST0_RTX (vmode
);
9734 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9735 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9737 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9738 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9741 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9744 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9746 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9750 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9751 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9754 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9756 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9760 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9761 be a constant, and so has already been expanded into a vector constant. */
9764 ix86_split_copysign_const (rtx operands
[])
9766 enum machine_mode mode
, vmode
;
9767 rtx dest
, op0
, op1
, mask
, x
;
9774 mode
= GET_MODE (dest
);
9775 vmode
= GET_MODE (mask
);
9777 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9778 x
= gen_rtx_AND (vmode
, dest
, mask
);
9779 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9781 if (op0
!= CONST0_RTX (vmode
))
9783 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9784 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9788 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9789 so we have to do two masks. */
9792 ix86_split_copysign_var (rtx operands
[])
9794 enum machine_mode mode
, vmode
;
9795 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9798 scratch
= operands
[1];
9801 nmask
= operands
[4];
9804 mode
= GET_MODE (dest
);
9805 vmode
= GET_MODE (mask
);
9807 if (rtx_equal_p (op0
, op1
))
9809 /* Shouldn't happen often (it's useless, obviously), but when it does
9810 we'd generate incorrect code if we continue below. */
9811 emit_move_insn (dest
, op0
);
9815 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9817 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9819 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9820 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9823 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9824 x
= gen_rtx_NOT (vmode
, dest
);
9825 x
= gen_rtx_AND (vmode
, x
, op0
);
9826 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9830 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9832 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9834 else /* alternative 2,4 */
9836 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9837 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9838 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9840 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9842 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9844 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9845 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9847 else /* alternative 3,4 */
9849 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9851 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9852 x
= gen_rtx_AND (vmode
, dest
, op0
);
9854 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9857 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9858 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9861 /* Return TRUE or FALSE depending on whether the first SET in INSN
9862 has source and destination with matching CC modes, and that the
9863 CC mode is at least as constrained as REQ_MODE. */
9866 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9869 enum machine_mode set_mode
;
9871 set
= PATTERN (insn
);
9872 if (GET_CODE (set
) == PARALLEL
)
9873 set
= XVECEXP (set
, 0, 0);
9874 gcc_assert (GET_CODE (set
) == SET
);
9875 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9877 set_mode
= GET_MODE (SET_DEST (set
));
9881 if (req_mode
!= CCNOmode
9882 && (req_mode
!= CCmode
9883 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9887 if (req_mode
== CCGCmode
)
9891 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9895 if (req_mode
== CCZmode
)
9905 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9908 /* Generate insn patterns to do an integer compare of OPERANDS. */
9911 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9913 enum machine_mode cmpmode
;
9916 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9917 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9919 /* This is very simple, but making the interface the same as in the
9920 FP case makes the rest of the code easier. */
9921 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9922 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9924 /* Return the test that should be put into the flags user, i.e.
9925 the bcc, scc, or cmov instruction. */
9926 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9929 /* Figure out whether to use ordered or unordered fp comparisons.
9930 Return the appropriate mode to use. */
9933 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9935 /* ??? In order to make all comparisons reversible, we do all comparisons
9936 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9937 all forms trapping and nontrapping comparisons, we can make inequality
9938 comparisons trapping again, since it results in better code when using
9939 FCOM based compares. */
9940 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9944 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9946 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9947 return ix86_fp_compare_mode (code
);
9950 /* Only zero flag is needed. */
9952 case NE
: /* ZF!=0 */
9954 /* Codes needing carry flag. */
9955 case GEU
: /* CF=0 */
9956 case GTU
: /* CF=0 & ZF=0 */
9957 case LTU
: /* CF=1 */
9958 case LEU
: /* CF=1 | ZF=1 */
9960 /* Codes possibly doable only with sign flag when
9961 comparing against zero. */
9962 case GE
: /* SF=OF or SF=0 */
9963 case LT
: /* SF<>OF or SF=1 */
9964 if (op1
== const0_rtx
)
9967 /* For other cases Carry flag is not required. */
9969 /* Codes doable only with sign flag when comparing
9970 against zero, but we miss jump instruction for it
9971 so we need to use relational tests against overflow
9972 that thus needs to be zero. */
9973 case GT
: /* ZF=0 & SF=OF */
9974 case LE
: /* ZF=1 | SF<>OF */
9975 if (op1
== const0_rtx
)
9979 /* strcmp pattern do (use flags) and combine may ask us for proper
9988 /* Return the fixed registers used for condition codes. */
9991 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
9998 /* If two condition code modes are compatible, return a condition code
9999 mode which is compatible with both. Otherwise, return
10002 static enum machine_mode
10003 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10008 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10011 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10012 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10018 gcc_unreachable ();
10040 /* These are only compatible with themselves, which we already
10046 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10049 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10051 enum rtx_code swapped_code
= swap_condition (code
);
10052 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10053 || (ix86_fp_comparison_cost (swapped_code
)
10054 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10057 /* Swap, force into registers, or otherwise massage the two operands
10058 to a fp comparison. The operands are updated in place; the new
10059 comparison code is returned. */
10061 static enum rtx_code
10062 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10064 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10065 rtx op0
= *pop0
, op1
= *pop1
;
10066 enum machine_mode op_mode
= GET_MODE (op0
);
10067 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10069 /* All of the unordered compare instructions only work on registers.
10070 The same is true of the fcomi compare instructions. The XFmode
10071 compare instructions require registers except when comparing
10072 against zero or when converting operand 1 from fixed point to
10076 && (fpcmp_mode
== CCFPUmode
10077 || (op_mode
== XFmode
10078 && ! (standard_80387_constant_p (op0
) == 1
10079 || standard_80387_constant_p (op1
) == 1)
10080 && GET_CODE (op1
) != FLOAT
)
10081 || ix86_use_fcomi_compare (code
)))
10083 op0
= force_reg (op_mode
, op0
);
10084 op1
= force_reg (op_mode
, op1
);
10088 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10089 things around if they appear profitable, otherwise force op0
10090 into a register. */
10092 if (standard_80387_constant_p (op0
) == 0
10093 || (GET_CODE (op0
) == MEM
10094 && ! (standard_80387_constant_p (op1
) == 0
10095 || GET_CODE (op1
) == MEM
)))
10098 tmp
= op0
, op0
= op1
, op1
= tmp
;
10099 code
= swap_condition (code
);
10102 if (GET_CODE (op0
) != REG
)
10103 op0
= force_reg (op_mode
, op0
);
10105 if (CONSTANT_P (op1
))
10107 int tmp
= standard_80387_constant_p (op1
);
10109 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10113 op1
= force_reg (op_mode
, op1
);
10116 op1
= force_reg (op_mode
, op1
);
10120 /* Try to rearrange the comparison to make it cheaper. */
10121 if (ix86_fp_comparison_cost (code
)
10122 > ix86_fp_comparison_cost (swap_condition (code
))
10123 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
10126 tmp
= op0
, op0
= op1
, op1
= tmp
;
10127 code
= swap_condition (code
);
10128 if (GET_CODE (op0
) != REG
)
10129 op0
= force_reg (op_mode
, op0
);
10137 /* Convert comparison codes we use to represent FP comparison to integer
10138 code that will result in proper branch. Return UNKNOWN if no such code
10142 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10171 /* Split comparison code CODE into comparisons we can do using branch
10172 instructions. BYPASS_CODE is comparison code for branch that will
10173 branch around FIRST_CODE and SECOND_CODE. If some of branches
10174 is not required, set value to UNKNOWN.
10175 We never require more than two branches. */
10178 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10179 enum rtx_code
*first_code
,
10180 enum rtx_code
*second_code
)
10182 *first_code
= code
;
10183 *bypass_code
= UNKNOWN
;
10184 *second_code
= UNKNOWN
;
10186 /* The fcomi comparison sets flags as follows:
10196 case GT
: /* GTU - CF=0 & ZF=0 */
10197 case GE
: /* GEU - CF=0 */
10198 case ORDERED
: /* PF=0 */
10199 case UNORDERED
: /* PF=1 */
10200 case UNEQ
: /* EQ - ZF=1 */
10201 case UNLT
: /* LTU - CF=1 */
10202 case UNLE
: /* LEU - CF=1 | ZF=1 */
10203 case LTGT
: /* EQ - ZF=0 */
10205 case LT
: /* LTU - CF=1 - fails on unordered */
10206 *first_code
= UNLT
;
10207 *bypass_code
= UNORDERED
;
10209 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10210 *first_code
= UNLE
;
10211 *bypass_code
= UNORDERED
;
10213 case EQ
: /* EQ - ZF=1 - fails on unordered */
10214 *first_code
= UNEQ
;
10215 *bypass_code
= UNORDERED
;
10217 case NE
: /* NE - ZF=0 - fails on unordered */
10218 *first_code
= LTGT
;
10219 *second_code
= UNORDERED
;
10221 case UNGE
: /* GEU - CF=0 - fails on unordered */
10223 *second_code
= UNORDERED
;
10225 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10227 *second_code
= UNORDERED
;
10230 gcc_unreachable ();
10232 if (!TARGET_IEEE_FP
)
10234 *second_code
= UNKNOWN
;
10235 *bypass_code
= UNKNOWN
;
10239 /* Return cost of comparison done fcom + arithmetics operations on AX.
10240 All following functions do use number of instructions as a cost metrics.
10241 In future this should be tweaked to compute bytes for optimize_size and
10242 take into account performance of various instructions on various CPUs. */
10244 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10246 if (!TARGET_IEEE_FP
)
10248 /* The cost of code output by ix86_expand_fp_compare. */
10272 gcc_unreachable ();
10276 /* Return cost of comparison done using fcomi operation.
10277 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10279 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10281 enum rtx_code bypass_code
, first_code
, second_code
;
10282 /* Return arbitrarily high cost when instruction is not supported - this
10283 prevents gcc from using it. */
10286 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10287 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10290 /* Return cost of comparison done using sahf operation.
10291 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10293 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10295 enum rtx_code bypass_code
, first_code
, second_code
;
10296 /* Return arbitrarily high cost when instruction is not preferred - this
10297 avoids gcc from using it. */
10298 if (!TARGET_USE_SAHF
&& !optimize_size
)
10300 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10301 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10304 /* Compute cost of the comparison done using any method.
10305 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10307 ix86_fp_comparison_cost (enum rtx_code code
)
10309 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10312 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10313 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10315 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10316 if (min
> sahf_cost
)
10318 if (min
> fcomi_cost
)
10323 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10326 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10327 rtx
*second_test
, rtx
*bypass_test
)
10329 enum machine_mode fpcmp_mode
, intcmp_mode
;
10331 int cost
= ix86_fp_comparison_cost (code
);
10332 enum rtx_code bypass_code
, first_code
, second_code
;
10334 fpcmp_mode
= ix86_fp_compare_mode (code
);
10335 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10338 *second_test
= NULL_RTX
;
10340 *bypass_test
= NULL_RTX
;
10342 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10344 /* Do fcomi/sahf based test when profitable. */
10345 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10346 && (second_code
== UNKNOWN
|| second_test
)
10347 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10351 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10352 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10358 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10359 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10361 scratch
= gen_reg_rtx (HImode
);
10362 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10363 emit_insn (gen_x86_sahf_1 (scratch
));
10366 /* The FP codes work out to act like unsigned. */
10367 intcmp_mode
= fpcmp_mode
;
10369 if (bypass_code
!= UNKNOWN
)
10370 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10371 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10373 if (second_code
!= UNKNOWN
)
10374 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10375 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10380 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10381 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10382 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10384 scratch
= gen_reg_rtx (HImode
);
10385 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10387 /* In the unordered case, we have to check C2 for NaN's, which
10388 doesn't happen to work out to anything nice combination-wise.
10389 So do some bit twiddling on the value we've got in AH to come
10390 up with an appropriate set of condition codes. */
10392 intcmp_mode
= CCNOmode
;
10397 if (code
== GT
|| !TARGET_IEEE_FP
)
10399 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10404 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10405 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10406 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10407 intcmp_mode
= CCmode
;
10413 if (code
== LT
&& TARGET_IEEE_FP
)
10415 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10416 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10417 intcmp_mode
= CCmode
;
10422 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10428 if (code
== GE
|| !TARGET_IEEE_FP
)
10430 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10435 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10436 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10443 if (code
== LE
&& TARGET_IEEE_FP
)
10445 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10446 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10447 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10448 intcmp_mode
= CCmode
;
10453 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10459 if (code
== EQ
&& TARGET_IEEE_FP
)
10461 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10462 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10463 intcmp_mode
= CCmode
;
10468 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10475 if (code
== NE
&& TARGET_IEEE_FP
)
10477 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10478 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10484 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10490 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10494 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10499 gcc_unreachable ();
10503 /* Return the test that should be put into the flags user, i.e.
10504 the bcc, scc, or cmov instruction. */
10505 return gen_rtx_fmt_ee (code
, VOIDmode
,
10506 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10511 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10514 op0
= ix86_compare_op0
;
10515 op1
= ix86_compare_op1
;
10518 *second_test
= NULL_RTX
;
10520 *bypass_test
= NULL_RTX
;
10522 if (ix86_compare_emitted
)
10524 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10525 ix86_compare_emitted
= NULL_RTX
;
10527 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10528 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10529 second_test
, bypass_test
);
10531 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10536 /* Return true if the CODE will result in nontrivial jump sequence. */
10538 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10540 enum rtx_code bypass_code
, first_code
, second_code
;
10543 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10544 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10548 ix86_expand_branch (enum rtx_code code
, rtx label
)
10552 /* If we have emitted a compare insn, go straight to simple.
10553 ix86_expand_compare won't emit anything if ix86_compare_emitted
10555 if (ix86_compare_emitted
)
10558 switch (GET_MODE (ix86_compare_op0
))
10564 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10565 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10566 gen_rtx_LABEL_REF (VOIDmode
, label
),
10568 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10577 enum rtx_code bypass_code
, first_code
, second_code
;
10579 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10580 &ix86_compare_op1
);
10582 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10584 /* Check whether we will use the natural sequence with one jump. If
10585 so, we can expand jump early. Otherwise delay expansion by
10586 creating compound insn to not confuse optimizers. */
10587 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10590 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10591 gen_rtx_LABEL_REF (VOIDmode
, label
),
10592 pc_rtx
, NULL_RTX
, NULL_RTX
);
10596 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10597 ix86_compare_op0
, ix86_compare_op1
);
10598 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10599 gen_rtx_LABEL_REF (VOIDmode
, label
),
10601 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10603 use_fcomi
= ix86_use_fcomi_compare (code
);
10604 vec
= rtvec_alloc (3 + !use_fcomi
);
10605 RTVEC_ELT (vec
, 0) = tmp
;
10607 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10609 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10612 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10614 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10623 /* Expand DImode branch into multiple compare+branch. */
10625 rtx lo
[2], hi
[2], label2
;
10626 enum rtx_code code1
, code2
, code3
;
10627 enum machine_mode submode
;
10629 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10631 tmp
= ix86_compare_op0
;
10632 ix86_compare_op0
= ix86_compare_op1
;
10633 ix86_compare_op1
= tmp
;
10634 code
= swap_condition (code
);
10636 if (GET_MODE (ix86_compare_op0
) == DImode
)
10638 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10639 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10644 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10645 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10649 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10650 avoid two branches. This costs one extra insn, so disable when
10651 optimizing for size. */
10653 if ((code
== EQ
|| code
== NE
)
10655 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10660 if (hi
[1] != const0_rtx
)
10661 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10662 NULL_RTX
, 0, OPTAB_WIDEN
);
10665 if (lo
[1] != const0_rtx
)
10666 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10667 NULL_RTX
, 0, OPTAB_WIDEN
);
10669 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10670 NULL_RTX
, 0, OPTAB_WIDEN
);
10672 ix86_compare_op0
= tmp
;
10673 ix86_compare_op1
= const0_rtx
;
10674 ix86_expand_branch (code
, label
);
10678 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10679 op1 is a constant and the low word is zero, then we can just
10680 examine the high word. */
10682 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
10685 case LT
: case LTU
: case GE
: case GEU
:
10686 ix86_compare_op0
= hi
[0];
10687 ix86_compare_op1
= hi
[1];
10688 ix86_expand_branch (code
, label
);
10694 /* Otherwise, we need two or three jumps. */
10696 label2
= gen_label_rtx ();
10699 code2
= swap_condition (code
);
10700 code3
= unsigned_condition (code
);
10704 case LT
: case GT
: case LTU
: case GTU
:
10707 case LE
: code1
= LT
; code2
= GT
; break;
10708 case GE
: code1
= GT
; code2
= LT
; break;
10709 case LEU
: code1
= LTU
; code2
= GTU
; break;
10710 case GEU
: code1
= GTU
; code2
= LTU
; break;
10712 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10713 case NE
: code2
= UNKNOWN
; break;
10716 gcc_unreachable ();
10721 * if (hi(a) < hi(b)) goto true;
10722 * if (hi(a) > hi(b)) goto false;
10723 * if (lo(a) < lo(b)) goto true;
10727 ix86_compare_op0
= hi
[0];
10728 ix86_compare_op1
= hi
[1];
10730 if (code1
!= UNKNOWN
)
10731 ix86_expand_branch (code1
, label
);
10732 if (code2
!= UNKNOWN
)
10733 ix86_expand_branch (code2
, label2
);
10735 ix86_compare_op0
= lo
[0];
10736 ix86_compare_op1
= lo
[1];
10737 ix86_expand_branch (code3
, label
);
10739 if (code2
!= UNKNOWN
)
10740 emit_label (label2
);
10745 gcc_unreachable ();
10749 /* Split branch based on floating point condition. */
10751 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10752 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10754 rtx second
, bypass
;
10755 rtx label
= NULL_RTX
;
10757 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10760 if (target2
!= pc_rtx
)
10763 code
= reverse_condition_maybe_unordered (code
);
10768 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10769 tmp
, &second
, &bypass
);
10771 /* Remove pushed operand from stack. */
10773 ix86_free_from_memory (GET_MODE (pushed
));
10775 if (split_branch_probability
>= 0)
10777 /* Distribute the probabilities across the jumps.
10778 Assume the BYPASS and SECOND to be always test
10780 probability
= split_branch_probability
;
10782 /* Value of 1 is low enough to make no need for probability
10783 to be updated. Later we may run some experiments and see
10784 if unordered values are more frequent in practice. */
10786 bypass_probability
= 1;
10788 second_probability
= 1;
10790 if (bypass
!= NULL_RTX
)
10792 label
= gen_label_rtx ();
10793 i
= emit_jump_insn (gen_rtx_SET
10795 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10797 gen_rtx_LABEL_REF (VOIDmode
,
10800 if (bypass_probability
>= 0)
10802 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10803 GEN_INT (bypass_probability
),
10806 i
= emit_jump_insn (gen_rtx_SET
10808 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10809 condition
, target1
, target2
)));
10810 if (probability
>= 0)
10812 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10813 GEN_INT (probability
),
10815 if (second
!= NULL_RTX
)
10817 i
= emit_jump_insn (gen_rtx_SET
10819 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10821 if (second_probability
>= 0)
10823 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10824 GEN_INT (second_probability
),
10827 if (label
!= NULL_RTX
)
10828 emit_label (label
);
10832 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10834 rtx ret
, tmp
, tmpreg
, equiv
;
10835 rtx second_test
, bypass_test
;
10837 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10838 return 0; /* FAIL */
10840 gcc_assert (GET_MODE (dest
) == QImode
);
10842 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10843 PUT_MODE (ret
, QImode
);
10848 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10849 if (bypass_test
|| second_test
)
10851 rtx test
= second_test
;
10853 rtx tmp2
= gen_reg_rtx (QImode
);
10856 gcc_assert (!second_test
);
10857 test
= bypass_test
;
10859 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10861 PUT_MODE (test
, QImode
);
10862 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10865 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10867 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10870 /* Attach a REG_EQUAL note describing the comparison result. */
10871 if (ix86_compare_op0
&& ix86_compare_op1
)
10873 equiv
= simplify_gen_relational (code
, QImode
,
10874 GET_MODE (ix86_compare_op0
),
10875 ix86_compare_op0
, ix86_compare_op1
);
10876 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10879 return 1; /* DONE */
10882 /* Expand comparison setting or clearing carry flag. Return true when
10883 successful and set pop for the operation. */
10885 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10887 enum machine_mode mode
=
10888 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10890 /* Do not handle DImode compares that go through special path. Also we can't
10891 deal with FP compares yet. This is possible to add. */
10892 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10894 if (FLOAT_MODE_P (mode
))
10896 rtx second_test
= NULL
, bypass_test
= NULL
;
10897 rtx compare_op
, compare_seq
;
10899 /* Shortcut: following common codes never translate into carry flag compares. */
10900 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10901 || code
== ORDERED
|| code
== UNORDERED
)
10904 /* These comparisons require zero flag; swap operands so they won't. */
10905 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10906 && !TARGET_IEEE_FP
)
10911 code
= swap_condition (code
);
10914 /* Try to expand the comparison and verify that we end up with carry flag
10915 based comparison. This is fails to be true only when we decide to expand
10916 comparison using arithmetic that is not too common scenario. */
10918 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10919 &second_test
, &bypass_test
);
10920 compare_seq
= get_insns ();
10923 if (second_test
|| bypass_test
)
10925 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10926 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10927 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10929 code
= GET_CODE (compare_op
);
10930 if (code
!= LTU
&& code
!= GEU
)
10932 emit_insn (compare_seq
);
10936 if (!INTEGRAL_MODE_P (mode
))
10944 /* Convert a==0 into (unsigned)a<1. */
10947 if (op1
!= const0_rtx
)
10950 code
= (code
== EQ
? LTU
: GEU
);
10953 /* Convert a>b into b<a or a>=b-1. */
10956 if (GET_CODE (op1
) == CONST_INT
)
10958 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10959 /* Bail out on overflow. We still can swap operands but that
10960 would force loading of the constant into register. */
10961 if (op1
== const0_rtx
10962 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
10964 code
= (code
== GTU
? GEU
: LTU
);
10971 code
= (code
== GTU
? LTU
: GEU
);
10975 /* Convert a>=0 into (unsigned)a<0x80000000. */
10978 if (mode
== DImode
|| op1
!= const0_rtx
)
10980 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10981 code
= (code
== LT
? GEU
: LTU
);
10985 if (mode
== DImode
|| op1
!= constm1_rtx
)
10987 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10988 code
= (code
== LE
? GEU
: LTU
);
10994 /* Swapping operands may cause constant to appear as first operand. */
10995 if (!nonimmediate_operand (op0
, VOIDmode
))
10997 if (no_new_pseudos
)
10999 op0
= force_reg (mode
, op0
);
11001 ix86_compare_op0
= op0
;
11002 ix86_compare_op1
= op1
;
11003 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11004 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11009 ix86_expand_int_movcc (rtx operands
[])
11011 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11012 rtx compare_seq
, compare_op
;
11013 rtx second_test
, bypass_test
;
11014 enum machine_mode mode
= GET_MODE (operands
[0]);
11015 bool sign_bit_compare_p
= false;;
11018 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11019 compare_seq
= get_insns ();
11022 compare_code
= GET_CODE (compare_op
);
11024 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11025 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11026 sign_bit_compare_p
= true;
11028 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11029 HImode insns, we'd be swallowed in word prefix ops. */
11031 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11032 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11033 && GET_CODE (operands
[2]) == CONST_INT
11034 && GET_CODE (operands
[3]) == CONST_INT
)
11036 rtx out
= operands
[0];
11037 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11038 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11039 HOST_WIDE_INT diff
;
11042 /* Sign bit compares are better done using shifts than we do by using
11044 if (sign_bit_compare_p
11045 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11046 ix86_compare_op1
, &compare_op
))
11048 /* Detect overlap between destination and compare sources. */
11051 if (!sign_bit_compare_p
)
11053 bool fpcmp
= false;
11055 compare_code
= GET_CODE (compare_op
);
11057 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11058 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11061 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11064 /* To simplify rest of code, restrict to the GEU case. */
11065 if (compare_code
== LTU
)
11067 HOST_WIDE_INT tmp
= ct
;
11070 compare_code
= reverse_condition (compare_code
);
11071 code
= reverse_condition (code
);
11076 PUT_CODE (compare_op
,
11077 reverse_condition_maybe_unordered
11078 (GET_CODE (compare_op
)));
11080 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11084 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11085 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11086 tmp
= gen_reg_rtx (mode
);
11088 if (mode
== DImode
)
11089 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11091 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11095 if (code
== GT
|| code
== GE
)
11096 code
= reverse_condition (code
);
11099 HOST_WIDE_INT tmp
= ct
;
11104 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11105 ix86_compare_op1
, VOIDmode
, 0, -1);
11118 tmp
= expand_simple_binop (mode
, PLUS
,
11120 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11131 tmp
= expand_simple_binop (mode
, IOR
,
11133 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11135 else if (diff
== -1 && ct
)
11145 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11147 tmp
= expand_simple_binop (mode
, PLUS
,
11148 copy_rtx (tmp
), GEN_INT (cf
),
11149 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11157 * andl cf - ct, dest
11167 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11170 tmp
= expand_simple_binop (mode
, AND
,
11172 gen_int_mode (cf
- ct
, mode
),
11173 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11175 tmp
= expand_simple_binop (mode
, PLUS
,
11176 copy_rtx (tmp
), GEN_INT (ct
),
11177 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11180 if (!rtx_equal_p (tmp
, out
))
11181 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11183 return 1; /* DONE */
11189 tmp
= ct
, ct
= cf
, cf
= tmp
;
11191 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11193 /* We may be reversing unordered compare to normal compare, that
11194 is not valid in general (we may convert non-trapping condition
11195 to trapping one), however on i386 we currently emit all
11196 comparisons unordered. */
11197 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11198 code
= reverse_condition_maybe_unordered (code
);
11202 compare_code
= reverse_condition (compare_code
);
11203 code
= reverse_condition (code
);
11207 compare_code
= UNKNOWN
;
11208 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11209 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
11211 if (ix86_compare_op1
== const0_rtx
11212 && (code
== LT
|| code
== GE
))
11213 compare_code
= code
;
11214 else if (ix86_compare_op1
== constm1_rtx
)
11218 else if (code
== GT
)
11223 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11224 if (compare_code
!= UNKNOWN
11225 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11226 && (cf
== -1 || ct
== -1))
11228 /* If lea code below could be used, only optimize
11229 if it results in a 2 insn sequence. */
11231 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11232 || diff
== 3 || diff
== 5 || diff
== 9)
11233 || (compare_code
== LT
&& ct
== -1)
11234 || (compare_code
== GE
&& cf
== -1))
11237 * notl op1 (if necessary)
11245 code
= reverse_condition (code
);
11248 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11249 ix86_compare_op1
, VOIDmode
, 0, -1);
11251 out
= expand_simple_binop (mode
, IOR
,
11253 out
, 1, OPTAB_DIRECT
);
11254 if (out
!= operands
[0])
11255 emit_move_insn (operands
[0], out
);
11257 return 1; /* DONE */
11262 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11263 || diff
== 3 || diff
== 5 || diff
== 9)
11264 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11266 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11272 * lea cf(dest*(ct-cf)),dest
11276 * This also catches the degenerate setcc-only case.
11282 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11283 ix86_compare_op1
, VOIDmode
, 0, 1);
11286 /* On x86_64 the lea instruction operates on Pmode, so we need
11287 to get arithmetics done in proper mode to match. */
11289 tmp
= copy_rtx (out
);
11293 out1
= copy_rtx (out
);
11294 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11298 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11304 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11307 if (!rtx_equal_p (tmp
, out
))
11310 out
= force_operand (tmp
, copy_rtx (out
));
11312 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11314 if (!rtx_equal_p (out
, operands
[0]))
11315 emit_move_insn (operands
[0], copy_rtx (out
));
11317 return 1; /* DONE */
11321 * General case: Jumpful:
11322 * xorl dest,dest cmpl op1, op2
11323 * cmpl op1, op2 movl ct, dest
11324 * setcc dest jcc 1f
11325 * decl dest movl cf, dest
11326 * andl (cf-ct),dest 1:
11329 * Size 20. Size 14.
11331 * This is reasonably steep, but branch mispredict costs are
11332 * high on modern cpus, so consider failing only if optimizing
11336 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11337 && BRANCH_COST
>= 2)
11343 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11344 /* We may be reversing unordered compare to normal compare,
11345 that is not valid in general (we may convert non-trapping
11346 condition to trapping one), however on i386 we currently
11347 emit all comparisons unordered. */
11348 code
= reverse_condition_maybe_unordered (code
);
11351 code
= reverse_condition (code
);
11352 if (compare_code
!= UNKNOWN
)
11353 compare_code
= reverse_condition (compare_code
);
11357 if (compare_code
!= UNKNOWN
)
11359 /* notl op1 (if needed)
11364 For x < 0 (resp. x <= -1) there will be no notl,
11365 so if possible swap the constants to get rid of the
11367 True/false will be -1/0 while code below (store flag
11368 followed by decrement) is 0/-1, so the constants need
11369 to be exchanged once more. */
11371 if (compare_code
== GE
|| !cf
)
11373 code
= reverse_condition (code
);
11378 HOST_WIDE_INT tmp
= cf
;
11383 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11384 ix86_compare_op1
, VOIDmode
, 0, -1);
11388 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11389 ix86_compare_op1
, VOIDmode
, 0, 1);
11391 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11392 copy_rtx (out
), 1, OPTAB_DIRECT
);
11395 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11396 gen_int_mode (cf
- ct
, mode
),
11397 copy_rtx (out
), 1, OPTAB_DIRECT
);
11399 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11400 copy_rtx (out
), 1, OPTAB_DIRECT
);
11401 if (!rtx_equal_p (out
, operands
[0]))
11402 emit_move_insn (operands
[0], copy_rtx (out
));
11404 return 1; /* DONE */
11408 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11410 /* Try a few things more with specific constants and a variable. */
11413 rtx var
, orig_out
, out
, tmp
;
11415 if (BRANCH_COST
<= 2)
11416 return 0; /* FAIL */
11418 /* If one of the two operands is an interesting constant, load a
11419 constant with the above and mask it in with a logical operation. */
11421 if (GET_CODE (operands
[2]) == CONST_INT
)
11424 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11425 operands
[3] = constm1_rtx
, op
= and_optab
;
11426 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11427 operands
[3] = const0_rtx
, op
= ior_optab
;
11429 return 0; /* FAIL */
11431 else if (GET_CODE (operands
[3]) == CONST_INT
)
11434 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11435 operands
[2] = constm1_rtx
, op
= and_optab
;
11436 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11437 operands
[2] = const0_rtx
, op
= ior_optab
;
11439 return 0; /* FAIL */
11442 return 0; /* FAIL */
11444 orig_out
= operands
[0];
11445 tmp
= gen_reg_rtx (mode
);
11448 /* Recurse to get the constant loaded. */
11449 if (ix86_expand_int_movcc (operands
) == 0)
11450 return 0; /* FAIL */
11452 /* Mask in the interesting variable. */
11453 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11455 if (!rtx_equal_p (out
, orig_out
))
11456 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11458 return 1; /* DONE */
11462 * For comparison with above,
11472 if (! nonimmediate_operand (operands
[2], mode
))
11473 operands
[2] = force_reg (mode
, operands
[2]);
11474 if (! nonimmediate_operand (operands
[3], mode
))
11475 operands
[3] = force_reg (mode
, operands
[3]);
11477 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11479 rtx tmp
= gen_reg_rtx (mode
);
11480 emit_move_insn (tmp
, operands
[3]);
11483 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11485 rtx tmp
= gen_reg_rtx (mode
);
11486 emit_move_insn (tmp
, operands
[2]);
11490 if (! register_operand (operands
[2], VOIDmode
)
11492 || ! register_operand (operands
[3], VOIDmode
)))
11493 operands
[2] = force_reg (mode
, operands
[2]);
11496 && ! register_operand (operands
[3], VOIDmode
))
11497 operands
[3] = force_reg (mode
, operands
[3]);
11499 emit_insn (compare_seq
);
11500 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11501 gen_rtx_IF_THEN_ELSE (mode
,
11502 compare_op
, operands
[2],
11505 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11506 gen_rtx_IF_THEN_ELSE (mode
,
11508 copy_rtx (operands
[3]),
11509 copy_rtx (operands
[0]))));
11511 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11512 gen_rtx_IF_THEN_ELSE (mode
,
11514 copy_rtx (operands
[2]),
11515 copy_rtx (operands
[0]))));
11517 return 1; /* DONE */
11520 /* Swap, force into registers, or otherwise massage the two operands
11521 to an sse comparison with a mask result. Thus we differ a bit from
11522 ix86_prepare_fp_compare_args which expects to produce a flags result.
11524 The DEST operand exists to help determine whether to commute commutative
11525 operators. The POP0/POP1 operands are updated in place. The new
11526 comparison code is returned, or UNKNOWN if not implementable. */
11528 static enum rtx_code
11529 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11530 rtx
*pop0
, rtx
*pop1
)
11538 /* We have no LTGT as an operator. We could implement it with
11539 NE & ORDERED, but this requires an extra temporary. It's
11540 not clear that it's worth it. */
11547 /* These are supported directly. */
11554 /* For commutative operators, try to canonicalize the destination
11555 operand to be first in the comparison - this helps reload to
11556 avoid extra moves. */
11557 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11565 /* These are not supported directly. Swap the comparison operands
11566 to transform into something that is supported. */
11570 code
= swap_condition (code
);
11574 gcc_unreachable ();
11580 /* Detect conditional moves that exactly match min/max operational
11581 semantics. Note that this is IEEE safe, as long as we don't
11582 interchange the operands.
11584 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11585 and TRUE if the operation is successful and instructions are emitted. */
11588 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11589 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11591 enum machine_mode mode
;
11597 else if (code
== UNGE
)
11600 if_true
= if_false
;
11606 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11608 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11613 mode
= GET_MODE (dest
);
11615 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11616 but MODE may be a vector mode and thus not appropriate. */
11617 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
11619 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11622 if_true
= force_reg (mode
, if_true
);
11623 v
= gen_rtvec (2, if_true
, if_false
);
11624 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
11628 code
= is_min
? SMIN
: SMAX
;
11629 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11632 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11636 /* Expand an sse vector comparison. Return the register with the result. */
11639 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11640 rtx op_true
, rtx op_false
)
11642 enum machine_mode mode
= GET_MODE (dest
);
11645 cmp_op0
= force_reg (mode
, cmp_op0
);
11646 if (!nonimmediate_operand (cmp_op1
, mode
))
11647 cmp_op1
= force_reg (mode
, cmp_op1
);
11650 || reg_overlap_mentioned_p (dest
, op_true
)
11651 || reg_overlap_mentioned_p (dest
, op_false
))
11652 dest
= gen_reg_rtx (mode
);
11654 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11655 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11660 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11661 operations. This is used for both scalar and vector conditional moves. */
11664 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11666 enum machine_mode mode
= GET_MODE (dest
);
11669 if (op_false
== CONST0_RTX (mode
))
11671 op_true
= force_reg (mode
, op_true
);
11672 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11673 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11675 else if (op_true
== CONST0_RTX (mode
))
11677 op_false
= force_reg (mode
, op_false
);
11678 x
= gen_rtx_NOT (mode
, cmp
);
11679 x
= gen_rtx_AND (mode
, x
, op_false
);
11680 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11684 op_true
= force_reg (mode
, op_true
);
11685 op_false
= force_reg (mode
, op_false
);
11687 t2
= gen_reg_rtx (mode
);
11689 t3
= gen_reg_rtx (mode
);
11693 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11694 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11696 x
= gen_rtx_NOT (mode
, cmp
);
11697 x
= gen_rtx_AND (mode
, x
, op_false
);
11698 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11700 x
= gen_rtx_IOR (mode
, t3
, t2
);
11701 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11705 /* Expand a floating-point conditional move. Return true if successful. */
11708 ix86_expand_fp_movcc (rtx operands
[])
11710 enum machine_mode mode
= GET_MODE (operands
[0]);
11711 enum rtx_code code
= GET_CODE (operands
[1]);
11712 rtx tmp
, compare_op
, second_test
, bypass_test
;
11714 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11716 enum machine_mode cmode
;
11718 /* Since we've no cmove for sse registers, don't force bad register
11719 allocation just to gain access to it. Deny movcc when the
11720 comparison mode doesn't match the move mode. */
11721 cmode
= GET_MODE (ix86_compare_op0
);
11722 if (cmode
== VOIDmode
)
11723 cmode
= GET_MODE (ix86_compare_op1
);
11727 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11729 &ix86_compare_op1
);
11730 if (code
== UNKNOWN
)
11733 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11734 ix86_compare_op1
, operands
[2],
11738 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11739 ix86_compare_op1
, operands
[2], operands
[3]);
11740 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11744 /* The floating point conditional move instructions don't directly
11745 support conditions resulting from a signed integer comparison. */
11747 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11749 /* The floating point conditional move instructions don't directly
11750 support signed integer comparisons. */
11752 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
11754 gcc_assert (!second_test
&& !bypass_test
);
11755 tmp
= gen_reg_rtx (QImode
);
11756 ix86_expand_setcc (code
, tmp
);
11758 ix86_compare_op0
= tmp
;
11759 ix86_compare_op1
= const0_rtx
;
11760 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11762 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11764 tmp
= gen_reg_rtx (mode
);
11765 emit_move_insn (tmp
, operands
[3]);
11768 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11770 tmp
= gen_reg_rtx (mode
);
11771 emit_move_insn (tmp
, operands
[2]);
11775 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11776 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11777 operands
[2], operands
[3])));
11779 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11780 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11781 operands
[3], operands
[0])));
11783 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11784 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11785 operands
[2], operands
[0])));
11790 /* Expand a floating-point vector conditional move; a vcond operation
11791 rather than a movcc operation. */
11794 ix86_expand_fp_vcond (rtx operands
[])
11796 enum rtx_code code
= GET_CODE (operands
[3]);
11799 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11800 &operands
[4], &operands
[5]);
11801 if (code
== UNKNOWN
)
11804 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
11805 operands
[5], operands
[1], operands
[2]))
11808 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
11809 operands
[1], operands
[2]);
11810 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
11814 /* Expand a signed integral vector conditional move. */
11817 ix86_expand_int_vcond (rtx operands
[])
11819 enum machine_mode mode
= GET_MODE (operands
[0]);
11820 enum rtx_code code
= GET_CODE (operands
[3]);
11821 bool negate
= false;
11824 cop0
= operands
[4];
11825 cop1
= operands
[5];
11827 /* Canonicalize the comparison to EQ, GT, GTU. */
11838 code
= reverse_condition (code
);
11844 code
= reverse_condition (code
);
11850 code
= swap_condition (code
);
11851 x
= cop0
, cop0
= cop1
, cop1
= x
;
11855 gcc_unreachable ();
11858 /* Unsigned parallel compare is not supported by the hardware. Play some
11859 tricks to turn this into a signed comparison against 0. */
11862 cop0
= force_reg (mode
, cop0
);
11870 /* Perform a parallel modulo subtraction. */
11871 t1
= gen_reg_rtx (mode
);
11872 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
11874 /* Extract the original sign bit of op0. */
11875 mask
= GEN_INT (-0x80000000);
11876 mask
= gen_rtx_CONST_VECTOR (mode
,
11877 gen_rtvec (4, mask
, mask
, mask
, mask
));
11878 mask
= force_reg (mode
, mask
);
11879 t2
= gen_reg_rtx (mode
);
11880 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
11882 /* XOR it back into the result of the subtraction. This results
11883 in the sign bit set iff we saw unsigned underflow. */
11884 x
= gen_reg_rtx (mode
);
11885 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
11893 /* Perform a parallel unsigned saturating subtraction. */
11894 x
= gen_reg_rtx (mode
);
11895 emit_insn (gen_rtx_SET (VOIDmode
, x
,
11896 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
11903 gcc_unreachable ();
11907 cop1
= CONST0_RTX (mode
);
11910 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
11911 operands
[1+negate
], operands
[2-negate
]);
11913 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
11914 operands
[2-negate
]);
11918 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11919 true if we should do zero extension, else sign extension. HIGH_P is
11920 true if we want the N/2 high elements, else the low elements. */
11923 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
11925 enum machine_mode imode
= GET_MODE (operands
[1]);
11926 rtx (*unpack
)(rtx
, rtx
, rtx
);
11933 unpack
= gen_vec_interleave_highv16qi
;
11935 unpack
= gen_vec_interleave_lowv16qi
;
11939 unpack
= gen_vec_interleave_highv8hi
;
11941 unpack
= gen_vec_interleave_lowv8hi
;
11945 unpack
= gen_vec_interleave_highv4si
;
11947 unpack
= gen_vec_interleave_lowv4si
;
11950 gcc_unreachable ();
11953 dest
= gen_lowpart (imode
, operands
[0]);
11956 se
= force_reg (imode
, CONST0_RTX (imode
));
11958 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
11959 operands
[1], pc_rtx
, pc_rtx
);
11961 emit_insn (unpack (dest
, operands
[1], se
));
11964 /* Expand conditional increment or decrement using adb/sbb instructions.
11965 The default case using setcc followed by the conditional move can be
11966 done by generic code. */
11968 ix86_expand_int_addcc (rtx operands
[])
11970 enum rtx_code code
= GET_CODE (operands
[1]);
11972 rtx val
= const0_rtx
;
11973 bool fpcmp
= false;
11974 enum machine_mode mode
= GET_MODE (operands
[0]);
11976 if (operands
[3] != const1_rtx
11977 && operands
[3] != constm1_rtx
)
11979 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11980 ix86_compare_op1
, &compare_op
))
11982 code
= GET_CODE (compare_op
);
11984 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11985 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11988 code
= ix86_fp_compare_code_to_integer (code
);
11995 PUT_CODE (compare_op
,
11996 reverse_condition_maybe_unordered
11997 (GET_CODE (compare_op
)));
11999 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12001 PUT_MODE (compare_op
, mode
);
12003 /* Construct either adc or sbb insn. */
12004 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12006 switch (GET_MODE (operands
[0]))
12009 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12012 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12015 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12018 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12021 gcc_unreachable ();
12026 switch (GET_MODE (operands
[0]))
12029 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12032 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12035 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12038 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12041 gcc_unreachable ();
12044 return 1; /* DONE */
12048 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12049 works for floating pointer parameters and nonoffsetable memories.
12050 For pushes, it returns just stack offsets; the values will be saved
12051 in the right order. Maximally three parts are generated. */
12054 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12059 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12061 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12063 gcc_assert (GET_CODE (operand
) != REG
|| !MMX_REGNO_P (REGNO (operand
)));
12064 gcc_assert (size
>= 2 && size
<= 3);
12066 /* Optimize constant pool reference to immediates. This is used by fp
12067 moves, that force all constants to memory to allow combining. */
12068 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
12070 rtx tmp
= maybe_get_pool_constant (operand
);
12075 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
12077 /* The only non-offsetable memories we handle are pushes. */
12078 int ok
= push_operand (operand
, VOIDmode
);
12082 operand
= copy_rtx (operand
);
12083 PUT_MODE (operand
, Pmode
);
12084 parts
[0] = parts
[1] = parts
[2] = operand
;
12088 if (GET_CODE (operand
) == CONST_VECTOR
)
12090 enum machine_mode imode
= int_mode_for_mode (mode
);
12091 /* Caution: if we looked through a constant pool memory above,
12092 the operand may actually have a different mode now. That's
12093 ok, since we want to pun this all the way back to an integer. */
12094 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12095 gcc_assert (operand
!= NULL
);
12101 if (mode
== DImode
)
12102 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12105 if (REG_P (operand
))
12107 gcc_assert (reload_completed
);
12108 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12109 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12111 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12113 else if (offsettable_memref_p (operand
))
12115 operand
= adjust_address (operand
, SImode
, 0);
12116 parts
[0] = operand
;
12117 parts
[1] = adjust_address (operand
, SImode
, 4);
12119 parts
[2] = adjust_address (operand
, SImode
, 8);
12121 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12126 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12130 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12131 parts
[2] = gen_int_mode (l
[2], SImode
);
12134 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12137 gcc_unreachable ();
12139 parts
[1] = gen_int_mode (l
[1], SImode
);
12140 parts
[0] = gen_int_mode (l
[0], SImode
);
12143 gcc_unreachable ();
12148 if (mode
== TImode
)
12149 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12150 if (mode
== XFmode
|| mode
== TFmode
)
12152 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12153 if (REG_P (operand
))
12155 gcc_assert (reload_completed
);
12156 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12157 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12159 else if (offsettable_memref_p (operand
))
12161 operand
= adjust_address (operand
, DImode
, 0);
12162 parts
[0] = operand
;
12163 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12165 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12170 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12171 real_to_target (l
, &r
, mode
);
12173 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12174 if (HOST_BITS_PER_WIDE_INT
>= 64)
12177 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12178 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12181 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12183 if (upper_mode
== SImode
)
12184 parts
[1] = gen_int_mode (l
[2], SImode
);
12185 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12188 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12189 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12192 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12195 gcc_unreachable ();
12202 /* Emit insns to perform a move or push of DI, DF, and XF values.
12203 Return false when normal moves are needed; true when all required
12204 insns have been emitted. Operands 2-4 contain the input values
12205 int the correct order; operands 5-7 contain the output values. */
12208 ix86_split_long_move (rtx operands
[])
12213 int collisions
= 0;
12214 enum machine_mode mode
= GET_MODE (operands
[0]);
12216 /* The DFmode expanders may ask us to move double.
12217 For 64bit target this is single move. By hiding the fact
12218 here we simplify i386.md splitters. */
12219 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12221 /* Optimize constant pool reference to immediates. This is used by
12222 fp moves, that force all constants to memory to allow combining. */
12224 if (GET_CODE (operands
[1]) == MEM
12225 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12226 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12227 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12228 if (push_operand (operands
[0], VOIDmode
))
12230 operands
[0] = copy_rtx (operands
[0]);
12231 PUT_MODE (operands
[0], Pmode
);
12234 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12235 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12236 emit_move_insn (operands
[0], operands
[1]);
12240 /* The only non-offsettable memory we handle is push. */
12241 if (push_operand (operands
[0], VOIDmode
))
12244 gcc_assert (GET_CODE (operands
[0]) != MEM
12245 || offsettable_memref_p (operands
[0]));
12247 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12248 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12250 /* When emitting push, take care for source operands on the stack. */
12251 if (push
&& GET_CODE (operands
[1]) == MEM
12252 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12255 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12256 XEXP (part
[1][2], 0));
12257 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12258 XEXP (part
[1][1], 0));
12261 /* We need to do copy in the right order in case an address register
12262 of the source overlaps the destination. */
12263 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
12265 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12267 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12270 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12273 /* Collision in the middle part can be handled by reordering. */
12274 if (collisions
== 1 && nparts
== 3
12275 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12278 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12279 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12282 /* If there are more collisions, we can't handle it by reordering.
12283 Do an lea to the last part and use only one colliding move. */
12284 else if (collisions
> 1)
12290 base
= part
[0][nparts
- 1];
12292 /* Handle the case when the last part isn't valid for lea.
12293 Happens in 64-bit mode storing the 12-byte XFmode. */
12294 if (GET_MODE (base
) != Pmode
)
12295 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12297 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12298 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12299 part
[1][1] = replace_equiv_address (part
[1][1],
12300 plus_constant (base
, UNITS_PER_WORD
));
12302 part
[1][2] = replace_equiv_address (part
[1][2],
12303 plus_constant (base
, 8));
12313 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12314 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12315 emit_move_insn (part
[0][2], part
[1][2]);
12320 /* In 64bit mode we don't have 32bit push available. In case this is
12321 register, it is OK - we will just use larger counterpart. We also
12322 retype memory - these comes from attempt to avoid REX prefix on
12323 moving of second half of TFmode value. */
12324 if (GET_MODE (part
[1][1]) == SImode
)
12326 switch (GET_CODE (part
[1][1]))
12329 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12333 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12337 gcc_unreachable ();
12340 if (GET_MODE (part
[1][0]) == SImode
)
12341 part
[1][0] = part
[1][1];
12344 emit_move_insn (part
[0][1], part
[1][1]);
12345 emit_move_insn (part
[0][0], part
[1][0]);
12349 /* Choose correct order to not overwrite the source before it is copied. */
12350 if ((REG_P (part
[0][0])
12351 && REG_P (part
[1][1])
12352 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12354 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12356 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12360 operands
[2] = part
[0][2];
12361 operands
[3] = part
[0][1];
12362 operands
[4] = part
[0][0];
12363 operands
[5] = part
[1][2];
12364 operands
[6] = part
[1][1];
12365 operands
[7] = part
[1][0];
12369 operands
[2] = part
[0][1];
12370 operands
[3] = part
[0][0];
12371 operands
[5] = part
[1][1];
12372 operands
[6] = part
[1][0];
12379 operands
[2] = part
[0][0];
12380 operands
[3] = part
[0][1];
12381 operands
[4] = part
[0][2];
12382 operands
[5] = part
[1][0];
12383 operands
[6] = part
[1][1];
12384 operands
[7] = part
[1][2];
12388 operands
[2] = part
[0][0];
12389 operands
[3] = part
[0][1];
12390 operands
[5] = part
[1][0];
12391 operands
[6] = part
[1][1];
12395 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12398 if (GET_CODE (operands
[5]) == CONST_INT
12399 && operands
[5] != const0_rtx
12400 && REG_P (operands
[2]))
12402 if (GET_CODE (operands
[6]) == CONST_INT
12403 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12404 operands
[6] = operands
[2];
12407 && GET_CODE (operands
[7]) == CONST_INT
12408 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12409 operands
[7] = operands
[2];
12413 && GET_CODE (operands
[6]) == CONST_INT
12414 && operands
[6] != const0_rtx
12415 && REG_P (operands
[3])
12416 && GET_CODE (operands
[7]) == CONST_INT
12417 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12418 operands
[7] = operands
[3];
12421 emit_move_insn (operands
[2], operands
[5]);
12422 emit_move_insn (operands
[3], operands
[6]);
12424 emit_move_insn (operands
[4], operands
[7]);
12429 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12430 left shift by a constant, either using a single shift or
12431 a sequence of add instructions. */
12434 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12438 emit_insn ((mode
== DImode
12440 : gen_adddi3
) (operand
, operand
, operand
));
12442 else if (!optimize_size
12443 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12446 for (i
=0; i
<count
; i
++)
12448 emit_insn ((mode
== DImode
12450 : gen_adddi3
) (operand
, operand
, operand
));
12454 emit_insn ((mode
== DImode
12456 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12460 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12462 rtx low
[2], high
[2];
12464 const int single_width
= mode
== DImode
? 32 : 64;
12466 if (GET_CODE (operands
[2]) == CONST_INT
)
12468 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12469 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12471 if (count
>= single_width
)
12473 emit_move_insn (high
[0], low
[1]);
12474 emit_move_insn (low
[0], const0_rtx
);
12476 if (count
> single_width
)
12477 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12481 if (!rtx_equal_p (operands
[0], operands
[1]))
12482 emit_move_insn (operands
[0], operands
[1]);
12483 emit_insn ((mode
== DImode
12485 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12486 ix86_expand_ashl_const (low
[0], count
, mode
);
12491 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12493 if (operands
[1] == const1_rtx
)
12495 /* Assuming we've chosen a QImode capable registers, then 1 << N
12496 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12497 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12499 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12501 ix86_expand_clear (low
[0]);
12502 ix86_expand_clear (high
[0]);
12503 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12505 d
= gen_lowpart (QImode
, low
[0]);
12506 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12507 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12508 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12510 d
= gen_lowpart (QImode
, high
[0]);
12511 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12512 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12513 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12516 /* Otherwise, we can get the same results by manually performing
12517 a bit extract operation on bit 5/6, and then performing the two
12518 shifts. The two methods of getting 0/1 into low/high are exactly
12519 the same size. Avoiding the shift in the bit extract case helps
12520 pentium4 a bit; no one else seems to care much either way. */
12525 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12526 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12528 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12529 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12531 emit_insn ((mode
== DImode
12533 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12534 emit_insn ((mode
== DImode
12536 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12537 emit_move_insn (low
[0], high
[0]);
12538 emit_insn ((mode
== DImode
12540 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12543 emit_insn ((mode
== DImode
12545 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12546 emit_insn ((mode
== DImode
12548 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12552 if (operands
[1] == constm1_rtx
)
12554 /* For -1 << N, we can avoid the shld instruction, because we
12555 know that we're shifting 0...31/63 ones into a -1. */
12556 emit_move_insn (low
[0], constm1_rtx
);
12558 emit_move_insn (high
[0], low
[0]);
12560 emit_move_insn (high
[0], constm1_rtx
);
12564 if (!rtx_equal_p (operands
[0], operands
[1]))
12565 emit_move_insn (operands
[0], operands
[1]);
12567 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12568 emit_insn ((mode
== DImode
12570 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12573 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12575 if (TARGET_CMOVE
&& scratch
)
12577 ix86_expand_clear (scratch
);
12578 emit_insn ((mode
== DImode
12579 ? gen_x86_shift_adj_1
12580 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12583 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12587 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12589 rtx low
[2], high
[2];
12591 const int single_width
= mode
== DImode
? 32 : 64;
12593 if (GET_CODE (operands
[2]) == CONST_INT
)
12595 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12596 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12598 if (count
== single_width
* 2 - 1)
12600 emit_move_insn (high
[0], high
[1]);
12601 emit_insn ((mode
== DImode
12603 : gen_ashrdi3
) (high
[0], high
[0],
12604 GEN_INT (single_width
- 1)));
12605 emit_move_insn (low
[0], high
[0]);
12608 else if (count
>= single_width
)
12610 emit_move_insn (low
[0], high
[1]);
12611 emit_move_insn (high
[0], low
[0]);
12612 emit_insn ((mode
== DImode
12614 : gen_ashrdi3
) (high
[0], high
[0],
12615 GEN_INT (single_width
- 1)));
12616 if (count
> single_width
)
12617 emit_insn ((mode
== DImode
12619 : gen_ashrdi3
) (low
[0], low
[0],
12620 GEN_INT (count
- single_width
)));
12624 if (!rtx_equal_p (operands
[0], operands
[1]))
12625 emit_move_insn (operands
[0], operands
[1]);
12626 emit_insn ((mode
== DImode
12628 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12629 emit_insn ((mode
== DImode
12631 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12636 if (!rtx_equal_p (operands
[0], operands
[1]))
12637 emit_move_insn (operands
[0], operands
[1]);
12639 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12641 emit_insn ((mode
== DImode
12643 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12644 emit_insn ((mode
== DImode
12646 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12648 if (TARGET_CMOVE
&& scratch
)
12650 emit_move_insn (scratch
, high
[0]);
12651 emit_insn ((mode
== DImode
12653 : gen_ashrdi3
) (scratch
, scratch
,
12654 GEN_INT (single_width
- 1)));
12655 emit_insn ((mode
== DImode
12656 ? gen_x86_shift_adj_1
12657 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12661 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12666 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12668 rtx low
[2], high
[2];
12670 const int single_width
= mode
== DImode
? 32 : 64;
12672 if (GET_CODE (operands
[2]) == CONST_INT
)
12674 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12675 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12677 if (count
>= single_width
)
12679 emit_move_insn (low
[0], high
[1]);
12680 ix86_expand_clear (high
[0]);
12682 if (count
> single_width
)
12683 emit_insn ((mode
== DImode
12685 : gen_lshrdi3
) (low
[0], low
[0],
12686 GEN_INT (count
- single_width
)));
12690 if (!rtx_equal_p (operands
[0], operands
[1]))
12691 emit_move_insn (operands
[0], operands
[1]);
12692 emit_insn ((mode
== DImode
12694 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12695 emit_insn ((mode
== DImode
12697 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12702 if (!rtx_equal_p (operands
[0], operands
[1]))
12703 emit_move_insn (operands
[0], operands
[1]);
12705 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12707 emit_insn ((mode
== DImode
12709 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12710 emit_insn ((mode
== DImode
12712 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12714 /* Heh. By reversing the arguments, we can reuse this pattern. */
12715 if (TARGET_CMOVE
&& scratch
)
12717 ix86_expand_clear (scratch
);
12718 emit_insn ((mode
== DImode
12719 ? gen_x86_shift_adj_1
12720 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12724 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
12728 /* Helper function for the string operations below. Dest VARIABLE whether
12729 it is aligned to VALUE bytes. If true, jump to the label. */
12731 ix86_expand_aligntest (rtx variable
, int value
)
12733 rtx label
= gen_label_rtx ();
12734 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
12735 if (GET_MODE (variable
) == DImode
)
12736 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
12738 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
12739 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
12744 /* Adjust COUNTER by the VALUE. */
12746 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
12748 if (GET_MODE (countreg
) == DImode
)
12749 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
12751 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
12754 /* Zero extend possibly SImode EXP to Pmode register. */
12756 ix86_zero_extend_to_Pmode (rtx exp
)
12759 if (GET_MODE (exp
) == VOIDmode
)
12760 return force_reg (Pmode
, exp
);
12761 if (GET_MODE (exp
) == Pmode
)
12762 return copy_to_mode_reg (Pmode
, exp
);
12763 r
= gen_reg_rtx (Pmode
);
12764 emit_insn (gen_zero_extendsidi2 (r
, exp
));
12768 /* Expand string move (memcpy) operation. Use i386 string operations when
12769 profitable. expand_clrmem contains similar code. */
12771 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12773 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12774 enum machine_mode counter_mode
;
12775 HOST_WIDE_INT align
= 0;
12776 unsigned HOST_WIDE_INT count
= 0;
12778 if (GET_CODE (align_exp
) == CONST_INT
)
12779 align
= INTVAL (align_exp
);
12781 /* Can't use any of this if the user has appropriated esi or edi. */
12782 if (global_regs
[4] || global_regs
[5])
12785 /* This simple hack avoids all inlining code and simplifies code below. */
12786 if (!TARGET_ALIGN_STRINGOPS
)
12789 if (GET_CODE (count_exp
) == CONST_INT
)
12791 count
= INTVAL (count_exp
);
12792 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12796 /* Figure out proper mode for counter. For 32bits it is always SImode,
12797 for 64bits use SImode when possible, otherwise DImode.
12798 Set count to number of bytes copied when known at compile time. */
12800 || GET_MODE (count_exp
) == SImode
12801 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12802 counter_mode
= SImode
;
12804 counter_mode
= DImode
;
12806 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12808 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12809 if (destreg
!= XEXP (dst
, 0))
12810 dst
= replace_equiv_address_nv (dst
, destreg
);
12811 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12812 if (srcreg
!= XEXP (src
, 0))
12813 src
= replace_equiv_address_nv (src
, srcreg
);
12815 /* When optimizing for size emit simple rep ; movsb instruction for
12816 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12817 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12818 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12819 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12820 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12821 known to be zero or not. The rep; movsb sequence causes higher
12822 register pressure though, so take that into account. */
12824 if ((!optimize
|| optimize_size
)
12829 || (count
& 3) + count
/ 4 > 6))))
12831 emit_insn (gen_cld ());
12832 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12833 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12834 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12835 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12839 /* For constant aligned (or small unaligned) copies use rep movsl
12840 followed by code copying the rest. For PentiumPro ensure 8 byte
12841 alignment to allow rep movsl acceleration. */
12843 else if (count
!= 0
12845 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12846 || optimize_size
|| count
< (unsigned int) 64))
12848 unsigned HOST_WIDE_INT offset
= 0;
12849 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12850 rtx srcmem
, dstmem
;
12852 emit_insn (gen_cld ());
12853 if (count
& ~(size
- 1))
12855 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12857 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12859 while (offset
< (count
& ~(size
- 1)))
12861 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12863 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12865 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12871 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12872 & (TARGET_64BIT
? -1 : 0x3fffffff));
12873 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12874 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12876 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12877 GEN_INT (size
== 4 ? 2 : 3));
12878 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12879 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12881 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12882 countreg
, destexp
, srcexp
));
12883 offset
= count
& ~(size
- 1);
12886 if (size
== 8 && (count
& 0x04))
12888 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12890 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12892 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12897 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12899 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12901 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12906 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12908 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12910 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12913 /* The generic code based on the glibc implementation:
12914 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12915 allowing accelerated copying there)
12916 - copy the data using rep movsl
12917 - copy the rest. */
12922 rtx srcmem
, dstmem
;
12923 int desired_alignment
= (TARGET_PENTIUMPRO
12924 && (count
== 0 || count
>= (unsigned int) 260)
12925 ? 8 : UNITS_PER_WORD
);
12926 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12927 dst
= change_address (dst
, BLKmode
, destreg
);
12928 src
= change_address (src
, BLKmode
, srcreg
);
12930 /* In case we don't know anything about the alignment, default to
12931 library version, since it is usually equally fast and result in
12934 Also emit call when we know that the count is large and call overhead
12935 will not be important. */
12936 if (!TARGET_INLINE_ALL_STRINGOPS
12937 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12940 if (TARGET_SINGLE_STRINGOP
)
12941 emit_insn (gen_cld ());
12943 countreg2
= gen_reg_rtx (Pmode
);
12944 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12946 /* We don't use loops to align destination and to copy parts smaller
12947 than 4 bytes, because gcc is able to optimize such code better (in
12948 the case the destination or the count really is aligned, gcc is often
12949 able to predict the branches) and also it is friendlier to the
12950 hardware branch prediction.
12952 Using loops is beneficial for generic case, because we can
12953 handle small counts using the loops. Many CPUs (such as Athlon)
12954 have large REP prefix setup costs.
12956 This is quite costly. Maybe we can revisit this decision later or
12957 add some customizability to this code. */
12959 if (count
== 0 && align
< desired_alignment
)
12961 label
= gen_label_rtx ();
12962 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12963 LEU
, 0, counter_mode
, 1, label
);
12967 rtx label
= ix86_expand_aligntest (destreg
, 1);
12968 srcmem
= change_address (src
, QImode
, srcreg
);
12969 dstmem
= change_address (dst
, QImode
, destreg
);
12970 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12971 ix86_adjust_counter (countreg
, 1);
12972 emit_label (label
);
12973 LABEL_NUSES (label
) = 1;
12977 rtx label
= ix86_expand_aligntest (destreg
, 2);
12978 srcmem
= change_address (src
, HImode
, srcreg
);
12979 dstmem
= change_address (dst
, HImode
, destreg
);
12980 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12981 ix86_adjust_counter (countreg
, 2);
12982 emit_label (label
);
12983 LABEL_NUSES (label
) = 1;
12985 if (align
<= 4 && desired_alignment
> 4)
12987 rtx label
= ix86_expand_aligntest (destreg
, 4);
12988 srcmem
= change_address (src
, SImode
, srcreg
);
12989 dstmem
= change_address (dst
, SImode
, destreg
);
12990 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12991 ix86_adjust_counter (countreg
, 4);
12992 emit_label (label
);
12993 LABEL_NUSES (label
) = 1;
12996 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12998 emit_label (label
);
12999 LABEL_NUSES (label
) = 1;
13002 if (!TARGET_SINGLE_STRINGOP
)
13003 emit_insn (gen_cld ());
13006 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13008 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13012 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13013 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13015 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
13016 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13017 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
13018 countreg2
, destexp
, srcexp
));
13022 emit_label (label
);
13023 LABEL_NUSES (label
) = 1;
13025 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13027 srcmem
= change_address (src
, SImode
, srcreg
);
13028 dstmem
= change_address (dst
, SImode
, destreg
);
13029 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13031 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
13033 rtx label
= ix86_expand_aligntest (countreg
, 4);
13034 srcmem
= change_address (src
, SImode
, srcreg
);
13035 dstmem
= change_address (dst
, SImode
, destreg
);
13036 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13037 emit_label (label
);
13038 LABEL_NUSES (label
) = 1;
13040 if (align
> 2 && count
!= 0 && (count
& 2))
13042 srcmem
= change_address (src
, HImode
, srcreg
);
13043 dstmem
= change_address (dst
, HImode
, destreg
);
13044 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13046 if (align
<= 2 || count
== 0)
13048 rtx label
= ix86_expand_aligntest (countreg
, 2);
13049 srcmem
= change_address (src
, HImode
, srcreg
);
13050 dstmem
= change_address (dst
, HImode
, destreg
);
13051 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13052 emit_label (label
);
13053 LABEL_NUSES (label
) = 1;
13055 if (align
> 1 && count
!= 0 && (count
& 1))
13057 srcmem
= change_address (src
, QImode
, srcreg
);
13058 dstmem
= change_address (dst
, QImode
, destreg
);
13059 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13061 if (align
<= 1 || count
== 0)
13063 rtx label
= ix86_expand_aligntest (countreg
, 1);
13064 srcmem
= change_address (src
, QImode
, srcreg
);
13065 dstmem
= change_address (dst
, QImode
, destreg
);
13066 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
13067 emit_label (label
);
13068 LABEL_NUSES (label
) = 1;
13075 /* Expand string clear operation (bzero). Use i386 string operations when
13076 profitable. expand_movmem contains similar code. */
13078 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
13080 rtx destreg
, zeroreg
, countreg
, destexp
;
13081 enum machine_mode counter_mode
;
13082 HOST_WIDE_INT align
= 0;
13083 unsigned HOST_WIDE_INT count
= 0;
13085 if (GET_CODE (align_exp
) == CONST_INT
)
13086 align
= INTVAL (align_exp
);
13088 /* Can't use any of this if the user has appropriated esi. */
13089 if (global_regs
[4])
13092 /* This simple hack avoids all inlining code and simplifies code below. */
13093 if (!TARGET_ALIGN_STRINGOPS
)
13096 if (GET_CODE (count_exp
) == CONST_INT
)
13098 count
= INTVAL (count_exp
);
13099 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
13102 /* Figure out proper mode for counter. For 32bits it is always SImode,
13103 for 64bits use SImode when possible, otherwise DImode.
13104 Set count to number of bytes copied when known at compile time. */
13106 || GET_MODE (count_exp
) == SImode
13107 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
13108 counter_mode
= SImode
;
13110 counter_mode
= DImode
;
13112 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13113 if (destreg
!= XEXP (dst
, 0))
13114 dst
= replace_equiv_address_nv (dst
, destreg
);
13117 /* When optimizing for size emit simple rep ; movsb instruction for
13118 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13119 sequence is 7 bytes long, so if optimizing for size and count is
13120 small enough that some stosl, stosw and stosb instructions without
13121 rep are shorter, fall back into the next if. */
13123 if ((!optimize
|| optimize_size
)
13126 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
13128 emit_insn (gen_cld ());
13130 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
13131 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
13132 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
13133 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
13135 else if (count
!= 0
13137 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
13138 || optimize_size
|| count
< (unsigned int) 64))
13140 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
13141 unsigned HOST_WIDE_INT offset
= 0;
13143 emit_insn (gen_cld ());
13145 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
13146 if (count
& ~(size
- 1))
13148 unsigned HOST_WIDE_INT repcount
;
13149 unsigned int max_nonrep
;
13151 repcount
= count
>> (size
== 4 ? 2 : 3);
13153 repcount
&= 0x3fffffff;
13155 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13156 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13157 bytes. In both cases the latter seems to be faster for small
13159 max_nonrep
= size
== 4 ? 7 : 4;
13160 if (!optimize_size
)
13163 case PROCESSOR_PENTIUM4
:
13164 case PROCESSOR_NOCONA
:
13171 if (repcount
<= max_nonrep
)
13172 while (repcount
-- > 0)
13174 rtx mem
= adjust_automodify_address_nv (dst
,
13175 GET_MODE (zeroreg
),
13177 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
13182 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
13183 countreg
= ix86_zero_extend_to_Pmode (countreg
);
13184 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13185 GEN_INT (size
== 4 ? 2 : 3));
13186 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13187 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
13189 offset
= count
& ~(size
- 1);
13192 if (size
== 8 && (count
& 0x04))
13194 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
13196 emit_insn (gen_strset (destreg
, mem
,
13197 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13202 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
13204 emit_insn (gen_strset (destreg
, mem
,
13205 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13210 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
13212 emit_insn (gen_strset (destreg
, mem
,
13213 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13220 /* Compute desired alignment of the string operation. */
13221 int desired_alignment
= (TARGET_PENTIUMPRO
13222 && (count
== 0 || count
>= (unsigned int) 260)
13223 ? 8 : UNITS_PER_WORD
);
13225 /* In case we don't know anything about the alignment, default to
13226 library version, since it is usually equally fast and result in
13229 Also emit call when we know that the count is large and call overhead
13230 will not be important. */
13231 if (!TARGET_INLINE_ALL_STRINGOPS
13232 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
13235 if (TARGET_SINGLE_STRINGOP
)
13236 emit_insn (gen_cld ());
13238 countreg2
= gen_reg_rtx (Pmode
);
13239 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
13240 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
13241 /* Get rid of MEM_OFFSET, it won't be accurate. */
13242 dst
= change_address (dst
, BLKmode
, destreg
);
13244 if (count
== 0 && align
< desired_alignment
)
13246 label
= gen_label_rtx ();
13247 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
13248 LEU
, 0, counter_mode
, 1, label
);
13252 rtx label
= ix86_expand_aligntest (destreg
, 1);
13253 emit_insn (gen_strset (destreg
, dst
,
13254 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13255 ix86_adjust_counter (countreg
, 1);
13256 emit_label (label
);
13257 LABEL_NUSES (label
) = 1;
13261 rtx label
= ix86_expand_aligntest (destreg
, 2);
13262 emit_insn (gen_strset (destreg
, dst
,
13263 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13264 ix86_adjust_counter (countreg
, 2);
13265 emit_label (label
);
13266 LABEL_NUSES (label
) = 1;
13268 if (align
<= 4 && desired_alignment
> 4)
13270 rtx label
= ix86_expand_aligntest (destreg
, 4);
13271 emit_insn (gen_strset (destreg
, dst
,
13273 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
13275 ix86_adjust_counter (countreg
, 4);
13276 emit_label (label
);
13277 LABEL_NUSES (label
) = 1;
13280 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
13282 emit_label (label
);
13283 LABEL_NUSES (label
) = 1;
13287 if (!TARGET_SINGLE_STRINGOP
)
13288 emit_insn (gen_cld ());
13291 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13293 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13297 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13298 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13300 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13301 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
13305 emit_label (label
);
13306 LABEL_NUSES (label
) = 1;
13309 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13310 emit_insn (gen_strset (destreg
, dst
,
13311 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13312 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
13314 rtx label
= ix86_expand_aligntest (countreg
, 4);
13315 emit_insn (gen_strset (destreg
, dst
,
13316 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13317 emit_label (label
);
13318 LABEL_NUSES (label
) = 1;
13320 if (align
> 2 && count
!= 0 && (count
& 2))
13321 emit_insn (gen_strset (destreg
, dst
,
13322 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13323 if (align
<= 2 || count
== 0)
13325 rtx label
= ix86_expand_aligntest (countreg
, 2);
13326 emit_insn (gen_strset (destreg
, dst
,
13327 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13328 emit_label (label
);
13329 LABEL_NUSES (label
) = 1;
13331 if (align
> 1 && count
!= 0 && (count
& 1))
13332 emit_insn (gen_strset (destreg
, dst
,
13333 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13334 if (align
<= 1 || count
== 0)
13336 rtx label
= ix86_expand_aligntest (countreg
, 1);
13337 emit_insn (gen_strset (destreg
, dst
,
13338 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13339 emit_label (label
);
13340 LABEL_NUSES (label
) = 1;
13346 /* Expand strlen. */
13348 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
13350 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
13352 /* The generic case of strlen expander is long. Avoid it's
13353 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13355 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13356 && !TARGET_INLINE_ALL_STRINGOPS
13358 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
13361 addr
= force_reg (Pmode
, XEXP (src
, 0));
13362 scratch1
= gen_reg_rtx (Pmode
);
13364 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13367 /* Well it seems that some optimizer does not combine a call like
13368 foo(strlen(bar), strlen(bar));
13369 when the move and the subtraction is done here. It does calculate
13370 the length just once when these instructions are done inside of
13371 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13372 often used and I use one fewer register for the lifetime of
13373 output_strlen_unroll() this is better. */
13375 emit_move_insn (out
, addr
);
13377 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
13379 /* strlensi_unroll_1 returns the address of the zero at the end of
13380 the string, like memchr(), so compute the length by subtracting
13381 the start address. */
13383 emit_insn (gen_subdi3 (out
, out
, addr
));
13385 emit_insn (gen_subsi3 (out
, out
, addr
));
13390 scratch2
= gen_reg_rtx (Pmode
);
13391 scratch3
= gen_reg_rtx (Pmode
);
13392 scratch4
= force_reg (Pmode
, constm1_rtx
);
13394 emit_move_insn (scratch3
, addr
);
13395 eoschar
= force_reg (QImode
, eoschar
);
13397 emit_insn (gen_cld ());
13398 src
= replace_equiv_address_nv (src
, scratch3
);
13400 /* If .md starts supporting :P, this can be done in .md. */
13401 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
13402 scratch4
), UNSPEC_SCAS
);
13403 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
13406 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
13407 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
13411 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
13412 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
13418 /* Expand the appropriate insns for doing strlen if not just doing
13421 out = result, initialized with the start address
13422 align_rtx = alignment of the address.
13423 scratch = scratch register, initialized with the startaddress when
13424 not aligned, otherwise undefined
13426 This is just the body. It needs the initializations mentioned above and
13427 some address computing at the end. These things are done in i386.md. */
13430 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
13434 rtx align_2_label
= NULL_RTX
;
13435 rtx align_3_label
= NULL_RTX
;
13436 rtx align_4_label
= gen_label_rtx ();
13437 rtx end_0_label
= gen_label_rtx ();
13439 rtx tmpreg
= gen_reg_rtx (SImode
);
13440 rtx scratch
= gen_reg_rtx (SImode
);
13444 if (GET_CODE (align_rtx
) == CONST_INT
)
13445 align
= INTVAL (align_rtx
);
13447 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13449 /* Is there a known alignment and is it less than 4? */
13452 rtx scratch1
= gen_reg_rtx (Pmode
);
13453 emit_move_insn (scratch1
, out
);
13454 /* Is there a known alignment and is it not 2? */
13457 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
13458 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
13460 /* Leave just the 3 lower bits. */
13461 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
13462 NULL_RTX
, 0, OPTAB_WIDEN
);
13464 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13465 Pmode
, 1, align_4_label
);
13466 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
13467 Pmode
, 1, align_2_label
);
13468 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
13469 Pmode
, 1, align_3_label
);
13473 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13474 check if is aligned to 4 - byte. */
13476 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
13477 NULL_RTX
, 0, OPTAB_WIDEN
);
13479 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13480 Pmode
, 1, align_4_label
);
13483 mem
= change_address (src
, QImode
, out
);
13485 /* Now compare the bytes. */
13487 /* Compare the first n unaligned byte on a byte per byte basis. */
13488 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
13489 QImode
, 1, end_0_label
);
13491 /* Increment the address. */
13493 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13495 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13497 /* Not needed with an alignment of 2 */
13500 emit_label (align_2_label
);
13502 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13506 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13508 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13510 emit_label (align_3_label
);
13513 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13517 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13519 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13522 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13523 align this loop. It gives only huge programs, but does not help to
13525 emit_label (align_4_label
);
13527 mem
= change_address (src
, SImode
, out
);
13528 emit_move_insn (scratch
, mem
);
13530 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
13532 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
13534 /* This formula yields a nonzero result iff one of the bytes is zero.
13535 This saves three branches inside loop and many cycles. */
13537 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
13538 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
13539 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
13540 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
13541 gen_int_mode (0x80808080, SImode
)));
13542 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
13547 rtx reg
= gen_reg_rtx (SImode
);
13548 rtx reg2
= gen_reg_rtx (Pmode
);
13549 emit_move_insn (reg
, tmpreg
);
13550 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
13552 /* If zero is not in the first two bytes, move two bytes forward. */
13553 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13554 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13555 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13556 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
13557 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
13560 /* Emit lea manually to avoid clobbering of flags. */
13561 emit_insn (gen_rtx_SET (SImode
, reg2
,
13562 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
13564 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13565 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13566 emit_insn (gen_rtx_SET (VOIDmode
, out
,
13567 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
13574 rtx end_2_label
= gen_label_rtx ();
13575 /* Is zero in the first two bytes? */
13577 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13578 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13579 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
13580 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
13581 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
13583 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
13584 JUMP_LABEL (tmp
) = end_2_label
;
13586 /* Not in the first two. Move two bytes forward. */
13587 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
13589 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
13591 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
13593 emit_label (end_2_label
);
13597 /* Avoid branch in fixing the byte. */
13598 tmpreg
= gen_lowpart (QImode
, tmpreg
);
13599 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
13600 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
13602 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
13604 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
13606 emit_label (end_0_label
);
13610 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
13611 rtx callarg2 ATTRIBUTE_UNUSED
,
13612 rtx pop
, int sibcall
)
13614 rtx use
= NULL
, call
;
13616 if (pop
== const0_rtx
)
13618 gcc_assert (!TARGET_64BIT
|| !pop
);
13620 if (TARGET_MACHO
&& !TARGET_64BIT
)
13623 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
13624 fnaddr
= machopic_indirect_call_target (fnaddr
);
13629 /* Static functions and indirect calls don't need the pic register. */
13630 if (! TARGET_64BIT
&& flag_pic
13631 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
13632 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
13633 use_reg (&use
, pic_offset_table_rtx
);
13636 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
13638 rtx al
= gen_rtx_REG (QImode
, 0);
13639 emit_move_insn (al
, callarg2
);
13640 use_reg (&use
, al
);
13643 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
13645 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13646 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13648 if (sibcall
&& TARGET_64BIT
13649 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
13652 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13653 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
13654 emit_move_insn (fnaddr
, addr
);
13655 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13658 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
13660 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
13663 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
13664 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
13665 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
13668 call
= emit_call_insn (call
);
13670 CALL_INSN_FUNCTION_USAGE (call
) = use
;
13674 /* Clear stack slot assignments remembered from previous functions.
13675 This is called from INIT_EXPANDERS once before RTL is emitted for each
13678 static struct machine_function
*
13679 ix86_init_machine_status (void)
13681 struct machine_function
*f
;
13683 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
13684 f
->use_fast_prologue_epilogue_nregs
= -1;
13685 f
->tls_descriptor_call_expanded_p
= 0;
13690 /* Return a MEM corresponding to a stack slot with mode MODE.
13691 Allocate a new slot if necessary.
13693 The RTL for a function can have several slots available: N is
13694 which slot to use. */
13697 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
13699 struct stack_local_entry
*s
;
13701 gcc_assert (n
< MAX_386_STACK_LOCALS
);
13703 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
13704 if (s
->mode
== mode
&& s
->n
== n
)
13705 return copy_rtx (s
->rtl
);
13707 s
= (struct stack_local_entry
*)
13708 ggc_alloc (sizeof (struct stack_local_entry
));
13711 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
13713 s
->next
= ix86_stack_locals
;
13714 ix86_stack_locals
= s
;
13718 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13720 static GTY(()) rtx ix86_tls_symbol
;
13722 ix86_tls_get_addr (void)
13725 if (!ix86_tls_symbol
)
13727 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13728 (TARGET_ANY_GNU_TLS
13730 ? "___tls_get_addr"
13731 : "__tls_get_addr");
13734 return ix86_tls_symbol
;
13737 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13739 static GTY(()) rtx ix86_tls_module_base_symbol
;
13741 ix86_tls_module_base (void)
13744 if (!ix86_tls_module_base_symbol
)
13746 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13747 "_TLS_MODULE_BASE_");
13748 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13749 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13752 return ix86_tls_module_base_symbol
;
13755 /* Calculate the length of the memory address in the instruction
13756 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13759 memory_address_length (rtx addr
)
13761 struct ix86_address parts
;
13762 rtx base
, index
, disp
;
13766 if (GET_CODE (addr
) == PRE_DEC
13767 || GET_CODE (addr
) == POST_INC
13768 || GET_CODE (addr
) == PRE_MODIFY
13769 || GET_CODE (addr
) == POST_MODIFY
)
13772 ok
= ix86_decompose_address (addr
, &parts
);
13775 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
13776 parts
.base
= SUBREG_REG (parts
.base
);
13777 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
13778 parts
.index
= SUBREG_REG (parts
.index
);
13781 index
= parts
.index
;
13786 - esp as the base always wants an index,
13787 - ebp as the base always wants a displacement. */
13789 /* Register Indirect. */
13790 if (base
&& !index
&& !disp
)
13792 /* esp (for its index) and ebp (for its displacement) need
13793 the two-byte modrm form. */
13794 if (addr
== stack_pointer_rtx
13795 || addr
== arg_pointer_rtx
13796 || addr
== frame_pointer_rtx
13797 || addr
== hard_frame_pointer_rtx
)
13801 /* Direct Addressing. */
13802 else if (disp
&& !base
&& !index
)
13807 /* Find the length of the displacement constant. */
13810 if (base
&& satisfies_constraint_K (disp
))
13815 /* ebp always wants a displacement. */
13816 else if (base
== hard_frame_pointer_rtx
)
13819 /* An index requires the two-byte modrm form.... */
13821 /* ...like esp, which always wants an index. */
13822 || base
== stack_pointer_rtx
13823 || base
== arg_pointer_rtx
13824 || base
== frame_pointer_rtx
)
13831 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13832 is set, expect that insn have 8bit immediate alternative. */
13834 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
13838 extract_insn_cached (insn
);
13839 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13840 if (CONSTANT_P (recog_data
.operand
[i
]))
13843 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
13847 switch (get_attr_mode (insn
))
13858 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13863 fatal_insn ("unknown insn mode", insn
);
13869 /* Compute default value for "length_address" attribute. */
13871 ix86_attr_length_address_default (rtx insn
)
13875 if (get_attr_type (insn
) == TYPE_LEA
)
13877 rtx set
= PATTERN (insn
);
13879 if (GET_CODE (set
) == PARALLEL
)
13880 set
= XVECEXP (set
, 0, 0);
13882 gcc_assert (GET_CODE (set
) == SET
);
13884 return memory_address_length (SET_SRC (set
));
13887 extract_insn_cached (insn
);
13888 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13889 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13891 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
13897 /* Return the maximum number of instructions a cpu can issue. */
13900 ix86_issue_rate (void)
13904 case PROCESSOR_PENTIUM
:
13908 case PROCESSOR_PENTIUMPRO
:
13909 case PROCESSOR_PENTIUM4
:
13910 case PROCESSOR_ATHLON
:
13912 case PROCESSOR_NOCONA
:
13913 case PROCESSOR_GENERIC32
:
13914 case PROCESSOR_GENERIC64
:
13917 case PROCESSOR_CORE2
:
13925 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13926 by DEP_INSN and nothing set by DEP_INSN. */
13929 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13933 /* Simplify the test for uninteresting insns. */
13934 if (insn_type
!= TYPE_SETCC
13935 && insn_type
!= TYPE_ICMOV
13936 && insn_type
!= TYPE_FCMOV
13937 && insn_type
!= TYPE_IBR
)
13940 if ((set
= single_set (dep_insn
)) != 0)
13942 set
= SET_DEST (set
);
13945 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
13946 && XVECLEN (PATTERN (dep_insn
), 0) == 2
13947 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
13948 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
13950 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13951 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13956 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
13959 /* This test is true if the dependent insn reads the flags but
13960 not any other potentially set register. */
13961 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
13964 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
13970 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13971 address with operands set by DEP_INSN. */
13974 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13978 if (insn_type
== TYPE_LEA
13981 addr
= PATTERN (insn
);
13983 if (GET_CODE (addr
) == PARALLEL
)
13984 addr
= XVECEXP (addr
, 0, 0);
13986 gcc_assert (GET_CODE (addr
) == SET
);
13988 addr
= SET_SRC (addr
);
13993 extract_insn_cached (insn
);
13994 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13995 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13997 addr
= XEXP (recog_data
.operand
[i
], 0);
14004 return modified_in_p (addr
, dep_insn
);
14008 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
14010 enum attr_type insn_type
, dep_insn_type
;
14011 enum attr_memory memory
;
14013 int dep_insn_code_number
;
14015 /* Anti and output dependencies have zero cost on all CPUs. */
14016 if (REG_NOTE_KIND (link
) != 0)
14019 dep_insn_code_number
= recog_memoized (dep_insn
);
14021 /* If we can't recognize the insns, we can't really do anything. */
14022 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
14025 insn_type
= get_attr_type (insn
);
14026 dep_insn_type
= get_attr_type (dep_insn
);
14030 case PROCESSOR_PENTIUM
:
14031 /* Address Generation Interlock adds a cycle of latency. */
14032 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14035 /* ??? Compares pair with jump/setcc. */
14036 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
14039 /* Floating point stores require value to be ready one cycle earlier. */
14040 if (insn_type
== TYPE_FMOV
14041 && get_attr_memory (insn
) == MEMORY_STORE
14042 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14046 case PROCESSOR_PENTIUMPRO
:
14047 memory
= get_attr_memory (insn
);
14049 /* INT->FP conversion is expensive. */
14050 if (get_attr_fp_int_src (dep_insn
))
14053 /* There is one cycle extra latency between an FP op and a store. */
14054 if (insn_type
== TYPE_FMOV
14055 && (set
= single_set (dep_insn
)) != NULL_RTX
14056 && (set2
= single_set (insn
)) != NULL_RTX
14057 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
14058 && GET_CODE (SET_DEST (set2
)) == MEM
)
14061 /* Show ability of reorder buffer to hide latency of load by executing
14062 in parallel with previous instruction in case
14063 previous instruction is not needed to compute the address. */
14064 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14065 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14067 /* Claim moves to take one cycle, as core can issue one load
14068 at time and the next load can start cycle later. */
14069 if (dep_insn_type
== TYPE_IMOV
14070 || dep_insn_type
== TYPE_FMOV
)
14078 memory
= get_attr_memory (insn
);
14080 /* The esp dependency is resolved before the instruction is really
14082 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
14083 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
14086 /* INT->FP conversion is expensive. */
14087 if (get_attr_fp_int_src (dep_insn
))
14090 /* Show ability of reorder buffer to hide latency of load by executing
14091 in parallel with previous instruction in case
14092 previous instruction is not needed to compute the address. */
14093 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14094 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14096 /* Claim moves to take one cycle, as core can issue one load
14097 at time and the next load can start cycle later. */
14098 if (dep_insn_type
== TYPE_IMOV
14099 || dep_insn_type
== TYPE_FMOV
)
14108 case PROCESSOR_ATHLON
:
14110 case PROCESSOR_GENERIC32
:
14111 case PROCESSOR_GENERIC64
:
14112 memory
= get_attr_memory (insn
);
14114 /* Show ability of reorder buffer to hide latency of load by executing
14115 in parallel with previous instruction in case
14116 previous instruction is not needed to compute the address. */
14117 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
14118 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
14120 enum attr_unit unit
= get_attr_unit (insn
);
14123 /* Because of the difference between the length of integer and
14124 floating unit pipeline preparation stages, the memory operands
14125 for floating point are cheaper.
14127 ??? For Athlon it the difference is most probably 2. */
14128 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
14131 loadcost
= TARGET_ATHLON
? 2 : 0;
14133 if (cost
>= loadcost
)
14146 /* How many alternative schedules to try. This should be as wide as the
14147 scheduling freedom in the DFA, but no wider. Making this value too
14148 large results extra work for the scheduler. */
14151 ia32_multipass_dfa_lookahead (void)
14153 if (ix86_tune
== PROCESSOR_PENTIUM
)
14156 if (ix86_tune
== PROCESSOR_PENTIUMPRO
14157 || ix86_tune
== PROCESSOR_K6
)
14165 /* Compute the alignment given to a constant that is being placed in memory.
14166 EXP is the constant and ALIGN is the alignment that the object would
14168 The value of this function is used instead of that alignment to align
14172 ix86_constant_alignment (tree exp
, int align
)
14174 if (TREE_CODE (exp
) == REAL_CST
)
14176 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
14178 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
14181 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
14182 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
14183 return BITS_PER_WORD
;
14188 /* Compute the alignment for a static variable.
14189 TYPE is the data type, and ALIGN is the alignment that
14190 the object would ordinarily have. The value of this function is used
14191 instead of that alignment to align the object. */
14194 ix86_data_alignment (tree type
, int align
)
14196 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
14198 if (AGGREGATE_TYPE_P (type
)
14199 && TYPE_SIZE (type
)
14200 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14201 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
14202 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
14203 && align
< max_align
)
14206 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14207 to 16byte boundary. */
14210 if (AGGREGATE_TYPE_P (type
)
14211 && TYPE_SIZE (type
)
14212 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14213 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
14214 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14218 if (TREE_CODE (type
) == ARRAY_TYPE
)
14220 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14222 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14225 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14228 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14230 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14233 else if ((TREE_CODE (type
) == RECORD_TYPE
14234 || TREE_CODE (type
) == UNION_TYPE
14235 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14236 && TYPE_FIELDS (type
))
14238 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14240 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14243 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14244 || TREE_CODE (type
) == INTEGER_TYPE
)
14246 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14248 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14255 /* Compute the alignment for a local variable.
14256 TYPE is the data type, and ALIGN is the alignment that
14257 the object would ordinarily have. The value of this macro is used
14258 instead of that alignment to align the object. */
14261 ix86_local_alignment (tree type
, int align
)
14263 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14264 to 16byte boundary. */
14267 if (AGGREGATE_TYPE_P (type
)
14268 && TYPE_SIZE (type
)
14269 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14270 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
14271 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14274 if (TREE_CODE (type
) == ARRAY_TYPE
)
14276 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14278 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14281 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14283 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14285 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14288 else if ((TREE_CODE (type
) == RECORD_TYPE
14289 || TREE_CODE (type
) == UNION_TYPE
14290 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14291 && TYPE_FIELDS (type
))
14293 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14295 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14298 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14299 || TREE_CODE (type
) == INTEGER_TYPE
)
14302 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14304 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14310 /* Emit RTL insns to initialize the variable parts of a trampoline.
14311 FNADDR is an RTX for the address of the function's pure code.
14312 CXT is an RTX for the static chain value for the function. */
14314 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
14318 /* Compute offset from the end of the jmp to the target function. */
14319 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
14320 plus_constant (tramp
, 10),
14321 NULL_RTX
, 1, OPTAB_DIRECT
);
14322 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
14323 gen_int_mode (0xb9, QImode
));
14324 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
14325 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
14326 gen_int_mode (0xe9, QImode
));
14327 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
14332 /* Try to load address using shorter movl instead of movabs.
14333 We may want to support movq for kernel mode, but kernel does not use
14334 trampolines at the moment. */
14335 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
14337 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
14338 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14339 gen_int_mode (0xbb41, HImode
));
14340 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
14341 gen_lowpart (SImode
, fnaddr
));
14346 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14347 gen_int_mode (0xbb49, HImode
));
14348 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14352 /* Load static chain using movabs to r10. */
14353 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14354 gen_int_mode (0xba49, HImode
));
14355 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14358 /* Jump to the r11 */
14359 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14360 gen_int_mode (0xff49, HImode
));
14361 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
14362 gen_int_mode (0xe3, QImode
));
14364 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
14367 #ifdef ENABLE_EXECUTE_STACK
14368 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
14369 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
14373 /* Codes for all the SSE/MMX builtins. */
14376 IX86_BUILTIN_ADDPS
,
14377 IX86_BUILTIN_ADDSS
,
14378 IX86_BUILTIN_DIVPS
,
14379 IX86_BUILTIN_DIVSS
,
14380 IX86_BUILTIN_MULPS
,
14381 IX86_BUILTIN_MULSS
,
14382 IX86_BUILTIN_SUBPS
,
14383 IX86_BUILTIN_SUBSS
,
14385 IX86_BUILTIN_CMPEQPS
,
14386 IX86_BUILTIN_CMPLTPS
,
14387 IX86_BUILTIN_CMPLEPS
,
14388 IX86_BUILTIN_CMPGTPS
,
14389 IX86_BUILTIN_CMPGEPS
,
14390 IX86_BUILTIN_CMPNEQPS
,
14391 IX86_BUILTIN_CMPNLTPS
,
14392 IX86_BUILTIN_CMPNLEPS
,
14393 IX86_BUILTIN_CMPNGTPS
,
14394 IX86_BUILTIN_CMPNGEPS
,
14395 IX86_BUILTIN_CMPORDPS
,
14396 IX86_BUILTIN_CMPUNORDPS
,
14397 IX86_BUILTIN_CMPEQSS
,
14398 IX86_BUILTIN_CMPLTSS
,
14399 IX86_BUILTIN_CMPLESS
,
14400 IX86_BUILTIN_CMPNEQSS
,
14401 IX86_BUILTIN_CMPNLTSS
,
14402 IX86_BUILTIN_CMPNLESS
,
14403 IX86_BUILTIN_CMPNGTSS
,
14404 IX86_BUILTIN_CMPNGESS
,
14405 IX86_BUILTIN_CMPORDSS
,
14406 IX86_BUILTIN_CMPUNORDSS
,
14408 IX86_BUILTIN_COMIEQSS
,
14409 IX86_BUILTIN_COMILTSS
,
14410 IX86_BUILTIN_COMILESS
,
14411 IX86_BUILTIN_COMIGTSS
,
14412 IX86_BUILTIN_COMIGESS
,
14413 IX86_BUILTIN_COMINEQSS
,
14414 IX86_BUILTIN_UCOMIEQSS
,
14415 IX86_BUILTIN_UCOMILTSS
,
14416 IX86_BUILTIN_UCOMILESS
,
14417 IX86_BUILTIN_UCOMIGTSS
,
14418 IX86_BUILTIN_UCOMIGESS
,
14419 IX86_BUILTIN_UCOMINEQSS
,
14421 IX86_BUILTIN_CVTPI2PS
,
14422 IX86_BUILTIN_CVTPS2PI
,
14423 IX86_BUILTIN_CVTSI2SS
,
14424 IX86_BUILTIN_CVTSI642SS
,
14425 IX86_BUILTIN_CVTSS2SI
,
14426 IX86_BUILTIN_CVTSS2SI64
,
14427 IX86_BUILTIN_CVTTPS2PI
,
14428 IX86_BUILTIN_CVTTSS2SI
,
14429 IX86_BUILTIN_CVTTSS2SI64
,
14431 IX86_BUILTIN_MAXPS
,
14432 IX86_BUILTIN_MAXSS
,
14433 IX86_BUILTIN_MINPS
,
14434 IX86_BUILTIN_MINSS
,
14436 IX86_BUILTIN_LOADUPS
,
14437 IX86_BUILTIN_STOREUPS
,
14438 IX86_BUILTIN_MOVSS
,
14440 IX86_BUILTIN_MOVHLPS
,
14441 IX86_BUILTIN_MOVLHPS
,
14442 IX86_BUILTIN_LOADHPS
,
14443 IX86_BUILTIN_LOADLPS
,
14444 IX86_BUILTIN_STOREHPS
,
14445 IX86_BUILTIN_STORELPS
,
14447 IX86_BUILTIN_MASKMOVQ
,
14448 IX86_BUILTIN_MOVMSKPS
,
14449 IX86_BUILTIN_PMOVMSKB
,
14451 IX86_BUILTIN_MOVNTPS
,
14452 IX86_BUILTIN_MOVNTQ
,
14454 IX86_BUILTIN_LOADDQU
,
14455 IX86_BUILTIN_STOREDQU
,
14457 IX86_BUILTIN_PACKSSWB
,
14458 IX86_BUILTIN_PACKSSDW
,
14459 IX86_BUILTIN_PACKUSWB
,
14461 IX86_BUILTIN_PADDB
,
14462 IX86_BUILTIN_PADDW
,
14463 IX86_BUILTIN_PADDD
,
14464 IX86_BUILTIN_PADDQ
,
14465 IX86_BUILTIN_PADDSB
,
14466 IX86_BUILTIN_PADDSW
,
14467 IX86_BUILTIN_PADDUSB
,
14468 IX86_BUILTIN_PADDUSW
,
14469 IX86_BUILTIN_PSUBB
,
14470 IX86_BUILTIN_PSUBW
,
14471 IX86_BUILTIN_PSUBD
,
14472 IX86_BUILTIN_PSUBQ
,
14473 IX86_BUILTIN_PSUBSB
,
14474 IX86_BUILTIN_PSUBSW
,
14475 IX86_BUILTIN_PSUBUSB
,
14476 IX86_BUILTIN_PSUBUSW
,
14479 IX86_BUILTIN_PANDN
,
14483 IX86_BUILTIN_PAVGB
,
14484 IX86_BUILTIN_PAVGW
,
14486 IX86_BUILTIN_PCMPEQB
,
14487 IX86_BUILTIN_PCMPEQW
,
14488 IX86_BUILTIN_PCMPEQD
,
14489 IX86_BUILTIN_PCMPGTB
,
14490 IX86_BUILTIN_PCMPGTW
,
14491 IX86_BUILTIN_PCMPGTD
,
14493 IX86_BUILTIN_PMADDWD
,
14495 IX86_BUILTIN_PMAXSW
,
14496 IX86_BUILTIN_PMAXUB
,
14497 IX86_BUILTIN_PMINSW
,
14498 IX86_BUILTIN_PMINUB
,
14500 IX86_BUILTIN_PMULHUW
,
14501 IX86_BUILTIN_PMULHW
,
14502 IX86_BUILTIN_PMULLW
,
14504 IX86_BUILTIN_PSADBW
,
14505 IX86_BUILTIN_PSHUFW
,
14507 IX86_BUILTIN_PSLLW
,
14508 IX86_BUILTIN_PSLLD
,
14509 IX86_BUILTIN_PSLLQ
,
14510 IX86_BUILTIN_PSRAW
,
14511 IX86_BUILTIN_PSRAD
,
14512 IX86_BUILTIN_PSRLW
,
14513 IX86_BUILTIN_PSRLD
,
14514 IX86_BUILTIN_PSRLQ
,
14515 IX86_BUILTIN_PSLLWI
,
14516 IX86_BUILTIN_PSLLDI
,
14517 IX86_BUILTIN_PSLLQI
,
14518 IX86_BUILTIN_PSRAWI
,
14519 IX86_BUILTIN_PSRADI
,
14520 IX86_BUILTIN_PSRLWI
,
14521 IX86_BUILTIN_PSRLDI
,
14522 IX86_BUILTIN_PSRLQI
,
14524 IX86_BUILTIN_PUNPCKHBW
,
14525 IX86_BUILTIN_PUNPCKHWD
,
14526 IX86_BUILTIN_PUNPCKHDQ
,
14527 IX86_BUILTIN_PUNPCKLBW
,
14528 IX86_BUILTIN_PUNPCKLWD
,
14529 IX86_BUILTIN_PUNPCKLDQ
,
14531 IX86_BUILTIN_SHUFPS
,
14533 IX86_BUILTIN_RCPPS
,
14534 IX86_BUILTIN_RCPSS
,
14535 IX86_BUILTIN_RSQRTPS
,
14536 IX86_BUILTIN_RSQRTSS
,
14537 IX86_BUILTIN_SQRTPS
,
14538 IX86_BUILTIN_SQRTSS
,
14540 IX86_BUILTIN_UNPCKHPS
,
14541 IX86_BUILTIN_UNPCKLPS
,
14543 IX86_BUILTIN_ANDPS
,
14544 IX86_BUILTIN_ANDNPS
,
14546 IX86_BUILTIN_XORPS
,
14549 IX86_BUILTIN_LDMXCSR
,
14550 IX86_BUILTIN_STMXCSR
,
14551 IX86_BUILTIN_SFENCE
,
14553 /* 3DNow! Original */
14554 IX86_BUILTIN_FEMMS
,
14555 IX86_BUILTIN_PAVGUSB
,
14556 IX86_BUILTIN_PF2ID
,
14557 IX86_BUILTIN_PFACC
,
14558 IX86_BUILTIN_PFADD
,
14559 IX86_BUILTIN_PFCMPEQ
,
14560 IX86_BUILTIN_PFCMPGE
,
14561 IX86_BUILTIN_PFCMPGT
,
14562 IX86_BUILTIN_PFMAX
,
14563 IX86_BUILTIN_PFMIN
,
14564 IX86_BUILTIN_PFMUL
,
14565 IX86_BUILTIN_PFRCP
,
14566 IX86_BUILTIN_PFRCPIT1
,
14567 IX86_BUILTIN_PFRCPIT2
,
14568 IX86_BUILTIN_PFRSQIT1
,
14569 IX86_BUILTIN_PFRSQRT
,
14570 IX86_BUILTIN_PFSUB
,
14571 IX86_BUILTIN_PFSUBR
,
14572 IX86_BUILTIN_PI2FD
,
14573 IX86_BUILTIN_PMULHRW
,
14575 /* 3DNow! Athlon Extensions */
14576 IX86_BUILTIN_PF2IW
,
14577 IX86_BUILTIN_PFNACC
,
14578 IX86_BUILTIN_PFPNACC
,
14579 IX86_BUILTIN_PI2FW
,
14580 IX86_BUILTIN_PSWAPDSI
,
14581 IX86_BUILTIN_PSWAPDSF
,
14584 IX86_BUILTIN_ADDPD
,
14585 IX86_BUILTIN_ADDSD
,
14586 IX86_BUILTIN_DIVPD
,
14587 IX86_BUILTIN_DIVSD
,
14588 IX86_BUILTIN_MULPD
,
14589 IX86_BUILTIN_MULSD
,
14590 IX86_BUILTIN_SUBPD
,
14591 IX86_BUILTIN_SUBSD
,
14593 IX86_BUILTIN_CMPEQPD
,
14594 IX86_BUILTIN_CMPLTPD
,
14595 IX86_BUILTIN_CMPLEPD
,
14596 IX86_BUILTIN_CMPGTPD
,
14597 IX86_BUILTIN_CMPGEPD
,
14598 IX86_BUILTIN_CMPNEQPD
,
14599 IX86_BUILTIN_CMPNLTPD
,
14600 IX86_BUILTIN_CMPNLEPD
,
14601 IX86_BUILTIN_CMPNGTPD
,
14602 IX86_BUILTIN_CMPNGEPD
,
14603 IX86_BUILTIN_CMPORDPD
,
14604 IX86_BUILTIN_CMPUNORDPD
,
14605 IX86_BUILTIN_CMPNEPD
,
14606 IX86_BUILTIN_CMPEQSD
,
14607 IX86_BUILTIN_CMPLTSD
,
14608 IX86_BUILTIN_CMPLESD
,
14609 IX86_BUILTIN_CMPNEQSD
,
14610 IX86_BUILTIN_CMPNLTSD
,
14611 IX86_BUILTIN_CMPNLESD
,
14612 IX86_BUILTIN_CMPORDSD
,
14613 IX86_BUILTIN_CMPUNORDSD
,
14614 IX86_BUILTIN_CMPNESD
,
14616 IX86_BUILTIN_COMIEQSD
,
14617 IX86_BUILTIN_COMILTSD
,
14618 IX86_BUILTIN_COMILESD
,
14619 IX86_BUILTIN_COMIGTSD
,
14620 IX86_BUILTIN_COMIGESD
,
14621 IX86_BUILTIN_COMINEQSD
,
14622 IX86_BUILTIN_UCOMIEQSD
,
14623 IX86_BUILTIN_UCOMILTSD
,
14624 IX86_BUILTIN_UCOMILESD
,
14625 IX86_BUILTIN_UCOMIGTSD
,
14626 IX86_BUILTIN_UCOMIGESD
,
14627 IX86_BUILTIN_UCOMINEQSD
,
14629 IX86_BUILTIN_MAXPD
,
14630 IX86_BUILTIN_MAXSD
,
14631 IX86_BUILTIN_MINPD
,
14632 IX86_BUILTIN_MINSD
,
14634 IX86_BUILTIN_ANDPD
,
14635 IX86_BUILTIN_ANDNPD
,
14637 IX86_BUILTIN_XORPD
,
14639 IX86_BUILTIN_SQRTPD
,
14640 IX86_BUILTIN_SQRTSD
,
14642 IX86_BUILTIN_UNPCKHPD
,
14643 IX86_BUILTIN_UNPCKLPD
,
14645 IX86_BUILTIN_SHUFPD
,
14647 IX86_BUILTIN_LOADUPD
,
14648 IX86_BUILTIN_STOREUPD
,
14649 IX86_BUILTIN_MOVSD
,
14651 IX86_BUILTIN_LOADHPD
,
14652 IX86_BUILTIN_LOADLPD
,
14654 IX86_BUILTIN_CVTDQ2PD
,
14655 IX86_BUILTIN_CVTDQ2PS
,
14657 IX86_BUILTIN_CVTPD2DQ
,
14658 IX86_BUILTIN_CVTPD2PI
,
14659 IX86_BUILTIN_CVTPD2PS
,
14660 IX86_BUILTIN_CVTTPD2DQ
,
14661 IX86_BUILTIN_CVTTPD2PI
,
14663 IX86_BUILTIN_CVTPI2PD
,
14664 IX86_BUILTIN_CVTSI2SD
,
14665 IX86_BUILTIN_CVTSI642SD
,
14667 IX86_BUILTIN_CVTSD2SI
,
14668 IX86_BUILTIN_CVTSD2SI64
,
14669 IX86_BUILTIN_CVTSD2SS
,
14670 IX86_BUILTIN_CVTSS2SD
,
14671 IX86_BUILTIN_CVTTSD2SI
,
14672 IX86_BUILTIN_CVTTSD2SI64
,
14674 IX86_BUILTIN_CVTPS2DQ
,
14675 IX86_BUILTIN_CVTPS2PD
,
14676 IX86_BUILTIN_CVTTPS2DQ
,
14678 IX86_BUILTIN_MOVNTI
,
14679 IX86_BUILTIN_MOVNTPD
,
14680 IX86_BUILTIN_MOVNTDQ
,
14683 IX86_BUILTIN_MASKMOVDQU
,
14684 IX86_BUILTIN_MOVMSKPD
,
14685 IX86_BUILTIN_PMOVMSKB128
,
14687 IX86_BUILTIN_PACKSSWB128
,
14688 IX86_BUILTIN_PACKSSDW128
,
14689 IX86_BUILTIN_PACKUSWB128
,
14691 IX86_BUILTIN_PADDB128
,
14692 IX86_BUILTIN_PADDW128
,
14693 IX86_BUILTIN_PADDD128
,
14694 IX86_BUILTIN_PADDQ128
,
14695 IX86_BUILTIN_PADDSB128
,
14696 IX86_BUILTIN_PADDSW128
,
14697 IX86_BUILTIN_PADDUSB128
,
14698 IX86_BUILTIN_PADDUSW128
,
14699 IX86_BUILTIN_PSUBB128
,
14700 IX86_BUILTIN_PSUBW128
,
14701 IX86_BUILTIN_PSUBD128
,
14702 IX86_BUILTIN_PSUBQ128
,
14703 IX86_BUILTIN_PSUBSB128
,
14704 IX86_BUILTIN_PSUBSW128
,
14705 IX86_BUILTIN_PSUBUSB128
,
14706 IX86_BUILTIN_PSUBUSW128
,
14708 IX86_BUILTIN_PAND128
,
14709 IX86_BUILTIN_PANDN128
,
14710 IX86_BUILTIN_POR128
,
14711 IX86_BUILTIN_PXOR128
,
14713 IX86_BUILTIN_PAVGB128
,
14714 IX86_BUILTIN_PAVGW128
,
14716 IX86_BUILTIN_PCMPEQB128
,
14717 IX86_BUILTIN_PCMPEQW128
,
14718 IX86_BUILTIN_PCMPEQD128
,
14719 IX86_BUILTIN_PCMPGTB128
,
14720 IX86_BUILTIN_PCMPGTW128
,
14721 IX86_BUILTIN_PCMPGTD128
,
14723 IX86_BUILTIN_PMADDWD128
,
14725 IX86_BUILTIN_PMAXSW128
,
14726 IX86_BUILTIN_PMAXUB128
,
14727 IX86_BUILTIN_PMINSW128
,
14728 IX86_BUILTIN_PMINUB128
,
14730 IX86_BUILTIN_PMULUDQ
,
14731 IX86_BUILTIN_PMULUDQ128
,
14732 IX86_BUILTIN_PMULHUW128
,
14733 IX86_BUILTIN_PMULHW128
,
14734 IX86_BUILTIN_PMULLW128
,
14736 IX86_BUILTIN_PSADBW128
,
14737 IX86_BUILTIN_PSHUFHW
,
14738 IX86_BUILTIN_PSHUFLW
,
14739 IX86_BUILTIN_PSHUFD
,
14741 IX86_BUILTIN_PSLLW128
,
14742 IX86_BUILTIN_PSLLD128
,
14743 IX86_BUILTIN_PSLLQ128
,
14744 IX86_BUILTIN_PSRAW128
,
14745 IX86_BUILTIN_PSRAD128
,
14746 IX86_BUILTIN_PSRLW128
,
14747 IX86_BUILTIN_PSRLD128
,
14748 IX86_BUILTIN_PSRLQ128
,
14749 IX86_BUILTIN_PSLLDQI128
,
14750 IX86_BUILTIN_PSLLWI128
,
14751 IX86_BUILTIN_PSLLDI128
,
14752 IX86_BUILTIN_PSLLQI128
,
14753 IX86_BUILTIN_PSRAWI128
,
14754 IX86_BUILTIN_PSRADI128
,
14755 IX86_BUILTIN_PSRLDQI128
,
14756 IX86_BUILTIN_PSRLWI128
,
14757 IX86_BUILTIN_PSRLDI128
,
14758 IX86_BUILTIN_PSRLQI128
,
14760 IX86_BUILTIN_PUNPCKHBW128
,
14761 IX86_BUILTIN_PUNPCKHWD128
,
14762 IX86_BUILTIN_PUNPCKHDQ128
,
14763 IX86_BUILTIN_PUNPCKHQDQ128
,
14764 IX86_BUILTIN_PUNPCKLBW128
,
14765 IX86_BUILTIN_PUNPCKLWD128
,
14766 IX86_BUILTIN_PUNPCKLDQ128
,
14767 IX86_BUILTIN_PUNPCKLQDQ128
,
14769 IX86_BUILTIN_CLFLUSH
,
14770 IX86_BUILTIN_MFENCE
,
14771 IX86_BUILTIN_LFENCE
,
14773 /* Prescott New Instructions. */
14774 IX86_BUILTIN_ADDSUBPS
,
14775 IX86_BUILTIN_HADDPS
,
14776 IX86_BUILTIN_HSUBPS
,
14777 IX86_BUILTIN_MOVSHDUP
,
14778 IX86_BUILTIN_MOVSLDUP
,
14779 IX86_BUILTIN_ADDSUBPD
,
14780 IX86_BUILTIN_HADDPD
,
14781 IX86_BUILTIN_HSUBPD
,
14782 IX86_BUILTIN_LDDQU
,
14784 IX86_BUILTIN_MONITOR
,
14785 IX86_BUILTIN_MWAIT
,
14788 IX86_BUILTIN_PHADDW
,
14789 IX86_BUILTIN_PHADDD
,
14790 IX86_BUILTIN_PHADDSW
,
14791 IX86_BUILTIN_PHSUBW
,
14792 IX86_BUILTIN_PHSUBD
,
14793 IX86_BUILTIN_PHSUBSW
,
14794 IX86_BUILTIN_PMADDUBSW
,
14795 IX86_BUILTIN_PMULHRSW
,
14796 IX86_BUILTIN_PSHUFB
,
14797 IX86_BUILTIN_PSIGNB
,
14798 IX86_BUILTIN_PSIGNW
,
14799 IX86_BUILTIN_PSIGND
,
14800 IX86_BUILTIN_PALIGNR
,
14801 IX86_BUILTIN_PABSB
,
14802 IX86_BUILTIN_PABSW
,
14803 IX86_BUILTIN_PABSD
,
14805 IX86_BUILTIN_PHADDW128
,
14806 IX86_BUILTIN_PHADDD128
,
14807 IX86_BUILTIN_PHADDSW128
,
14808 IX86_BUILTIN_PHSUBW128
,
14809 IX86_BUILTIN_PHSUBD128
,
14810 IX86_BUILTIN_PHSUBSW128
,
14811 IX86_BUILTIN_PMADDUBSW128
,
14812 IX86_BUILTIN_PMULHRSW128
,
14813 IX86_BUILTIN_PSHUFB128
,
14814 IX86_BUILTIN_PSIGNB128
,
14815 IX86_BUILTIN_PSIGNW128
,
14816 IX86_BUILTIN_PSIGND128
,
14817 IX86_BUILTIN_PALIGNR128
,
14818 IX86_BUILTIN_PABSB128
,
14819 IX86_BUILTIN_PABSW128
,
14820 IX86_BUILTIN_PABSD128
,
14822 IX86_BUILTIN_VEC_INIT_V2SI
,
14823 IX86_BUILTIN_VEC_INIT_V4HI
,
14824 IX86_BUILTIN_VEC_INIT_V8QI
,
14825 IX86_BUILTIN_VEC_EXT_V2DF
,
14826 IX86_BUILTIN_VEC_EXT_V2DI
,
14827 IX86_BUILTIN_VEC_EXT_V4SF
,
14828 IX86_BUILTIN_VEC_EXT_V4SI
,
14829 IX86_BUILTIN_VEC_EXT_V8HI
,
14830 IX86_BUILTIN_VEC_EXT_V2SI
,
14831 IX86_BUILTIN_VEC_EXT_V4HI
,
14832 IX86_BUILTIN_VEC_SET_V8HI
,
14833 IX86_BUILTIN_VEC_SET_V4HI
,
14838 /* Table for the ix86 builtin decls. */
14839 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
14841 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
14842 * if the target_flags include one of MASK. Stores the function decl
14843 * in the ix86_builtins array.
14844 * Returns the function decl or NULL_TREE, if the builtin was not added. */
14847 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
14849 tree decl
= NULL_TREE
;
14851 if (mask
& target_flags
14852 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
14854 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
14856 ix86_builtins
[(int) code
] = decl
;
14862 /* Like def_builtin, but also marks the function decl "const". */
14865 def_builtin_const (int mask
, const char *name
, tree type
,
14866 enum ix86_builtins code
)
14868 tree decl
= def_builtin (mask
, name
, type
, code
);
14870 TREE_READONLY (decl
) = 1;
14874 /* Bits for builtin_description.flag. */
14876 /* Set when we don't support the comparison natively, and should
14877 swap_comparison in order to support it. */
14878 #define BUILTIN_DESC_SWAP_OPERANDS 1
14880 struct builtin_description
14882 const unsigned int mask
;
14883 const enum insn_code icode
;
14884 const char *const name
;
14885 const enum ix86_builtins code
;
14886 const enum rtx_code comparison
;
14887 const unsigned int flag
;
14890 static const struct builtin_description bdesc_comi
[] =
14892 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
14893 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
14894 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
14895 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
14896 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
14897 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
14898 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
14899 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
14900 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
14901 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
14902 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
14903 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
14904 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
14905 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
14906 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
14907 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
14908 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
14909 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
14910 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
14911 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
14912 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
14913 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
14914 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
14915 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
14918 static const struct builtin_description bdesc_2arg
[] =
14921 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14922 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14923 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14924 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14925 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14926 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14927 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14928 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14930 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14931 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14932 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14933 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14934 BUILTIN_DESC_SWAP_OPERANDS
},
14935 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14936 BUILTIN_DESC_SWAP_OPERANDS
},
14937 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14938 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14939 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14940 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14941 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14942 BUILTIN_DESC_SWAP_OPERANDS
},
14943 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14944 BUILTIN_DESC_SWAP_OPERANDS
},
14945 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14946 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14947 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14948 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14949 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14950 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14951 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14952 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14953 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14954 BUILTIN_DESC_SWAP_OPERANDS
},
14955 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14956 BUILTIN_DESC_SWAP_OPERANDS
},
14957 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14959 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14960 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14961 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14962 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14964 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14965 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14966 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14967 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14969 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14970 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14971 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14972 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14973 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14976 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14977 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14978 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14979 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14980 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14981 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14982 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14983 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14985 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14986 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14987 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14988 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14989 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14990 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14991 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14992 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14994 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14995 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14996 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14998 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14999 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
15000 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
15001 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
15003 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
15004 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
15006 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
15007 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
15008 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
15009 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
15010 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
15011 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
15013 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
15014 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
15015 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
15016 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
15018 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
15019 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
15020 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
15021 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
15022 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
15023 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
15026 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
15027 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
15028 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
15030 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
15031 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
15032 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
15034 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
15035 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
15036 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
15037 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
15038 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
15039 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
15041 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
15042 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
15043 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
15044 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
15045 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
15046 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
15048 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
15049 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
15050 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
15051 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
15053 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
15054 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
15057 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
15058 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
15059 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
15060 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
15061 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
15062 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
15063 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
15064 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
15066 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
15067 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
15068 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
15069 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
15070 BUILTIN_DESC_SWAP_OPERANDS
},
15071 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
15072 BUILTIN_DESC_SWAP_OPERANDS
},
15073 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
15074 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
15075 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
15076 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
15077 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
15078 BUILTIN_DESC_SWAP_OPERANDS
},
15079 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
15080 BUILTIN_DESC_SWAP_OPERANDS
},
15081 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
15082 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
15083 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
15084 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
15085 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
15086 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
15087 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
15088 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
15089 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
15091 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
15092 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
15093 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
15094 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
15096 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
15097 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
15098 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
15099 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
15101 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
15102 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
15103 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
15106 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
15107 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
15108 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
15109 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
15110 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
15111 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
15112 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
15113 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
15115 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
15116 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
15117 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
15118 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
15119 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
15120 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
15121 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
15122 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
15124 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
15125 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
15127 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
15128 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
15129 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
15130 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
15132 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
15133 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
15135 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
15136 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
15137 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
15138 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
15139 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
15140 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
15142 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
15143 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
15144 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
15145 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
15147 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
15148 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
15149 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
15150 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
15151 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
15152 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
15153 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
15154 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
15156 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
15157 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
15158 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
15160 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
15161 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
15163 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
15164 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
15166 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
15167 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
15168 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
15170 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
15171 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
15172 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
15174 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
15175 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
15177 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
15179 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
15180 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
15181 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
15182 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
15185 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
15186 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
15187 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
15188 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
15189 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
15190 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
15193 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
15194 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
15195 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
15196 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
15197 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
15198 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
15199 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
15200 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
15201 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
15202 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
15203 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
15204 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
15205 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
15206 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
15207 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
15208 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
15209 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
15210 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
15211 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
15212 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
15213 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
15214 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
15215 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
15216 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
15219 static const struct builtin_description bdesc_1arg
[] =
15221 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
15222 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
15224 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
15225 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
15226 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
15228 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
15229 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
15230 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
15231 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
15232 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
15233 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
15235 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
15236 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
15238 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
15240 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
15241 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
15243 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
15244 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
15245 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
15246 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
15247 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
15249 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
15251 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
15252 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
15253 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
15254 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
15256 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
15257 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
15258 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
15261 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
15262 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
15265 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
15266 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
15267 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
15268 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
15269 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
15270 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
15274 ix86_init_builtins (void)
15277 ix86_init_mmx_sse_builtins ();
15280 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15281 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15284 ix86_init_mmx_sse_builtins (void)
15286 const struct builtin_description
* d
;
15289 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
15290 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
15291 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
15292 tree V2DI_type_node
15293 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
15294 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
15295 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
15296 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
15297 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
15298 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
15299 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
15301 tree pchar_type_node
= build_pointer_type (char_type_node
);
15302 tree pcchar_type_node
= build_pointer_type (
15303 build_type_variant (char_type_node
, 1, 0));
15304 tree pfloat_type_node
= build_pointer_type (float_type_node
);
15305 tree pcfloat_type_node
= build_pointer_type (
15306 build_type_variant (float_type_node
, 1, 0));
15307 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
15308 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
15309 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
15312 tree int_ftype_v4sf_v4sf
15313 = build_function_type_list (integer_type_node
,
15314 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15315 tree v4si_ftype_v4sf_v4sf
15316 = build_function_type_list (V4SI_type_node
,
15317 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15318 /* MMX/SSE/integer conversions. */
15319 tree int_ftype_v4sf
15320 = build_function_type_list (integer_type_node
,
15321 V4SF_type_node
, NULL_TREE
);
15322 tree int64_ftype_v4sf
15323 = build_function_type_list (long_long_integer_type_node
,
15324 V4SF_type_node
, NULL_TREE
);
15325 tree int_ftype_v8qi
15326 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
15327 tree v4sf_ftype_v4sf_int
15328 = build_function_type_list (V4SF_type_node
,
15329 V4SF_type_node
, integer_type_node
, NULL_TREE
);
15330 tree v4sf_ftype_v4sf_int64
15331 = build_function_type_list (V4SF_type_node
,
15332 V4SF_type_node
, long_long_integer_type_node
,
15334 tree v4sf_ftype_v4sf_v2si
15335 = build_function_type_list (V4SF_type_node
,
15336 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
15338 /* Miscellaneous. */
15339 tree v8qi_ftype_v4hi_v4hi
15340 = build_function_type_list (V8QI_type_node
,
15341 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15342 tree v4hi_ftype_v2si_v2si
15343 = build_function_type_list (V4HI_type_node
,
15344 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15345 tree v4sf_ftype_v4sf_v4sf_int
15346 = build_function_type_list (V4SF_type_node
,
15347 V4SF_type_node
, V4SF_type_node
,
15348 integer_type_node
, NULL_TREE
);
15349 tree v2si_ftype_v4hi_v4hi
15350 = build_function_type_list (V2SI_type_node
,
15351 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15352 tree v4hi_ftype_v4hi_int
15353 = build_function_type_list (V4HI_type_node
,
15354 V4HI_type_node
, integer_type_node
, NULL_TREE
);
15355 tree v4hi_ftype_v4hi_di
15356 = build_function_type_list (V4HI_type_node
,
15357 V4HI_type_node
, long_long_unsigned_type_node
,
15359 tree v2si_ftype_v2si_di
15360 = build_function_type_list (V2SI_type_node
,
15361 V2SI_type_node
, long_long_unsigned_type_node
,
15363 tree void_ftype_void
15364 = build_function_type (void_type_node
, void_list_node
);
15365 tree void_ftype_unsigned
15366 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
15367 tree void_ftype_unsigned_unsigned
15368 = build_function_type_list (void_type_node
, unsigned_type_node
,
15369 unsigned_type_node
, NULL_TREE
);
15370 tree void_ftype_pcvoid_unsigned_unsigned
15371 = build_function_type_list (void_type_node
, const_ptr_type_node
,
15372 unsigned_type_node
, unsigned_type_node
,
15374 tree unsigned_ftype_void
15375 = build_function_type (unsigned_type_node
, void_list_node
);
15376 tree v2si_ftype_v4sf
15377 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
15378 /* Loads/stores. */
15379 tree void_ftype_v8qi_v8qi_pchar
15380 = build_function_type_list (void_type_node
,
15381 V8QI_type_node
, V8QI_type_node
,
15382 pchar_type_node
, NULL_TREE
);
15383 tree v4sf_ftype_pcfloat
15384 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
15385 /* @@@ the type is bogus */
15386 tree v4sf_ftype_v4sf_pv2si
15387 = build_function_type_list (V4SF_type_node
,
15388 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
15389 tree void_ftype_pv2si_v4sf
15390 = build_function_type_list (void_type_node
,
15391 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
15392 tree void_ftype_pfloat_v4sf
15393 = build_function_type_list (void_type_node
,
15394 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
15395 tree void_ftype_pdi_di
15396 = build_function_type_list (void_type_node
,
15397 pdi_type_node
, long_long_unsigned_type_node
,
15399 tree void_ftype_pv2di_v2di
15400 = build_function_type_list (void_type_node
,
15401 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
15402 /* Normal vector unops. */
15403 tree v4sf_ftype_v4sf
15404 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15405 tree v16qi_ftype_v16qi
15406 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15407 tree v8hi_ftype_v8hi
15408 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15409 tree v4si_ftype_v4si
15410 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15411 tree v8qi_ftype_v8qi
15412 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15413 tree v4hi_ftype_v4hi
15414 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15416 /* Normal vector binops. */
15417 tree v4sf_ftype_v4sf_v4sf
15418 = build_function_type_list (V4SF_type_node
,
15419 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15420 tree v8qi_ftype_v8qi_v8qi
15421 = build_function_type_list (V8QI_type_node
,
15422 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15423 tree v4hi_ftype_v4hi_v4hi
15424 = build_function_type_list (V4HI_type_node
,
15425 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15426 tree v2si_ftype_v2si_v2si
15427 = build_function_type_list (V2SI_type_node
,
15428 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15429 tree di_ftype_di_di
15430 = build_function_type_list (long_long_unsigned_type_node
,
15431 long_long_unsigned_type_node
,
15432 long_long_unsigned_type_node
, NULL_TREE
);
15434 tree di_ftype_di_di_int
15435 = build_function_type_list (long_long_unsigned_type_node
,
15436 long_long_unsigned_type_node
,
15437 long_long_unsigned_type_node
,
15438 integer_type_node
, NULL_TREE
);
15440 tree v2si_ftype_v2sf
15441 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
15442 tree v2sf_ftype_v2si
15443 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
15444 tree v2si_ftype_v2si
15445 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15446 tree v2sf_ftype_v2sf
15447 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15448 tree v2sf_ftype_v2sf_v2sf
15449 = build_function_type_list (V2SF_type_node
,
15450 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15451 tree v2si_ftype_v2sf_v2sf
15452 = build_function_type_list (V2SI_type_node
,
15453 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15454 tree pint_type_node
= build_pointer_type (integer_type_node
);
15455 tree pdouble_type_node
= build_pointer_type (double_type_node
);
15456 tree pcdouble_type_node
= build_pointer_type (
15457 build_type_variant (double_type_node
, 1, 0));
15458 tree int_ftype_v2df_v2df
15459 = build_function_type_list (integer_type_node
,
15460 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15462 tree void_ftype_pcvoid
15463 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
15464 tree v4sf_ftype_v4si
15465 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
15466 tree v4si_ftype_v4sf
15467 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
15468 tree v2df_ftype_v4si
15469 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
15470 tree v4si_ftype_v2df
15471 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
15472 tree v2si_ftype_v2df
15473 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
15474 tree v4sf_ftype_v2df
15475 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15476 tree v2df_ftype_v2si
15477 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
15478 tree v2df_ftype_v4sf
15479 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15480 tree int_ftype_v2df
15481 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
15482 tree int64_ftype_v2df
15483 = build_function_type_list (long_long_integer_type_node
,
15484 V2DF_type_node
, NULL_TREE
);
15485 tree v2df_ftype_v2df_int
15486 = build_function_type_list (V2DF_type_node
,
15487 V2DF_type_node
, integer_type_node
, NULL_TREE
);
15488 tree v2df_ftype_v2df_int64
15489 = build_function_type_list (V2DF_type_node
,
15490 V2DF_type_node
, long_long_integer_type_node
,
15492 tree v4sf_ftype_v4sf_v2df
15493 = build_function_type_list (V4SF_type_node
,
15494 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15495 tree v2df_ftype_v2df_v4sf
15496 = build_function_type_list (V2DF_type_node
,
15497 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15498 tree v2df_ftype_v2df_v2df_int
15499 = build_function_type_list (V2DF_type_node
,
15500 V2DF_type_node
, V2DF_type_node
,
15503 tree v2df_ftype_v2df_pcdouble
15504 = build_function_type_list (V2DF_type_node
,
15505 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15506 tree void_ftype_pdouble_v2df
15507 = build_function_type_list (void_type_node
,
15508 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
15509 tree void_ftype_pint_int
15510 = build_function_type_list (void_type_node
,
15511 pint_type_node
, integer_type_node
, NULL_TREE
);
15512 tree void_ftype_v16qi_v16qi_pchar
15513 = build_function_type_list (void_type_node
,
15514 V16QI_type_node
, V16QI_type_node
,
15515 pchar_type_node
, NULL_TREE
);
15516 tree v2df_ftype_pcdouble
15517 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15518 tree v2df_ftype_v2df_v2df
15519 = build_function_type_list (V2DF_type_node
,
15520 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15521 tree v16qi_ftype_v16qi_v16qi
15522 = build_function_type_list (V16QI_type_node
,
15523 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15524 tree v8hi_ftype_v8hi_v8hi
15525 = build_function_type_list (V8HI_type_node
,
15526 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15527 tree v4si_ftype_v4si_v4si
15528 = build_function_type_list (V4SI_type_node
,
15529 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15530 tree v2di_ftype_v2di_v2di
15531 = build_function_type_list (V2DI_type_node
,
15532 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
15533 tree v2di_ftype_v2df_v2df
15534 = build_function_type_list (V2DI_type_node
,
15535 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15536 tree v2df_ftype_v2df
15537 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15538 tree v2di_ftype_v2di_int
15539 = build_function_type_list (V2DI_type_node
,
15540 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15541 tree v2di_ftype_v2di_v2di_int
15542 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
15543 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15544 tree v4si_ftype_v4si_int
15545 = build_function_type_list (V4SI_type_node
,
15546 V4SI_type_node
, integer_type_node
, NULL_TREE
);
15547 tree v8hi_ftype_v8hi_int
15548 = build_function_type_list (V8HI_type_node
,
15549 V8HI_type_node
, integer_type_node
, NULL_TREE
);
15550 tree v8hi_ftype_v8hi_v2di
15551 = build_function_type_list (V8HI_type_node
,
15552 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
15553 tree v4si_ftype_v4si_v2di
15554 = build_function_type_list (V4SI_type_node
,
15555 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
15556 tree v4si_ftype_v8hi_v8hi
15557 = build_function_type_list (V4SI_type_node
,
15558 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15559 tree di_ftype_v8qi_v8qi
15560 = build_function_type_list (long_long_unsigned_type_node
,
15561 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15562 tree di_ftype_v2si_v2si
15563 = build_function_type_list (long_long_unsigned_type_node
,
15564 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15565 tree v2di_ftype_v16qi_v16qi
15566 = build_function_type_list (V2DI_type_node
,
15567 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15568 tree v2di_ftype_v4si_v4si
15569 = build_function_type_list (V2DI_type_node
,
15570 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15571 tree int_ftype_v16qi
15572 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
15573 tree v16qi_ftype_pcchar
15574 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
15575 tree void_ftype_pchar_v16qi
15576 = build_function_type_list (void_type_node
,
15577 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
15580 tree float128_type
;
15583 /* The __float80 type. */
15584 if (TYPE_MODE (long_double_type_node
) == XFmode
)
15585 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
15589 /* The __float80 type. */
15590 float80_type
= make_node (REAL_TYPE
);
15591 TYPE_PRECISION (float80_type
) = 80;
15592 layout_type (float80_type
);
15593 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
15598 float128_type
= make_node (REAL_TYPE
);
15599 TYPE_PRECISION (float128_type
) = 128;
15600 layout_type (float128_type
);
15601 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
15604 /* Add all builtins that are more or less simple operations on two
15606 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
15608 /* Use one of the operands; the target can have a different mode for
15609 mask-generating compares. */
15610 enum machine_mode mode
;
15615 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15620 type
= v16qi_ftype_v16qi_v16qi
;
15623 type
= v8hi_ftype_v8hi_v8hi
;
15626 type
= v4si_ftype_v4si_v4si
;
15629 type
= v2di_ftype_v2di_v2di
;
15632 type
= v2df_ftype_v2df_v2df
;
15635 type
= v4sf_ftype_v4sf_v4sf
;
15638 type
= v8qi_ftype_v8qi_v8qi
;
15641 type
= v4hi_ftype_v4hi_v4hi
;
15644 type
= v2si_ftype_v2si_v2si
;
15647 type
= di_ftype_di_di
;
15651 gcc_unreachable ();
15654 /* Override for comparisons. */
15655 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
15656 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
15657 type
= v4si_ftype_v4sf_v4sf
;
15659 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
15660 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
15661 type
= v2di_ftype_v2df_v2df
;
15663 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15666 /* Add all builtins that are more or less simple operations on 1 operand. */
15667 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
15669 enum machine_mode mode
;
15674 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15679 type
= v16qi_ftype_v16qi
;
15682 type
= v8hi_ftype_v8hi
;
15685 type
= v4si_ftype_v4si
;
15688 type
= v2df_ftype_v2df
;
15691 type
= v4sf_ftype_v4sf
;
15694 type
= v8qi_ftype_v8qi
;
15697 type
= v4hi_ftype_v4hi
;
15700 type
= v2si_ftype_v2si
;
15707 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15710 /* Add the remaining MMX insns with somewhat more complicated types. */
15711 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
15712 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
15713 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
15714 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
15716 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
15717 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
15718 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
15720 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
15721 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
15723 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
15724 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
15726 /* comi/ucomi insns. */
15727 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
15728 if (d
->mask
== MASK_SSE2
)
15729 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
15731 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
15733 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
15734 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
15735 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
15737 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
15738 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
15739 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
15740 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
15741 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
15742 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
15743 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
15744 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
15745 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
15746 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
15747 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
15749 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
15751 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
15752 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
15754 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
15755 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
15756 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
15757 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
15759 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
15760 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
15761 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
15762 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
15764 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
15766 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
15768 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
15769 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
15770 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
15771 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
15772 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
15773 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
15775 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
15777 /* Original 3DNow! */
15778 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
15779 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
15780 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
15781 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
15782 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
15783 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
15784 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
15785 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
15786 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
15787 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
15788 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
15789 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
15790 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
15791 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
15792 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
15793 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
15794 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
15795 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
15796 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
15797 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
15799 /* 3DNow! extension as used in the Athlon CPU. */
15800 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
15801 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
15802 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
15803 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
15804 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
15805 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
15808 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
15810 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
15811 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
15813 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
15814 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
15816 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
15817 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
15818 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
15819 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
15820 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
15822 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
15823 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
15824 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
15825 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
15827 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
15828 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
15830 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
15832 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
15833 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
15835 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
15836 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
15837 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
15838 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
15839 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
15841 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
15843 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
15844 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
15845 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
15846 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
15848 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
15849 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
15850 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
15852 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
15853 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
15854 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
15855 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
15857 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
15858 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
15859 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
15861 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
15862 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
15864 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
15865 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
15867 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
15868 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
15869 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
15871 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
15872 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
15873 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
15875 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
15876 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
15878 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
15879 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
15880 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
15881 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
15883 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
15884 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
15885 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
15886 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
15888 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
15889 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
15891 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
15893 /* Prescott New Instructions. */
15894 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
15895 void_ftype_pcvoid_unsigned_unsigned
,
15896 IX86_BUILTIN_MONITOR
);
15897 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
15898 void_ftype_unsigned_unsigned
,
15899 IX86_BUILTIN_MWAIT
);
15900 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
15902 IX86_BUILTIN_MOVSHDUP
);
15903 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
15905 IX86_BUILTIN_MOVSLDUP
);
15906 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
15907 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
15910 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
15911 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
15912 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
15913 IX86_BUILTIN_PALIGNR
);
15915 /* Access to the vec_init patterns. */
15916 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
15917 integer_type_node
, NULL_TREE
);
15918 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
15919 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
15921 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
15922 short_integer_type_node
,
15923 short_integer_type_node
,
15924 short_integer_type_node
, NULL_TREE
);
15925 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
15926 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
15928 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
15929 char_type_node
, char_type_node
,
15930 char_type_node
, char_type_node
,
15931 char_type_node
, char_type_node
,
15932 char_type_node
, NULL_TREE
);
15933 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
15934 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
15936 /* Access to the vec_extract patterns. */
15937 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
15938 integer_type_node
, NULL_TREE
);
15939 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
15940 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
15942 ftype
= build_function_type_list (long_long_integer_type_node
,
15943 V2DI_type_node
, integer_type_node
,
15945 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
15946 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
15948 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
15949 integer_type_node
, NULL_TREE
);
15950 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
15951 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
15953 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
15954 integer_type_node
, NULL_TREE
);
15955 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
15956 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
15958 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
15959 integer_type_node
, NULL_TREE
);
15960 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
15961 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
15963 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
15964 integer_type_node
, NULL_TREE
);
15965 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
15966 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
15968 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
15969 integer_type_node
, NULL_TREE
);
15970 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
15971 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
15973 /* Access to the vec_set patterns. */
15974 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
15976 integer_type_node
, NULL_TREE
);
15977 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
15978 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
15980 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
15982 integer_type_node
, NULL_TREE
);
15983 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
15984 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
15987 /* Errors in the source file can cause expand_expr to return const0_rtx
15988 where we expect a vector. To avoid crashing, use one of the vector
15989 clear instructions. */
15991 safe_vector_operand (rtx x
, enum machine_mode mode
)
15993 if (x
== const0_rtx
)
15994 x
= CONST0_RTX (mode
);
15998 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16001 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
16004 tree arg0
= TREE_VALUE (arglist
);
16005 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16006 rtx op0
= expand_normal (arg0
);
16007 rtx op1
= expand_normal (arg1
);
16008 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16009 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16010 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16012 if (VECTOR_MODE_P (mode0
))
16013 op0
= safe_vector_operand (op0
, mode0
);
16014 if (VECTOR_MODE_P (mode1
))
16015 op1
= safe_vector_operand (op1
, mode1
);
16017 if (optimize
|| !target
16018 || GET_MODE (target
) != tmode
16019 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16020 target
= gen_reg_rtx (tmode
);
16022 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
16024 rtx x
= gen_reg_rtx (V4SImode
);
16025 emit_insn (gen_sse2_loadd (x
, op1
));
16026 op1
= gen_lowpart (TImode
, x
);
16029 /* The insn must want input operands in the same modes as the
16031 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
16032 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
16034 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16035 op0
= copy_to_mode_reg (mode0
, op0
);
16036 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16037 op1
= copy_to_mode_reg (mode1
, op1
);
16039 /* ??? Using ix86_fixup_binary_operands is problematic when
16040 we've got mismatched modes. Fake it. */
16046 if (tmode
== mode0
&& tmode
== mode1
)
16048 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
16052 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
16054 op0
= force_reg (mode0
, op0
);
16055 op1
= force_reg (mode1
, op1
);
16056 target
= gen_reg_rtx (tmode
);
16059 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16066 /* Subroutine of ix86_expand_builtin to take care of stores. */
16069 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
16072 tree arg0
= TREE_VALUE (arglist
);
16073 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16074 rtx op0
= expand_normal (arg0
);
16075 rtx op1
= expand_normal (arg1
);
16076 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
16077 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
16079 if (VECTOR_MODE_P (mode1
))
16080 op1
= safe_vector_operand (op1
, mode1
);
16082 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16083 op1
= copy_to_mode_reg (mode1
, op1
);
16085 pat
= GEN_FCN (icode
) (op0
, op1
);
16091 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16094 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
16095 rtx target
, int do_load
)
16098 tree arg0
= TREE_VALUE (arglist
);
16099 rtx op0
= expand_normal (arg0
);
16100 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16101 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16103 if (optimize
|| !target
16104 || GET_MODE (target
) != tmode
16105 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16106 target
= gen_reg_rtx (tmode
);
16108 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16111 if (VECTOR_MODE_P (mode0
))
16112 op0
= safe_vector_operand (op0
, mode0
);
16114 if ((optimize
&& !register_operand (op0
, mode0
))
16115 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16116 op0
= copy_to_mode_reg (mode0
, op0
);
16119 pat
= GEN_FCN (icode
) (target
, op0
);
16126 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16127 sqrtss, rsqrtss, rcpss. */
16130 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
16133 tree arg0
= TREE_VALUE (arglist
);
16134 rtx op1
, op0
= expand_normal (arg0
);
16135 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16136 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16138 if (optimize
|| !target
16139 || GET_MODE (target
) != tmode
16140 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16141 target
= gen_reg_rtx (tmode
);
16143 if (VECTOR_MODE_P (mode0
))
16144 op0
= safe_vector_operand (op0
, mode0
);
16146 if ((optimize
&& !register_operand (op0
, mode0
))
16147 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16148 op0
= copy_to_mode_reg (mode0
, op0
);
16151 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
16152 op1
= copy_to_mode_reg (mode0
, op1
);
16154 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16161 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16164 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
16168 tree arg0
= TREE_VALUE (arglist
);
16169 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16170 rtx op0
= expand_normal (arg0
);
16171 rtx op1
= expand_normal (arg1
);
16173 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
16174 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
16175 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
16176 enum rtx_code comparison
= d
->comparison
;
16178 if (VECTOR_MODE_P (mode0
))
16179 op0
= safe_vector_operand (op0
, mode0
);
16180 if (VECTOR_MODE_P (mode1
))
16181 op1
= safe_vector_operand (op1
, mode1
);
16183 /* Swap operands if we have a comparison that isn't available in
16185 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
16187 rtx tmp
= gen_reg_rtx (mode1
);
16188 emit_move_insn (tmp
, op1
);
16193 if (optimize
|| !target
16194 || GET_MODE (target
) != tmode
16195 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
16196 target
= gen_reg_rtx (tmode
);
16198 if ((optimize
&& !register_operand (op0
, mode0
))
16199 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
16200 op0
= copy_to_mode_reg (mode0
, op0
);
16201 if ((optimize
&& !register_operand (op1
, mode1
))
16202 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
16203 op1
= copy_to_mode_reg (mode1
, op1
);
16205 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
16206 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
16213 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16216 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
16220 tree arg0
= TREE_VALUE (arglist
);
16221 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16222 rtx op0
= expand_normal (arg0
);
16223 rtx op1
= expand_normal (arg1
);
16225 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
16226 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
16227 enum rtx_code comparison
= d
->comparison
;
16229 if (VECTOR_MODE_P (mode0
))
16230 op0
= safe_vector_operand (op0
, mode0
);
16231 if (VECTOR_MODE_P (mode1
))
16232 op1
= safe_vector_operand (op1
, mode1
);
16234 /* Swap operands if we have a comparison that isn't available in
16236 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
16243 target
= gen_reg_rtx (SImode
);
16244 emit_move_insn (target
, const0_rtx
);
16245 target
= gen_rtx_SUBREG (QImode
, target
, 0);
16247 if ((optimize
&& !register_operand (op0
, mode0
))
16248 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
16249 op0
= copy_to_mode_reg (mode0
, op0
);
16250 if ((optimize
&& !register_operand (op1
, mode1
))
16251 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
16252 op1
= copy_to_mode_reg (mode1
, op1
);
16254 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
16255 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
16259 emit_insn (gen_rtx_SET (VOIDmode
,
16260 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
16261 gen_rtx_fmt_ee (comparison
, QImode
,
16265 return SUBREG_REG (target
);
16268 /* Return the integer constant in ARG. Constrain it to be in the range
16269 of the subparts of VEC_TYPE; issue an error if not. */
16272 get_element_number (tree vec_type
, tree arg
)
16274 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
16276 if (!host_integerp (arg
, 1)
16277 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
16279 error ("selector must be an integer constant in the range 0..%wi", max
);
16286 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16287 ix86_expand_vector_init. We DO have language-level syntax for this, in
16288 the form of (type){ init-list }. Except that since we can't place emms
16289 instructions from inside the compiler, we can't allow the use of MMX
16290 registers unless the user explicitly asks for it. So we do *not* define
16291 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16292 we have builtins invoked by mmintrin.h that gives us license to emit
16293 these sorts of instructions. */
16296 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
16298 enum machine_mode tmode
= TYPE_MODE (type
);
16299 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
16300 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
16301 rtvec v
= rtvec_alloc (n_elt
);
16303 gcc_assert (VECTOR_MODE_P (tmode
));
16305 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
16307 rtx x
= expand_normal (TREE_VALUE (arglist
));
16308 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
16311 gcc_assert (arglist
== NULL
);
16313 if (!target
|| !register_operand (target
, tmode
))
16314 target
= gen_reg_rtx (tmode
);
16316 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
16320 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16321 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16322 had a language-level syntax for referencing vector elements. */
16325 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
16327 enum machine_mode tmode
, mode0
;
16332 arg0
= TREE_VALUE (arglist
);
16333 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16335 op0
= expand_normal (arg0
);
16336 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
16338 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16339 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
16340 gcc_assert (VECTOR_MODE_P (mode0
));
16342 op0
= force_reg (mode0
, op0
);
16344 if (optimize
|| !target
|| !register_operand (target
, tmode
))
16345 target
= gen_reg_rtx (tmode
);
16347 ix86_expand_vector_extract (true, target
, op0
, elt
);
16352 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16353 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16354 a language-level syntax for referencing vector elements. */
16357 ix86_expand_vec_set_builtin (tree arglist
)
16359 enum machine_mode tmode
, mode1
;
16360 tree arg0
, arg1
, arg2
;
16364 arg0
= TREE_VALUE (arglist
);
16365 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16366 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16368 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
16369 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16370 gcc_assert (VECTOR_MODE_P (tmode
));
16372 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
16373 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
16374 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
16376 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
16377 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
16379 op0
= force_reg (tmode
, op0
);
16380 op1
= force_reg (mode1
, op1
);
16382 ix86_expand_vector_set (true, op0
, op1
, elt
);
16387 /* Expand an expression EXP that calls a built-in function,
16388 with result going to TARGET if that's convenient
16389 (and in mode MODE if that's convenient).
16390 SUBTARGET may be used as the target for computing one of EXP's operands.
16391 IGNORE is nonzero if the value is to be ignored. */
16394 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
16395 enum machine_mode mode ATTRIBUTE_UNUSED
,
16396 int ignore ATTRIBUTE_UNUSED
)
16398 const struct builtin_description
*d
;
16400 enum insn_code icode
;
16401 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
16402 tree arglist
= TREE_OPERAND (exp
, 1);
16403 tree arg0
, arg1
, arg2
;
16404 rtx op0
, op1
, op2
, pat
;
16405 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
;
16406 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
16410 case IX86_BUILTIN_EMMS
:
16411 emit_insn (gen_mmx_emms ());
16414 case IX86_BUILTIN_SFENCE
:
16415 emit_insn (gen_sse_sfence ());
16418 case IX86_BUILTIN_MASKMOVQ
:
16419 case IX86_BUILTIN_MASKMOVDQU
:
16420 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
16421 ? CODE_FOR_mmx_maskmovq
16422 : CODE_FOR_sse2_maskmovdqu
);
16423 /* Note the arg order is different from the operand order. */
16424 arg1
= TREE_VALUE (arglist
);
16425 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
16426 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16427 op0
= expand_normal (arg0
);
16428 op1
= expand_normal (arg1
);
16429 op2
= expand_normal (arg2
);
16430 mode0
= insn_data
[icode
].operand
[0].mode
;
16431 mode1
= insn_data
[icode
].operand
[1].mode
;
16432 mode2
= insn_data
[icode
].operand
[2].mode
;
16434 op0
= force_reg (Pmode
, op0
);
16435 op0
= gen_rtx_MEM (mode1
, op0
);
16437 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16438 op0
= copy_to_mode_reg (mode0
, op0
);
16439 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
16440 op1
= copy_to_mode_reg (mode1
, op1
);
16441 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
16442 op2
= copy_to_mode_reg (mode2
, op2
);
16443 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
16449 case IX86_BUILTIN_SQRTSS
:
16450 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
16451 case IX86_BUILTIN_RSQRTSS
:
16452 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
16453 case IX86_BUILTIN_RCPSS
:
16454 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
16456 case IX86_BUILTIN_LOADUPS
:
16457 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
16459 case IX86_BUILTIN_STOREUPS
:
16460 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
16462 case IX86_BUILTIN_LOADHPS
:
16463 case IX86_BUILTIN_LOADLPS
:
16464 case IX86_BUILTIN_LOADHPD
:
16465 case IX86_BUILTIN_LOADLPD
:
16466 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
16467 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
16468 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
16469 : CODE_FOR_sse2_loadlpd
);
16470 arg0
= TREE_VALUE (arglist
);
16471 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16472 op0
= expand_normal (arg0
);
16473 op1
= expand_normal (arg1
);
16474 tmode
= insn_data
[icode
].operand
[0].mode
;
16475 mode0
= insn_data
[icode
].operand
[1].mode
;
16476 mode1
= insn_data
[icode
].operand
[2].mode
;
16478 op0
= force_reg (mode0
, op0
);
16479 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
16480 if (optimize
|| target
== 0
16481 || GET_MODE (target
) != tmode
16482 || !register_operand (target
, tmode
))
16483 target
= gen_reg_rtx (tmode
);
16484 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16490 case IX86_BUILTIN_STOREHPS
:
16491 case IX86_BUILTIN_STORELPS
:
16492 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
16493 : CODE_FOR_sse_storelps
);
16494 arg0
= TREE_VALUE (arglist
);
16495 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16496 op0
= expand_normal (arg0
);
16497 op1
= expand_normal (arg1
);
16498 mode0
= insn_data
[icode
].operand
[0].mode
;
16499 mode1
= insn_data
[icode
].operand
[1].mode
;
16501 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16502 op1
= force_reg (mode1
, op1
);
16504 pat
= GEN_FCN (icode
) (op0
, op1
);
16510 case IX86_BUILTIN_MOVNTPS
:
16511 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
16512 case IX86_BUILTIN_MOVNTQ
:
16513 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
16515 case IX86_BUILTIN_LDMXCSR
:
16516 op0
= expand_normal (TREE_VALUE (arglist
));
16517 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16518 emit_move_insn (target
, op0
);
16519 emit_insn (gen_sse_ldmxcsr (target
));
16522 case IX86_BUILTIN_STMXCSR
:
16523 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16524 emit_insn (gen_sse_stmxcsr (target
));
16525 return copy_to_mode_reg (SImode
, target
);
16527 case IX86_BUILTIN_SHUFPS
:
16528 case IX86_BUILTIN_SHUFPD
:
16529 icode
= (fcode
== IX86_BUILTIN_SHUFPS
16530 ? CODE_FOR_sse_shufps
16531 : CODE_FOR_sse2_shufpd
);
16532 arg0
= TREE_VALUE (arglist
);
16533 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16534 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16535 op0
= expand_normal (arg0
);
16536 op1
= expand_normal (arg1
);
16537 op2
= expand_normal (arg2
);
16538 tmode
= insn_data
[icode
].operand
[0].mode
;
16539 mode0
= insn_data
[icode
].operand
[1].mode
;
16540 mode1
= insn_data
[icode
].operand
[2].mode
;
16541 mode2
= insn_data
[icode
].operand
[3].mode
;
16543 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16544 op0
= copy_to_mode_reg (mode0
, op0
);
16545 if ((optimize
&& !register_operand (op1
, mode1
))
16546 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16547 op1
= copy_to_mode_reg (mode1
, op1
);
16548 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16550 /* @@@ better error message */
16551 error ("mask must be an immediate");
16552 return gen_reg_rtx (tmode
);
16554 if (optimize
|| target
== 0
16555 || GET_MODE (target
) != tmode
16556 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16557 target
= gen_reg_rtx (tmode
);
16558 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16564 case IX86_BUILTIN_PSHUFW
:
16565 case IX86_BUILTIN_PSHUFD
:
16566 case IX86_BUILTIN_PSHUFHW
:
16567 case IX86_BUILTIN_PSHUFLW
:
16568 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
16569 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
16570 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
16571 : CODE_FOR_mmx_pshufw
);
16572 arg0
= TREE_VALUE (arglist
);
16573 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16574 op0
= expand_normal (arg0
);
16575 op1
= expand_normal (arg1
);
16576 tmode
= insn_data
[icode
].operand
[0].mode
;
16577 mode1
= insn_data
[icode
].operand
[1].mode
;
16578 mode2
= insn_data
[icode
].operand
[2].mode
;
16580 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16581 op0
= copy_to_mode_reg (mode1
, op0
);
16582 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16584 /* @@@ better error message */
16585 error ("mask must be an immediate");
16589 || GET_MODE (target
) != tmode
16590 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16591 target
= gen_reg_rtx (tmode
);
16592 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16598 case IX86_BUILTIN_PSLLDQI128
:
16599 case IX86_BUILTIN_PSRLDQI128
:
16600 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
16601 : CODE_FOR_sse2_lshrti3
);
16602 arg0
= TREE_VALUE (arglist
);
16603 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16604 op0
= expand_normal (arg0
);
16605 op1
= expand_normal (arg1
);
16606 tmode
= insn_data
[icode
].operand
[0].mode
;
16607 mode1
= insn_data
[icode
].operand
[1].mode
;
16608 mode2
= insn_data
[icode
].operand
[2].mode
;
16610 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16612 op0
= copy_to_reg (op0
);
16613 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16615 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16617 error ("shift must be an immediate");
16620 target
= gen_reg_rtx (V2DImode
);
16621 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
16627 case IX86_BUILTIN_FEMMS
:
16628 emit_insn (gen_mmx_femms ());
16631 case IX86_BUILTIN_PAVGUSB
:
16632 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
16634 case IX86_BUILTIN_PF2ID
:
16635 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
16637 case IX86_BUILTIN_PFACC
:
16638 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
16640 case IX86_BUILTIN_PFADD
:
16641 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
16643 case IX86_BUILTIN_PFCMPEQ
:
16644 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
16646 case IX86_BUILTIN_PFCMPGE
:
16647 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
16649 case IX86_BUILTIN_PFCMPGT
:
16650 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
16652 case IX86_BUILTIN_PFMAX
:
16653 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
16655 case IX86_BUILTIN_PFMIN
:
16656 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
16658 case IX86_BUILTIN_PFMUL
:
16659 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
16661 case IX86_BUILTIN_PFRCP
:
16662 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
16664 case IX86_BUILTIN_PFRCPIT1
:
16665 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
16667 case IX86_BUILTIN_PFRCPIT2
:
16668 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
16670 case IX86_BUILTIN_PFRSQIT1
:
16671 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
16673 case IX86_BUILTIN_PFRSQRT
:
16674 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
16676 case IX86_BUILTIN_PFSUB
:
16677 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
16679 case IX86_BUILTIN_PFSUBR
:
16680 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
16682 case IX86_BUILTIN_PI2FD
:
16683 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
16685 case IX86_BUILTIN_PMULHRW
:
16686 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
16688 case IX86_BUILTIN_PF2IW
:
16689 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
16691 case IX86_BUILTIN_PFNACC
:
16692 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
16694 case IX86_BUILTIN_PFPNACC
:
16695 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
16697 case IX86_BUILTIN_PI2FW
:
16698 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
16700 case IX86_BUILTIN_PSWAPDSI
:
16701 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
16703 case IX86_BUILTIN_PSWAPDSF
:
16704 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
16706 case IX86_BUILTIN_SQRTSD
:
16707 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
16708 case IX86_BUILTIN_LOADUPD
:
16709 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
16710 case IX86_BUILTIN_STOREUPD
:
16711 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
16713 case IX86_BUILTIN_MFENCE
:
16714 emit_insn (gen_sse2_mfence ());
16716 case IX86_BUILTIN_LFENCE
:
16717 emit_insn (gen_sse2_lfence ());
16720 case IX86_BUILTIN_CLFLUSH
:
16721 arg0
= TREE_VALUE (arglist
);
16722 op0
= expand_normal (arg0
);
16723 icode
= CODE_FOR_sse2_clflush
;
16724 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
16725 op0
= copy_to_mode_reg (Pmode
, op0
);
16727 emit_insn (gen_sse2_clflush (op0
));
16730 case IX86_BUILTIN_MOVNTPD
:
16731 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
16732 case IX86_BUILTIN_MOVNTDQ
:
16733 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
16734 case IX86_BUILTIN_MOVNTI
:
16735 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
16737 case IX86_BUILTIN_LOADDQU
:
16738 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
16739 case IX86_BUILTIN_STOREDQU
:
16740 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
16742 case IX86_BUILTIN_MONITOR
:
16743 arg0
= TREE_VALUE (arglist
);
16744 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16745 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16746 op0
= expand_normal (arg0
);
16747 op1
= expand_normal (arg1
);
16748 op2
= expand_normal (arg2
);
16750 op0
= copy_to_mode_reg (Pmode
, op0
);
16752 op1
= copy_to_mode_reg (SImode
, op1
);
16754 op2
= copy_to_mode_reg (SImode
, op2
);
16756 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
16758 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
16761 case IX86_BUILTIN_MWAIT
:
16762 arg0
= TREE_VALUE (arglist
);
16763 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16764 op0
= expand_normal (arg0
);
16765 op1
= expand_normal (arg1
);
16767 op0
= copy_to_mode_reg (SImode
, op0
);
16769 op1
= copy_to_mode_reg (SImode
, op1
);
16770 emit_insn (gen_sse3_mwait (op0
, op1
));
16773 case IX86_BUILTIN_LDDQU
:
16774 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
16777 case IX86_BUILTIN_PALIGNR
:
16778 case IX86_BUILTIN_PALIGNR128
:
16779 if (fcode
== IX86_BUILTIN_PALIGNR
)
16781 icode
= CODE_FOR_ssse3_palignrdi
;
16786 icode
= CODE_FOR_ssse3_palignrti
;
16789 arg0
= TREE_VALUE (arglist
);
16790 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16791 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16792 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
16793 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
16794 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
16795 tmode
= insn_data
[icode
].operand
[0].mode
;
16796 mode1
= insn_data
[icode
].operand
[1].mode
;
16797 mode2
= insn_data
[icode
].operand
[2].mode
;
16798 mode3
= insn_data
[icode
].operand
[3].mode
;
16800 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16802 op0
= copy_to_reg (op0
);
16803 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16805 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16807 op1
= copy_to_reg (op1
);
16808 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
16810 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
16812 error ("shift must be an immediate");
16815 target
= gen_reg_rtx (mode
);
16816 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
16823 case IX86_BUILTIN_VEC_INIT_V2SI
:
16824 case IX86_BUILTIN_VEC_INIT_V4HI
:
16825 case IX86_BUILTIN_VEC_INIT_V8QI
:
16826 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
16828 case IX86_BUILTIN_VEC_EXT_V2DF
:
16829 case IX86_BUILTIN_VEC_EXT_V2DI
:
16830 case IX86_BUILTIN_VEC_EXT_V4SF
:
16831 case IX86_BUILTIN_VEC_EXT_V4SI
:
16832 case IX86_BUILTIN_VEC_EXT_V8HI
:
16833 case IX86_BUILTIN_VEC_EXT_V2SI
:
16834 case IX86_BUILTIN_VEC_EXT_V4HI
:
16835 return ix86_expand_vec_ext_builtin (arglist
, target
);
16837 case IX86_BUILTIN_VEC_SET_V8HI
:
16838 case IX86_BUILTIN_VEC_SET_V4HI
:
16839 return ix86_expand_vec_set_builtin (arglist
);
16845 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16846 if (d
->code
== fcode
)
16848 /* Compares are treated specially. */
16849 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16850 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
16851 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16852 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16853 return ix86_expand_sse_compare (d
, arglist
, target
);
16855 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
16858 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16859 if (d
->code
== fcode
)
16860 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
16862 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16863 if (d
->code
== fcode
)
16864 return ix86_expand_sse_comi (d
, arglist
, target
);
16866 gcc_unreachable ();
16869 /* Store OPERAND to the memory after reload is completed. This means
16870 that we can't easily use assign_stack_local. */
16872 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
16876 gcc_assert (reload_completed
);
16877 if (TARGET_RED_ZONE
)
16879 result
= gen_rtx_MEM (mode
,
16880 gen_rtx_PLUS (Pmode
,
16882 GEN_INT (-RED_ZONE_SIZE
)));
16883 emit_move_insn (result
, operand
);
16885 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
16891 operand
= gen_lowpart (DImode
, operand
);
16895 gen_rtx_SET (VOIDmode
,
16896 gen_rtx_MEM (DImode
,
16897 gen_rtx_PRE_DEC (DImode
,
16898 stack_pointer_rtx
)),
16902 gcc_unreachable ();
16904 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16913 split_di (&operand
, 1, operands
, operands
+ 1);
16915 gen_rtx_SET (VOIDmode
,
16916 gen_rtx_MEM (SImode
,
16917 gen_rtx_PRE_DEC (Pmode
,
16918 stack_pointer_rtx
)),
16921 gen_rtx_SET (VOIDmode
,
16922 gen_rtx_MEM (SImode
,
16923 gen_rtx_PRE_DEC (Pmode
,
16924 stack_pointer_rtx
)),
16929 /* Store HImodes as SImodes. */
16930 operand
= gen_lowpart (SImode
, operand
);
16934 gen_rtx_SET (VOIDmode
,
16935 gen_rtx_MEM (GET_MODE (operand
),
16936 gen_rtx_PRE_DEC (SImode
,
16937 stack_pointer_rtx
)),
16941 gcc_unreachable ();
16943 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16948 /* Free operand from the memory. */
/* NOTE(review): counterpart of ix86_force_to_memory -- pops the temporary
   stack slot that function allocated.  Lossy listing: the size computation
   between lines 16952 and 16960 and the closing GEN_INT/emit are missing;
   the upstream numbers skip (16952 -> 16956 -> 16960).  */
16950 ix86_free_from_memory (enum machine_mode mode
)
/* With a red zone nothing was pushed, so nothing to deallocate.  */
16952 if (!TARGET_RED_ZONE
)
/* Slot size presumably 8 for DImode/64-bit, else 4 -- TODO confirm
   against upstream; the assignments themselves are not in this listing.  */
16956 if (mode
== DImode
|| TARGET_64BIT
)
16960 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16961 to pop or add instruction if registers are available. */
16962 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16963 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16968 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16969 QImode must go into class Q_REGS.
16970 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16971 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): lossy listing -- return type, braces and several return
   statements are missing (upstream numbers skip, e.g. 16979 -> 16982).
   Restore from upstream before editing.  */
16973 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
16975 enum machine_mode mode
= GET_MODE (x
);
16977 /* We're only allowed to return a subclass of CLASS. Many of the
16978 following checks fail for NO_REGS, so eliminate that early. */
16979 if (class == NO_REGS
)
16982 /* All classes can load zeros. */
16983 if (x
== CONST0_RTX (mode
))
16986 /* Force constants into memory if we are loading a (nonzero) constant into
16987 an MMX or SSE register. This is because there are no MMX/SSE instructions
16988 to load from a constant. */
/* The CONSTANT_P(x) half of this condition appears to have been dropped
   from the listing (line 16989 is missing).  */
16990 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16993 /* Prefer SSE regs only, if we can use them for math. */
16994 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
16995 return SSE_CLASS_P (class) ? class : NO_REGS
;
16997 /* Floating-point constants need more complex checks. */
16998 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
17000 /* General regs can load everything. */
17001 if (reg_class_subset_p (class, GENERAL_REGS
))
17004 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17005 zero above. We only want to wind up preferring 80387 registers if
17006 we plan on doing computation with them. */
17008 && standard_80387_constant_p (x
))
17010 /* Limit class to non-sse. */
17011 if (class == FLOAT_SSE_REGS
)
17013 if (class == FP_TOP_SSE_REGS
)
17015 if (class == FP_SECOND_SSE_REGS
)
17016 return FP_SECOND_REG
;
17017 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
17024 /* Generally when we see PLUS here, it's the function invariant
17025 (plus soft-fp const_int). Which can only be computed into general
17027 if (GET_CODE (x
) == PLUS
)
17028 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
17030 /* QImode constants are easy to load, but non-constant QImode data
17031 must go into Q_REGS. */
17032 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
17034 if (reg_class_subset_p (class, Q_REGS
))
17036 if (reg_class_subset_p (Q_REGS
, class))
17044 /* Discourage putting floating-point values in SSE registers unless
17045 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): lossy listing -- return type and some return lines (e.g.
   upstream 17061) are missing.  */
17047 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
17049 enum machine_mode mode
= GET_MODE (x
);
17051 /* Restrict the output reload class to the register bank that we are doing
17052 math on. If we would like not to return a subset of CLASS, reject this
17053 alternative: if reload cannot do this, it will still use its choice. */
/* Redundant re-read of GET_MODE (x) -- mode was already initialized above;
   present in the original too, judging by line numbers 17049/17054.  */
17054 mode
= GET_MODE (x
);
17055 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
17056 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
17058 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
17060 if (class == FP_TOP_SSE_REGS
)
17062 else if (class == FP_SECOND_SSE_REGS
)
17063 return FP_SECOND_REG
;
17065 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
17071 /* If we are copying between general and FP registers, we need a memory
17072 location. The same is true for SSE and MMX registers.
17074 The macro can't work reliably when one of the CLASSES is class containing
17075 registers from multiple units (SSE, MMX, integer). We avoid this by never
17076 combining those units in single alternative in the machine description.
17077 Ensure that this constraint holds to avoid unexpected surprises.
17079 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17080 enforce these sanity checks. */
/* NOTE(review): lossy listing -- return type and the true/false return
   statements after each condition are missing (upstream numbers skip).  */
17083 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
17084 enum machine_mode mode
, int strict
)
/* Sanity check: reject mixed-unit classes (see the header comment above).  */
17086 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
17087 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
17088 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
17089 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
17090 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
17091 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
17093 gcc_assert (!strict
);
17097 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
17100 /* ??? This is a lie. We do have moves between mmx/general, and for
17101 mmx/sse2. But by saying we need secondary memory we discourage the
17102 register allocator from using the mmx registers unless needed. */
17103 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
17106 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
17108 /* SSE1 doesn't have any direct moves from other classes. */
17112 /* If the target says that inter-unit moves are more expensive
17113 than moving through memory, then don't generate them. */
17114 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
17117 /* Between SSE and general, we have moves no larger than word size. */
17118 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
17121 /* ??? For the cost of one register reformat penalty, we could use
17122 the same instructions to move SFmode and DFmode data, but the
17123 relevant move patterns don't support those alternatives. */
17124 if (mode
== SFmode
|| mode
== DFmode
)
17131 /* Return true if the registers in CLASS cannot represent the change from
17132 modes FROM to TO. */
/* NOTE(review): lossy listing -- return type and the returned values of each
   branch are missing; restore from upstream before editing.  */
17135 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
17136 enum reg_class
class)
17141 /* x87 registers can't do subreg at all, as all values are reformatted
17142 to extended precision. */
17143 if (MAYBE_FLOAT_CLASS_P (class))
17146 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17148 /* Vector registers do not support QI or HImode loads. If we don't
17149 disallow a change to these modes, reload will assume it's ok to
17150 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17151 the vec_dupv4hi pattern. */
17152 if (GET_MODE_SIZE (from
) < 4)
17155 /* Vector registers do not support subreg with nonzero offsets, which
17156 are otherwise valid for integer registers. Since we can't see
17157 whether we have a nonzero offset from here, prohibit all
17158 nonparadoxical subregs changing size. */
17159 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
17166 /* Return the cost of moving data from a register in class CLASS1 to
17167 one in class CLASS2.
17169 It is not required that the cost always equal 2 when FROM is the same as TO;
17170 on some machines it is expensive to move between registers if they are not
17171 general registers. */
/* NOTE(review): lossy listing -- the return type, the declaration/
   initialization of `cost`, and the final return are missing (upstream line
   numbers skip 17175 -> 17177, 17188 -> 17190, ...).  */
17174 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
17175 enum reg_class class2
)
17177 /* In case we require secondary memory, compute cost of the store followed
17178 by load. In order to avoid bad register allocation choices, we need
17179 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* strict==0: this path is reached from cost computation, not reload.  */
17181 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
17185 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
17186 MEMORY_MOVE_COST (mode
, class1
, 1));
17187 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
17188 MEMORY_MOVE_COST (mode
, class2
, 1));
17190 /* In case of copying from general_purpose_register we may emit multiple
17191 stores followed by single load causing memory size mismatch stall.
17192 Count this as arbitrarily high cost of 20. */
17193 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
17196 /* In the case of FP/MMX moves, the registers actually overlap, and we
17197 have to switch modes in order to treat them differently. */
17198 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
17199 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
17205 /* Moves between SSE/MMX and integer unit are expensive. */
17206 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
17207 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
17208 return ix86_cost
->mmxsse_to_integer
;
17209 if (MAYBE_FLOAT_CLASS_P (class1
))
17210 return ix86_cost
->fp_move
;
17211 if (MAYBE_SSE_CLASS_P (class1
))
17212 return ix86_cost
->sse_move
;
17213 if (MAYBE_MMX_CLASS_P (class1
))
17214 return ix86_cost
->mmx_move
;
17218 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): lossy listing -- return type and the 0/1 returns at the end
   of most branches are missing; restore from upstream before editing.  */
17221 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
17223 /* Flags and only flags can only hold CCmode values. */
17224 if (CC_REGNO_P (regno
))
17225 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* CC/RANDOM/PARTIAL_INT modes are rejected for all other registers.  */
17226 if (GET_MODE_CLASS (mode
) == MODE_CC
17227 || GET_MODE_CLASS (mode
) == MODE_RANDOM
17228 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
17230 if (FP_REGNO_P (regno
))
17231 return VALID_FP_MODE_P (mode
);
17232 if (SSE_REGNO_P (regno
))
17234 /* We implement the move patterns for all vector modes into and
17235 out of SSE registers, even when no operation instructions
17237 return (VALID_SSE_REG_MODE (mode
)
17238 || VALID_SSE2_REG_MODE (mode
)
17239 || VALID_MMX_REG_MODE (mode
)
17240 || VALID_MMX_REG_MODE_3DNOW (mode
));
17242 if (MMX_REGNO_P (regno
))
17244 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17245 so if the register is available at all, then we can move data of
17246 the given mode into or out of it. */
17247 return (VALID_MMX_REG_MODE (mode
)
17248 || VALID_MMX_REG_MODE_3DNOW (mode
));
/* Remaining registers are the general purpose ones.  */
17251 if (mode
== QImode
)
17253 /* Take care for QImode values - they can be in non-QI regs,
17254 but then they do cause partial register stalls. */
17255 if (regno
< 4 || TARGET_64BIT
)
17257 if (!TARGET_PARTIAL_REG_STALL
)
17259 return reload_in_progress
|| reload_completed
;
17261 /* We handle both integer and floats in the general purpose registers. */
17262 else if (VALID_INT_MODE_P (mode
))
17264 else if (VALID_FP_MODE_P (mode
))
17266 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17267 on to use that value in smaller contexts, this can easily force a
17268 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17269 supporting DImode, allow it. */
17270 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
17276 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17277 tieable integer mode. */
/* NOTE(review): lossy listing -- the switch over MODE and its case labels
   (upstream lines 17282-17288, 17290-17291) are missing; the two returns
   below belong to different cases of that switch.  */
17280 ix86_tieable_integer_mode_p (enum machine_mode mode
)
17289 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
17292 return TARGET_64BIT
;
17299 /* Return true if MODE1 is accessible in a register that can hold MODE2
17300 without copying. That is, all register classes that can hold MODE2
17301 can also hold MODE1. */
/* NOTE(review): lossy listing -- return type and the `return true` /
   `return false` lines after several conditions are missing.  */
17304 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
17306 if (mode1
== mode2
)
17309 if (ix86_tieable_integer_mode_p (mode1
)
17310 && ix86_tieable_integer_mode_p (mode2
))
17313 /* MODE2 being XFmode implies fp stack or general regs, which means we
17314 can tie any smaller floating point modes to it. Note that we do not
17315 tie this with TFmode. */
17316 if (mode2
== XFmode
)
17317 return mode1
== SFmode
|| mode1
== DFmode
;
17319 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17320 that we can tie it with SFmode. */
17321 if (mode2
== DFmode
)
17322 return mode1
== SFmode
;
17324 /* If MODE2 is only appropriate for an SSE register, then tie with
17325 any other mode acceptable to SSE registers. */
17326 if (GET_MODE_SIZE (mode2
) >= 8
17327 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
17328 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
17330 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17331 with any other mode acceptable to MMX registers. */
17332 if (GET_MODE_SIZE (mode2
) == 8
17333 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
17334 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
17339 /* Return the cost of moving data of mode M between a
17340 register and memory. A value of 2 is the default; this cost is
17341 relative to those in `REGISTER_MOVE_COST'.
17343 If moving between registers and memory is more expensive than
17344 between two registers, you should define this macro to express the
17347 Model also increased moving costs of QImode registers in non
17351 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
/* NOTE(review): lossy listing -- the `index` computations inside each class
   branch (upstream 17354-17369, 17376-17388, 17395-17404) and the switch
   case labels are missing; the surviving returns index ix86_cost tables.  */
17353 if (FLOAT_CLASS_P (class))
17370 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
17372 if (SSE_CLASS_P (class))
17375 switch (GET_MODE_SIZE (mode
))
17389 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
17391 if (MMX_CLASS_P (class))
17394 switch (GET_MODE_SIZE (mode
))
17405 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* General purpose registers: cost keyed on access width.  */
17407 switch (GET_MODE_SIZE (mode
))
17411 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
17412 : ix86_cost
->movzbl_load
);
17414 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
17415 : ix86_cost
->int_store
[0] + 4);
17418 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
17420 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17421 if (mode
== TFmode
)
17423 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
17424 * (((int) GET_MODE_SIZE (mode
)
17425 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
17429 /* Compute a (partial) cost for rtx X. Return true if the complete
17430 cost has been computed, and false if subexpressions should be
17431 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): lossy listing -- the outer `switch (code)`, its case labels
   (CONST_INT, ZERO_EXTEND, ASHIFT, MULT, DIV, PLUS, MINUS, NEG, COMPARE,
   FLOAT_EXTEND, ABS, SQRT, UNSPEC, judging by the cost fields referenced),
   the *total assignments for many branches and all `return` statements have
   been dropped; upstream line numbers skip throughout (17436 -> 17444,
   17592 -> 17601, ...).  Restore from upstream before editing.  */
17434 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
17436 enum machine_mode mode
= GET_MODE (x
);
/* Constant operands: cost depends on whether the immediate fits.  */
17444 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
17446 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` reads like a typo for
   `GET_CODE (x) != LABEL_REF` -- verify against upstream before "fixing".  */
17448 else if (flag_pic
&& SYMBOLIC_CONST (x
)
17450 || (!GET_CODE (x
) != LABEL_REF
17451 && (GET_CODE (x
) != SYMBOL_REF
17452 || !SYMBOL_REF_LOCAL_P (x
)))))
17459 if (mode
== VOIDmode
)
/* FP constants: cheap if loadable by fld1/fldz etc.  */
17462 switch (standard_80387_constant_p (x
))
17467 default: /* Other constants */
17472 /* Start with (MEM (SYMBOL_REF)), since that's where
17473 it'll probably end up. Add a penalty for size. */
17474 *total
= (COSTS_N_INSNS (1)
17475 + (flag_pic
!= 0 && !TARGET_64BIT
)
17476 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
17482 /* The zero extensions is often completely free on x86_64, so make
17483 it as cheap as possible. */
17484 if (TARGET_64BIT
&& mode
== DImode
17485 && GET_MODE (XEXP (x
, 0)) == SImode
)
17487 else if (TARGET_ZERO_EXTEND_WITH_AND
)
17488 *total
= ix86_cost
->add
;
17490 *total
= ix86_cost
->movzx
;
/* Sign extension branch.  */
17494 *total
= ix86_cost
->movsx
;
/* Shift by constant: may be replaced by lea for small shift counts.  */
17498 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
17499 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
17501 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17504 *total
= ix86_cost
->add
;
17507 if ((value
== 2 || value
== 3)
17508 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
17510 *total
= ix86_cost
->lea
;
/* 32-bit DImode shifts need a multi-insn sequence.  */
17520 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
17522 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17524 if (INTVAL (XEXP (x
, 1)) > 32)
17525 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
17527 *total
= ix86_cost
->shift_const
* 2;
17531 if (GET_CODE (XEXP (x
, 1)) == AND
)
17532 *total
= ix86_cost
->shift_var
* 2;
17534 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
17539 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17540 *total
= ix86_cost
->shift_const
;
17542 *total
= ix86_cost
->shift_var
;
/* Multiplication.  */
17547 if (FLOAT_MODE_P (mode
))
17549 *total
= ix86_cost
->fmul
;
17554 rtx op0
= XEXP (x
, 0);
17555 rtx op1
= XEXP (x
, 1);
17557 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17559 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
/* Count the set bits of the constant multiplier (Kernighan loop).  */
17560 for (nbits
= 0; value
!= 0; value
&= value
- 1)
17564 /* This is arbitrary. */
17567 /* Compute costs correctly for widening multiplication. */
17568 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
17569 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
17570 == GET_MODE_SIZE (mode
))
17572 int is_mulwiden
= 0;
17573 enum machine_mode inner_mode
= GET_MODE (op0
);
17575 if (GET_CODE (op0
) == GET_CODE (op1
))
17576 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
17577 else if (GET_CODE (op1
) == CONST_INT
)
17579 if (GET_CODE (op0
) == SIGN_EXTEND
)
17580 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
17583 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
17587 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
17590 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
17591 + nbits
* ix86_cost
->mult_bit
17592 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
/* Division.  */
17601 if (FLOAT_MODE_P (mode
))
17602 *total
= ix86_cost
->fdiv
;
17604 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
/* Addition: lea covers base+index*scale+disp combinations.  */
17608 if (FLOAT_MODE_P (mode
))
17609 *total
= ix86_cost
->fadd
;
17610 else if (GET_MODE_CLASS (mode
) == MODE_INT
17611 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
17613 if (GET_CODE (XEXP (x
, 0)) == PLUS
17614 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
17615 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
17616 && CONSTANT_P (XEXP (x
, 1)))
17618 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
17619 if (val
== 2 || val
== 4 || val
== 8)
17621 *total
= ix86_cost
->lea
;
17622 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17623 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
17625 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17629 else if (GET_CODE (XEXP (x
, 0)) == MULT
17630 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
17632 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
17633 if (val
== 2 || val
== 4 || val
== 8)
17635 *total
= ix86_cost
->lea
;
17636 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17637 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17641 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
17643 *total
= ix86_cost
->lea
;
17644 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17645 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17646 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
/* Subtraction.  */
17653 if (FLOAT_MODE_P (mode
))
17655 *total
= ix86_cost
->fadd
;
/* 32-bit DImode add/sub: two adds plus extension cost for narrow operands.  */
17663 if (!TARGET_64BIT
&& mode
== DImode
)
17665 *total
= (ix86_cost
->add
* 2
17666 + (rtx_cost (XEXP (x
, 0), outer_code
)
17667 << (GET_MODE (XEXP (x
, 0)) != DImode
))
17668 + (rtx_cost (XEXP (x
, 1), outer_code
)
17669 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* Negation.  */
17675 if (FLOAT_MODE_P (mode
))
17677 *total
= ix86_cost
->fchs
;
17683 if (!TARGET_64BIT
&& mode
== DImode
)
17684 *total
= ix86_cost
->add
* 2;
17686 *total
= ix86_cost
->add
;
/* Compare of a single extracted bit against zero -> test instruction.  */
17690 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
17691 && XEXP (XEXP (x
, 0), 1) == const1_rtx
17692 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
17693 && XEXP (x
, 1) == const0_rtx
)
17695 /* This kind of construct is implemented using test[bwl].
17696 Treat it as if we had an AND. */
17697 *total
= (ix86_cost
->add
17698 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
17699 + rtx_cost (const1_rtx
, outer_code
));
17705 if (!TARGET_SSE_MATH
17707 || (mode
== DFmode
&& !TARGET_SSE2
))
17712 if (FLOAT_MODE_P (mode
))
17713 *total
= ix86_cost
->fabs
;
17717 if (FLOAT_MODE_P (mode
))
17718 *total
= ix86_cost
->fsqrt
;
/* UNSPEC: thread-pointer reads are treated specially.  */
17722 if (XINT (x
, 1) == UNSPEC_TP
)
/* Counter used to generate unique local labels for Darwin symbol stubs.  */
17733 static int current_machopic_label_num
;
17735 /* Given a symbol name and its associated stub, write out the
17736 definition of the stub. */
/* NOTE(review): lossy listing -- return type (void), braces and the
   MACHOPIC_PURE/non-pure `if`/`else` lines that select between the
   pic-stub and plain-stub fprintf pairs below are missing.  */
17739 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
17741 unsigned int length
;
17742 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
17743 int label
= ++current_machopic_label_num
;
17745 /* For 64-bit we shouldn't get here. */
17746 gcc_assert (!TARGET_64BIT
);
17748 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17749 symb
= (*targetm
.strip_name_encoding
) (symb
);
17751 length
= strlen (stub
);
17752 binder_name
= alloca (length
+ 32);
17753 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
17755 length
= strlen (symb
);
17756 symbol_name
= alloca (length
+ 32);
17757 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
17759 sprintf (lazy_ptr_name
, "L%d$lz", label
);
/* Presumably pic vs. non-pic selects which stub section to use --
   TODO confirm: the conditional between these two calls is missing.  */
17762 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
17764 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
17766 fprintf (file
, "%s:\n", stub
);
17767 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
/* pic variant: materialize PC in %eax, load lazy pointer, jump via %edx.  */
17771 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
17772 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
17773 fprintf (file
, "\tjmp\t*%%edx\n");
/* non-pic variant: jump through the lazy pointer directly.  */
17776 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
/* Binder: push the lazy pointer address and tail into dyld.  */
17778 fprintf (file
, "%s:\n", binder_name
);
17782 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
17783 fprintf (file
, "\tpushl\t%%eax\n");
17786 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
17788 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially resolves to the binder.  */
17790 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
17791 fprintf (file
, "%s:\n", lazy_ptr_name
);
17792 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17793 fprintf (file
, "\t.long %s\n", binder_name
);
/* Darwin end-of-file hook: delegates to the generic darwin_file_end.
   NOTE(review): return type and surrounding lines are missing from this
   lossy listing; the whole region is inside TARGET_MACHO (see #endif).  */
17797 darwin_x86_file_end (void)
17799 darwin_file_end ();
17802 #endif /* TARGET_MACHO */
17804 /* Order the registers for register allocator. */
/* NOTE(review): lossy listing -- return type and the declarations of `i` and
   `pos` are missing (upstream numbers skip 17807 -> 17812).  Allocation
   preference order, as visible below: caller-saved GPRs, callee-saved GPRs,
   then x87/SSE (order depending on TARGET_SSE_MATH), then MMX.  */
17807 x86_order_regs_for_local_alloc (void)
17812 /* First allocate the local general purpose registers. */
17813 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17814 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
17815 reg_alloc_order
[pos
++] = i
;
17817 /* Global general purpose registers. */
17818 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17819 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
17820 reg_alloc_order
[pos
++] = i
;
17822 /* x87 registers come first in case we are doing FP math
17824 if (!TARGET_SSE_MATH
)
17825 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17826 reg_alloc_order
[pos
++] = i
;
17828 /* SSE registers. */
17829 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
17830 reg_alloc_order
[pos
++] = i
;
17831 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
17832 reg_alloc_order
[pos
++] = i
;
17834 /* x87 registers. */
17835 if (TARGET_SSE_MATH
)
17836 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17837 reg_alloc_order
[pos
++] = i
;
17839 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
17840 reg_alloc_order
[pos
++] = i
;
17842 /* Initialize the rest of array as we do not allocate some registers
17844 while (pos
< FIRST_PSEUDO_REGISTER
)
17845 reg_alloc_order
[pos
++] = 0;
17848 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17849 struct attribute_spec.handler. */
/* NOTE(review): lossy listing -- return type (tree), braces, the `type`
   declaration and the other arms that set `type` for non-TYPE_DECL nodes
   are missing (upstream numbers skip 17859 -> 17864).  */
17851 ix86_handle_struct_attribute (tree
*node
, tree name
,
17852 tree args ATTRIBUTE_UNUSED
,
17853 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
17856 if (DECL_P (*node
))
17858 if (TREE_CODE (*node
) == TYPE_DECL
)
17859 type
= &TREE_TYPE (*node
);
/* The attribute only applies to struct/union types.  */
17864 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
17865 || TREE_CODE (*type
) == UNION_TYPE
)))
17867 warning (OPT_Wattributes
, "%qs attribute ignored",
17868 IDENTIFIER_POINTER (name
));
17869 *no_add_attrs
= true;
/* ms_struct and gcc_struct are mutually exclusive on the same type.  */
17872 else if ((is_attribute_p ("ms_struct", name
)
17873 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
17874 || ((is_attribute_p ("gcc_struct", name
)
17875 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
17877 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
17878 IDENTIFIER_POINTER (name
));
17879 *no_add_attrs
= true;
/* True if RECORD_TYPE should use MS bitfield layout: either the target
   default is MS layout and "gcc_struct" does not override it, or the type
   carries an explicit "ms_struct" attribute.
   NOTE(review): return type and an opening parenthesis line are missing
   from this lossy listing.  */
17886 ix86_ms_bitfield_layout_p (tree record_type
)
17888 return (TARGET_MS_BITFIELD_LAYOUT
&&
17889 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
17890 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
17893 /* Returns an expression indicating where the this parameter is
17894 located on entry to the FUNCTION. */
/* NOTE(review): lossy listing -- return type (rtx), the TARGET_64BIT guard
   around the DImode branch, the `regno` selection logic and several braces
   are missing (upstream numbers skip 17899 -> 17903, 17917 -> 17921).  */
17897 x86_this_parameter (tree function
)
17899 tree type
= TREE_TYPE (function
);
/* 64-bit: `this` is in the first (or second, when a hidden aggregate-return
   pointer occupies slot 0) integer parameter register.  */
17903 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
17904 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
/* 32-bit regparm/fastcall: `this` may be in a register.  */
17907 if (ix86_function_regparm (type
, function
) > 0)
17911 parm
= TYPE_ARG_TYPES (type
);
17912 /* Figure out whether or not the function has a variable number of
17914 for (; parm
; parm
= TREE_CHAIN (parm
))
17915 if (TREE_VALUE (parm
) == void_type_node
)
17917 /* If not, the this parameter is in the first argument. */
17921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
17923 return gen_rtx_REG (SImode
, regno
);
/* Otherwise `this` is on the stack, past the return address (and past the
   hidden aggregate-return pointer when present).  */
17927 if (aggregate_value_p (TREE_TYPE (type
), type
))
17928 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
17930 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
17933 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): lossy listing -- return type (bool), the TARGET_64BIT early
   return, and the true/false returns after each comment are missing
   (upstream numbers skip 17940 -> 17944, 17948 -> 17952).  */
17936 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
17937 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
17938 HOST_WIDE_INT vcall_offset
, tree function
)
17940 /* 64-bit can handle anything. */
17944 /* For 32-bit, everything's fine if we have one free register. */
17945 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
17948 /* Need a free register for vcall_offset. */
17952 /* Need a free register for GOT references. */
17953 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
17956 /* Otherwise ok. */
17960 /* Output the assembler code for a thunk function. THUNK_DECL is the
17961 declaration for the thunk function itself, FUNCTION is the decl for
17962 the target function. DELTA is an immediate constant offset to be
17963 added to THIS. If VCALL_OFFSET is nonzero, the word at
17964 *(*this + vcall_offset) should be added to THIS. */
17967 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
17968 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
17969 HOST_WIDE_INT vcall_offset
, tree function
)
17972 rtx
this = x86_this_parameter (function
);
17975 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17976 pull it in now and let DELTA benefit. */
17979 else if (vcall_offset
)
17981 /* Put the this parameter into %eax. */
17983 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
17984 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17987 this_reg
= NULL_RTX
;
17989 /* Adjust the this parameter by a fixed constant. */
17992 xops
[0] = GEN_INT (delta
);
17993 xops
[1] = this_reg
? this_reg
: this;
17996 if (!x86_64_general_operand (xops
[0], DImode
))
17998 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
18000 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
18004 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
18007 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
18010 /* Adjust the this parameter by a value stored in the vtable. */
18014 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
18017 int tmp_regno
= 2 /* ECX */;
18018 if (lookup_attribute ("fastcall",
18019 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
18020 tmp_regno
= 0 /* EAX */;
18021 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
18024 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
18027 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
18029 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
18031 /* Adjust the this parameter. */
18032 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
18033 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
18035 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
18036 xops
[0] = GEN_INT (vcall_offset
);
18038 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
18039 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
18041 xops
[1] = this_reg
;
18043 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
18045 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
18048 /* If necessary, drop THIS back to its stack slot. */
18049 if (this_reg
&& this_reg
!= this)
18051 xops
[0] = this_reg
;
18053 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
18056 xops
[0] = XEXP (DECL_RTL (function
), 0);
18059 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
18060 output_asm_insn ("jmp\t%P0", xops
);
18063 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
18064 tmp
= gen_rtx_CONST (Pmode
, tmp
);
18065 tmp
= gen_rtx_MEM (QImode
, tmp
);
18067 output_asm_insn ("jmp\t%A0", xops
);
18072 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
18073 output_asm_insn ("jmp\t%P0", xops
);
18078 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
18079 tmp
= (gen_rtx_SYMBOL_REF
18081 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
18082 tmp
= gen_rtx_MEM (QImode
, tmp
);
18084 output_asm_insn ("jmp\t%0", xops
);
18087 #endif /* TARGET_MACHO */
18089 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
18090 output_set_got (tmp
, NULL_RTX
);
18093 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
18094 output_asm_insn ("jmp\t{*}%1", xops
);
18100 x86_file_start (void)
18102 default_file_start ();
18104 darwin_file_start ();
18106 if (X86_FILE_START_VERSION_DIRECTIVE
)
18107 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
18108 if (X86_FILE_START_FLTUSED
)
18109 fputs ("\t.global\t__fltused\n", asm_out_file
);
18110 if (ix86_asm_dialect
== ASM_INTEL
)
18111 fputs ("\t.intel_syntax\n", asm_out_file
);
18115 x86_field_alignment (tree field
, int computed
)
18117 enum machine_mode mode
;
18118 tree type
= TREE_TYPE (field
);
18120 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
18122 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
18123 ? get_inner_array_type (type
) : type
);
18124 if (mode
== DFmode
|| mode
== DCmode
18125 || GET_MODE_CLASS (mode
) == MODE_INT
18126 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
18127 return MIN (32, computed
);
18131 /* Output assembler code to FILE to increment profiler label # LABELNO
18132 for profiling a function entry. */
18134 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
18139 #ifndef NO_PROFILE_COUNTERS
18140 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
18142 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
18146 #ifndef NO_PROFILE_COUNTERS
18147 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
18149 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
18153 #ifndef NO_PROFILE_COUNTERS
18154 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18155 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
18157 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
18161 #ifndef NO_PROFILE_COUNTERS
18162 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
18163 PROFILE_COUNT_REGISTER
);
18165 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
18169 /* We don't have exact information about the insn sizes, but we may assume
18170 quite safely that we are informed about all 1 byte insns and memory
18171 address sizes. This is enough to eliminate unnecessary padding in
18175 min_insn_size (rtx insn
)
18179 if (!INSN_P (insn
) || !active_insn_p (insn
))
18182 /* Discard alignments we've emit and jump instructions. */
18183 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
18184 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
18186 if (GET_CODE (insn
) == JUMP_INSN
18187 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
18188 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
18191 /* Important case - calls are always 5 bytes.
18192 It is common to have many calls in the row. */
18193 if (GET_CODE (insn
) == CALL_INSN
18194 && symbolic_reference_mentioned_p (PATTERN (insn
))
18195 && !SIBLING_CALL_P (insn
))
18197 if (get_attr_length (insn
) <= 1)
18200 /* For normal instructions we may rely on the sizes of addresses
18201 and the presence of symbol to require 4 bytes of encoding.
18202 This is not the case for jumps where references are PC relative. */
18203 if (GET_CODE (insn
) != JUMP_INSN
)
18205 l
= get_attr_length_address (insn
);
18206 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
18215 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18219 ix86_avoid_jump_misspredicts (void)
18221 rtx insn
, start
= get_insns ();
18222 int nbytes
= 0, njumps
= 0;
18225 /* Look for all minimal intervals of instructions containing 4 jumps.
18226 The intervals are bounded by START and INSN. NBYTES is the total
18227 size of instructions in the interval including INSN and not including
18228 START. When the NBYTES is smaller than 16 bytes, it is possible
18229 that the end of START and INSN ends up in the same 16byte page.
18231 The smallest offset in the page INSN can start is the case where START
18232 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18233 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18235 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
18238 nbytes
+= min_insn_size (insn
);
18240 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
18241 INSN_UID (insn
), min_insn_size (insn
));
18242 if ((GET_CODE (insn
) == JUMP_INSN
18243 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
18244 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
18245 || GET_CODE (insn
) == CALL_INSN
)
18252 start
= NEXT_INSN (start
);
18253 if ((GET_CODE (start
) == JUMP_INSN
18254 && GET_CODE (PATTERN (start
)) != ADDR_VEC
18255 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
18256 || GET_CODE (start
) == CALL_INSN
)
18257 njumps
--, isjump
= 1;
18260 nbytes
-= min_insn_size (start
);
18262 gcc_assert (njumps
>= 0);
18264 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
18265 INSN_UID (start
), INSN_UID (insn
), nbytes
);
18267 if (njumps
== 3 && isjump
&& nbytes
< 16)
18269 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
18272 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
18273 INSN_UID (insn
), padsize
);
18274 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
18279 /* AMD Athlon works faster
18280 when RET is not destination of conditional jump or directly preceded
18281 by other jump instruction. We avoid the penalty by inserting NOP just
18282 before the RET instructions in such cases. */
18284 ix86_pad_returns (void)
18289 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
18291 basic_block bb
= e
->src
;
18292 rtx ret
= BB_END (bb
);
18294 bool replace
= false;
18296 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
18297 || !maybe_hot_bb_p (bb
))
18299 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
18300 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
18302 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
18307 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
18308 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
18309 && !(e
->flags
& EDGE_FALLTHRU
))
18314 prev
= prev_active_insn (ret
);
18316 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
18317 || GET_CODE (prev
) == CALL_INSN
))
18319 /* Empty functions get branch mispredict even when the jump destination
18320 is not visible to us. */
18321 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
18326 emit_insn_before (gen_return_internal_long (), ret
);
18332 /* Implement machine specific optimizations. We implement padding of returns
18333 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18337 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
18338 ix86_pad_returns ();
18339 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
18340 ix86_avoid_jump_misspredicts ();
18343 /* Return nonzero when QImode register that must be represented via REX prefix
18346 x86_extended_QIreg_mentioned_p (rtx insn
)
18349 extract_insn_cached (insn
);
18350 for (i
= 0; i
< recog_data
.n_operands
; i
++)
18351 if (REG_P (recog_data
.operand
[i
])
18352 && REGNO (recog_data
.operand
[i
]) >= 4)
18357 /* Return nonzero when P points to register encoded via REX prefix.
18358 Called via for_each_rtx. */
18360 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
18362 unsigned int regno
;
18365 regno
= REGNO (*p
);
18366 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
18369 /* Return true when INSN mentions register that must be encoded using REX
18372 x86_extended_reg_mentioned_p (rtx insn
)
18374 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
18377 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18378 optabs would emit if we didn't have TFmode patterns. */
18381 x86_emit_floatuns (rtx operands
[2])
18383 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
18384 enum machine_mode mode
, inmode
;
18386 inmode
= GET_MODE (operands
[1]);
18387 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
18390 in
= force_reg (inmode
, operands
[1]);
18391 mode
= GET_MODE (out
);
18392 neglab
= gen_label_rtx ();
18393 donelab
= gen_label_rtx ();
18394 i1
= gen_reg_rtx (Pmode
);
18395 f0
= gen_reg_rtx (mode
);
18397 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
18399 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
18400 emit_jump_insn (gen_jump (donelab
));
18403 emit_label (neglab
);
18405 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18406 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18407 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
18408 expand_float (f0
, i0
, 0);
18409 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
18411 emit_label (donelab
);
18414 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18415 with all elements equal to VAR. Return true if successful. */
18418 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
18419 rtx target
, rtx val
)
18421 enum machine_mode smode
, wsmode
, wvmode
;
18436 val
= force_reg (GET_MODE_INNER (mode
), val
);
18437 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18438 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18444 if (TARGET_SSE
|| TARGET_3DNOW_A
)
18446 val
= gen_lowpart (SImode
, val
);
18447 x
= gen_rtx_TRUNCATE (HImode
, val
);
18448 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
18449 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18471 /* Extend HImode to SImode using a paradoxical SUBREG. */
18472 tmp1
= gen_reg_rtx (SImode
);
18473 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18474 /* Insert the SImode value as low element of V4SImode vector. */
18475 tmp2
= gen_reg_rtx (V4SImode
);
18476 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18477 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18478 CONST0_RTX (V4SImode
),
18480 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18481 /* Cast the V4SImode vector back to a V8HImode vector. */
18482 tmp1
= gen_reg_rtx (V8HImode
);
18483 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
18484 /* Duplicate the low short through the whole low SImode word. */
18485 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
18486 /* Cast the V8HImode vector back to a V4SImode vector. */
18487 tmp2
= gen_reg_rtx (V4SImode
);
18488 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18489 /* Replicate the low element of the V4SImode vector. */
18490 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18491 /* Cast the V2SImode back to V8HImode, and store in target. */
18492 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
18503 /* Extend QImode to SImode using a paradoxical SUBREG. */
18504 tmp1
= gen_reg_rtx (SImode
);
18505 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18506 /* Insert the SImode value as low element of V4SImode vector. */
18507 tmp2
= gen_reg_rtx (V4SImode
);
18508 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18509 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18510 CONST0_RTX (V4SImode
),
18512 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18513 /* Cast the V4SImode vector back to a V16QImode vector. */
18514 tmp1
= gen_reg_rtx (V16QImode
);
18515 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
18516 /* Duplicate the low byte through the whole low SImode word. */
18517 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18518 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18519 /* Cast the V16QImode vector back to a V4SImode vector. */
18520 tmp2
= gen_reg_rtx (V4SImode
);
18521 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18522 /* Replicate the low element of the V4SImode vector. */
18523 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18524 /* Cast the V2SImode back to V16QImode, and store in target. */
18525 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
18533 /* Replicate the value once into the next wider mode and recurse. */
18534 val
= convert_modes (wsmode
, smode
, val
, true);
18535 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
18536 GEN_INT (GET_MODE_BITSIZE (smode
)),
18537 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18538 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
18540 x
= gen_reg_rtx (wvmode
);
18541 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
18542 gcc_unreachable ();
18543 emit_move_insn (target
, gen_lowpart (mode
, x
));
18551 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18552 whose ONE_VAR element is VAR, and other elements are zero. Return true
18556 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
18557 rtx target
, rtx var
, int one_var
)
18559 enum machine_mode vsimode
;
18575 var
= force_reg (GET_MODE_INNER (mode
), var
);
18576 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
18577 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18582 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
18583 new_target
= gen_reg_rtx (mode
);
18585 new_target
= target
;
18586 var
= force_reg (GET_MODE_INNER (mode
), var
);
18587 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
18588 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
18589 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
18592 /* We need to shuffle the value to the correct position, so
18593 create a new pseudo to store the intermediate result. */
18595 /* With SSE2, we can use the integer shuffle insns. */
18596 if (mode
!= V4SFmode
&& TARGET_SSE2
)
18598 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
18600 GEN_INT (one_var
== 1 ? 0 : 1),
18601 GEN_INT (one_var
== 2 ? 0 : 1),
18602 GEN_INT (one_var
== 3 ? 0 : 1)));
18603 if (target
!= new_target
)
18604 emit_move_insn (target
, new_target
);
18608 /* Otherwise convert the intermediate result to V4SFmode and
18609 use the SSE1 shuffle instructions. */
18610 if (mode
!= V4SFmode
)
18612 tmp
= gen_reg_rtx (V4SFmode
);
18613 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
18618 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
18620 GEN_INT (one_var
== 1 ? 0 : 1),
18621 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
18622 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
18624 if (mode
!= V4SFmode
)
18625 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
18626 else if (tmp
!= target
)
18627 emit_move_insn (target
, tmp
);
18629 else if (target
!= new_target
)
18630 emit_move_insn (target
, new_target
);
18635 vsimode
= V4SImode
;
18641 vsimode
= V2SImode
;
18647 /* Zero extend the variable element to SImode and recurse. */
18648 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
18650 x
= gen_reg_rtx (vsimode
);
18651 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
18653 gcc_unreachable ();
18655 emit_move_insn (target
, gen_lowpart (mode
, x
));
18663 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18664 consisting of the values in VALS. It is known that all elements
18665 except ONE_VAR are constants. Return true if successful. */
18668 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
18669 rtx target
, rtx vals
, int one_var
)
18671 rtx var
= XVECEXP (vals
, 0, one_var
);
18672 enum machine_mode wmode
;
18675 const_vec
= copy_rtx (vals
);
18676 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
18677 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
18685 /* For the two element vectors, it's just as easy to use
18686 the general case. */
18702 /* There's no way to set one QImode entry easily. Combine
18703 the variable value with its adjacent constant value, and
18704 promote to an HImode set. */
18705 x
= XVECEXP (vals
, 0, one_var
^ 1);
18708 var
= convert_modes (HImode
, QImode
, var
, true);
18709 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
18710 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18711 x
= GEN_INT (INTVAL (x
) & 0xff);
18715 var
= convert_modes (HImode
, QImode
, var
, true);
18716 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
18718 if (x
!= const0_rtx
)
18719 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
18720 1, OPTAB_LIB_WIDEN
);
18722 x
= gen_reg_rtx (wmode
);
18723 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
18724 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
18726 emit_move_insn (target
, gen_lowpart (mode
, x
));
18733 emit_move_insn (target
, const_vec
);
18734 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
18738 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18739 all values variable, and none identical. */
18742 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
18743 rtx target
, rtx vals
)
18745 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
18746 rtx op0
= NULL
, op1
= NULL
;
18747 bool use_vec_concat
= false;
18753 if (!mmx_ok
&& !TARGET_SSE
)
18759 /* For the two element vectors, we always implement VEC_CONCAT. */
18760 op0
= XVECEXP (vals
, 0, 0);
18761 op1
= XVECEXP (vals
, 0, 1);
18762 use_vec_concat
= true;
18766 half_mode
= V2SFmode
;
18769 half_mode
= V2SImode
;
18775 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18776 Recurse to load the two halves. */
18778 op0
= gen_reg_rtx (half_mode
);
18779 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
18780 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
18782 op1
= gen_reg_rtx (half_mode
);
18783 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
18784 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
18786 use_vec_concat
= true;
18797 gcc_unreachable ();
18800 if (use_vec_concat
)
18802 if (!register_operand (op0
, half_mode
))
18803 op0
= force_reg (half_mode
, op0
);
18804 if (!register_operand (op1
, half_mode
))
18805 op1
= force_reg (half_mode
, op1
);
18807 emit_insn (gen_rtx_SET (VOIDmode
, target
,
18808 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
18812 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
18813 enum machine_mode inner_mode
;
18814 rtx words
[4], shift
;
18816 inner_mode
= GET_MODE_INNER (mode
);
18817 n_elts
= GET_MODE_NUNITS (mode
);
18818 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
18819 n_elt_per_word
= n_elts
/ n_words
;
18820 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
18822 for (i
= 0; i
< n_words
; ++i
)
18824 rtx word
= NULL_RTX
;
18826 for (j
= 0; j
< n_elt_per_word
; ++j
)
18828 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
18829 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
18835 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
18836 word
, 1, OPTAB_LIB_WIDEN
);
18837 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
18838 word
, 1, OPTAB_LIB_WIDEN
);
18846 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
18847 else if (n_words
== 2)
18849 rtx tmp
= gen_reg_rtx (mode
);
18850 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
18851 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
18852 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
18853 emit_move_insn (target
, tmp
);
18855 else if (n_words
== 4)
18857 rtx tmp
= gen_reg_rtx (V4SImode
);
18858 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
18859 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
18860 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
18863 gcc_unreachable ();
18867 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18868 instructions unless MMX_OK is true. */
18871 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
18873 enum machine_mode mode
= GET_MODE (target
);
18874 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18875 int n_elts
= GET_MODE_NUNITS (mode
);
18876 int n_var
= 0, one_var
= -1;
18877 bool all_same
= true, all_const_zero
= true;
18881 for (i
= 0; i
< n_elts
; ++i
)
18883 x
= XVECEXP (vals
, 0, i
);
18884 if (!CONSTANT_P (x
))
18885 n_var
++, one_var
= i
;
18886 else if (x
!= CONST0_RTX (inner_mode
))
18887 all_const_zero
= false;
18888 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
18892 /* Constants are best loaded from the constant pool. */
18895 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
18899 /* If all values are identical, broadcast the value. */
18901 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
18902 XVECEXP (vals
, 0, 0)))
18905 /* Values where only one field is non-constant are best loaded from
18906 the pool and overwritten via move later. */
18910 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
18911 XVECEXP (vals
, 0, one_var
),
18915 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
18919 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
18923 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
18925 enum machine_mode mode
= GET_MODE (target
);
18926 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18927 bool use_vec_merge
= false;
18936 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
18937 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
18939 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
18941 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
18942 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18952 /* For the two element vectors, we implement a VEC_CONCAT with
18953 the extraction of the other element. */
18955 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
18956 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
18959 op0
= val
, op1
= tmp
;
18961 op0
= tmp
, op1
= val
;
18963 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
18964 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18972 use_vec_merge
= true;
18976 /* tmp = target = A B C D */
18977 tmp
= copy_to_reg (target
);
18978 /* target = A A B B */
18979 emit_insn (gen_sse_unpcklps (target
, target
, target
));
18980 /* target = X A B B */
18981 ix86_expand_vector_set (false, target
, val
, 0);
18982 /* target = A X C D */
18983 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18984 GEN_INT (1), GEN_INT (0),
18985 GEN_INT (2+4), GEN_INT (3+4)));
18989 /* tmp = target = A B C D */
18990 tmp
= copy_to_reg (target
);
18991 /* tmp = X B C D */
18992 ix86_expand_vector_set (false, tmp
, val
, 0);
18993 /* target = A B X D */
18994 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18995 GEN_INT (0), GEN_INT (1),
18996 GEN_INT (0+4), GEN_INT (3+4)));
19000 /* tmp = target = A B C D */
19001 tmp
= copy_to_reg (target
);
19002 /* tmp = X B C D */
19003 ix86_expand_vector_set (false, tmp
, val
, 0);
19004 /* target = A B X D */
19005 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
19006 GEN_INT (0), GEN_INT (1),
19007 GEN_INT (2+4), GEN_INT (0+4)));
19011 gcc_unreachable ();
19016 /* Element 0 handled by vec_merge below. */
19019 use_vec_merge
= true;
19025 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19026 store into element 0, then shuffle them back. */
19030 order
[0] = GEN_INT (elt
);
19031 order
[1] = const1_rtx
;
19032 order
[2] = const2_rtx
;
19033 order
[3] = GEN_INT (3);
19034 order
[elt
] = const0_rtx
;
19036 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
19037 order
[1], order
[2], order
[3]));
19039 ix86_expand_vector_set (false, target
, val
, 0);
19041 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
19042 order
[1], order
[2], order
[3]));
19046 /* For SSE1, we have to reuse the V4SF code. */
19047 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
19048 gen_lowpart (SFmode
, val
), elt
);
19053 use_vec_merge
= TARGET_SSE2
;
19056 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
19067 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
19068 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
19069 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19073 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
19075 emit_move_insn (mem
, target
);
19077 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
19078 emit_move_insn (tmp
, val
);
19080 emit_move_insn (target
, mem
);
19085 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
19087 enum machine_mode mode
= GET_MODE (vec
);
19088 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
19089 bool use_vec_extr
= false;
19102 use_vec_extr
= true;
19114 tmp
= gen_reg_rtx (mode
);
19115 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
19116 GEN_INT (elt
), GEN_INT (elt
),
19117 GEN_INT (elt
+4), GEN_INT (elt
+4)));
19121 tmp
= gen_reg_rtx (mode
);
19122 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
19126 gcc_unreachable ();
19129 use_vec_extr
= true;
19144 tmp
= gen_reg_rtx (mode
);
19145 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
19146 GEN_INT (elt
), GEN_INT (elt
),
19147 GEN_INT (elt
), GEN_INT (elt
)));
19151 tmp
= gen_reg_rtx (mode
);
19152 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
19156 gcc_unreachable ();
19159 use_vec_extr
= true;
19164 /* For SSE1, we have to reuse the V4SF code. */
19165 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
19166 gen_lowpart (V4SFmode
, vec
), elt
);
19172 use_vec_extr
= TARGET_SSE2
;
19175 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
19180 /* ??? Could extract the appropriate HImode element and shift. */
19187 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
19188 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
19190 /* Let the rtl optimizers know about the zero extension performed. */
19191 if (inner_mode
== HImode
)
19193 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
19194 target
= gen_lowpart (SImode
, target
);
19197 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19201 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
19203 emit_move_insn (mem
, vec
);
19205 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
19206 emit_move_insn (target
, tmp
);
19210 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19211 pattern to reduce; DEST is the destination; IN is the input vector. */
19214 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
19216 rtx tmp1
, tmp2
, tmp3
;
19218 tmp1
= gen_reg_rtx (V4SFmode
);
19219 tmp2
= gen_reg_rtx (V4SFmode
);
19220 tmp3
= gen_reg_rtx (V4SFmode
);
19222 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
19223 emit_insn (fn (tmp2
, tmp1
, in
));
19225 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
19226 GEN_INT (1), GEN_INT (1),
19227 GEN_INT (1+4), GEN_INT (1+4)));
19228 emit_insn (fn (dest
, tmp2
, tmp3
));
19231 /* Target hook for scalar_mode_supported_p. */
19233 ix86_scalar_mode_supported_p (enum machine_mode mode
)
19235 if (DECIMAL_FLOAT_MODE_P (mode
))
19238 return default_scalar_mode_supported_p (mode
);
19241 /* Implements target hook vector_mode_supported_p. */
19243 ix86_vector_mode_supported_p (enum machine_mode mode
)
19245 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
19247 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
19249 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
19251 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
19256 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19258 We do this in the new i386 backend to maintain source compatibility
19259 with the old cc0-based compiler. */
19262 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
19263 tree inputs ATTRIBUTE_UNUSED
,
19266 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
19268 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
19270 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
19275 /* Return true if this goes in small data/bss. */
19278 ix86_in_large_data_p (tree exp
)
19280 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
19283 /* Functions are never large data. */
19284 if (TREE_CODE (exp
) == FUNCTION_DECL
)
19287 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
19289 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
19290 if (strcmp (section
, ".ldata") == 0
19291 || strcmp (section
, ".lbss") == 0)
19297 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
19299 /* If this is an incomplete type with size 0, then we can't put it
19300 in data because it might be too big when completed. */
19301 if (!size
|| size
> ix86_section_threshold
)
19308 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
19310 default_encode_section_info (decl
, rtl
, first
);
19312 if (TREE_CODE (decl
) == VAR_DECL
19313 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
19314 && ix86_in_large_data_p (decl
))
19315 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
19318 /* Worker function for REVERSE_CONDITION. */
19321 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
19323 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
19324 ? reverse_condition (code
)
19325 : reverse_condition_maybe_unordered (code
));
19328 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19332 output_387_reg_move (rtx insn
, rtx
*operands
)
19334 if (REG_P (operands
[1])
19335 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
19337 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
19338 return output_387_ffreep (operands
, 0);
19339 return "fstp\t%y0";
19341 if (STACK_TOP_P (operands
[0]))
19342 return "fld%z1\t%y1";
19346 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19347 FP status register is set. */
19350 ix86_emit_fp_unordered_jump (rtx label
)
19352 rtx reg
= gen_reg_rtx (HImode
);
19355 emit_insn (gen_x86_fnstsw_1 (reg
));
19357 if (TARGET_USE_SAHF
)
19359 emit_insn (gen_x86_sahf_1 (reg
));
19361 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
19362 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
19366 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
19368 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
19369 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
19372 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
19373 gen_rtx_LABEL_REF (VOIDmode
, label
),
19375 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
19376 emit_jump_insn (temp
);
19379 /* Output code to perform a log1p XFmode calculation. */
19381 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
19383 rtx label1
= gen_label_rtx ();
19384 rtx label2
= gen_label_rtx ();
19386 rtx tmp
= gen_reg_rtx (XFmode
);
19387 rtx tmp2
= gen_reg_rtx (XFmode
);
19389 emit_insn (gen_absxf2 (tmp
, op1
));
19390 emit_insn (gen_cmpxf (tmp
,
19391 CONST_DOUBLE_FROM_REAL_VALUE (
19392 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
19394 emit_jump_insn (gen_bge (label1
));
19396 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19397 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
19398 emit_jump (label2
);
19400 emit_label (label1
);
19401 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
19402 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
19403 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19404 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
19406 emit_label (label2
);
19409 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19412 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
19415 /* With Binutils 2.15, the "@unwind" marker must be specified on
19416 every occurrence of the ".eh_frame" section, not just the first
19419 && strcmp (name
, ".eh_frame") == 0)
19421 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
19422 flags
& SECTION_WRITE
? "aw" : "a");
19425 default_elf_asm_named_section (name
, flags
, decl
);
19428 /* Return the mangling of TYPE if it is an extended fundamental type. */
19430 static const char *
19431 ix86_mangle_fundamental_type (tree type
)
19433 switch (TYPE_MODE (type
))
19436 /* __float128 is "g". */
19439 /* "long double" or __float80 is "e". */
19446 /* For 32-bit code we can save PIC register setup by using
19447 __stack_chk_fail_local hidden function instead of calling
19448 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19449 register, so it is better to call __stack_chk_fail directly. */
19452 ix86_stack_protect_fail (void)
19454 return TARGET_64BIT
19455 ? default_external_stack_protect_fail ()
19456 : default_hidden_stack_protect_fail ();
19459 /* Select a format to encode pointers in exception handling data. CODE
19460 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19461 true if the symbol may be affected by dynamic relocations.
19463 ??? All x86 object file formats are capable of representing this.
19464 After all, the relocation needed is the same as for the call insn.
19465 Whether or not a particular assembler allows us to enter such, I
19466 guess we'll have to see. */
19468 asm_preferred_eh_data_format (int code
, int global
)
19472 int type
= DW_EH_PE_sdata8
;
19474 || ix86_cmodel
== CM_SMALL_PIC
19475 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
19476 type
= DW_EH_PE_sdata4
;
19477 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
19479 if (ix86_cmodel
== CM_SMALL
19480 || (ix86_cmodel
== CM_MEDIUM
&& code
))
19481 return DW_EH_PE_udata4
;
19482 return DW_EH_PE_absptr
;
19485 /* Expand copysign from SIGN to the positive value ABS_VALUE
19486 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19489 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
19491 enum machine_mode mode
= GET_MODE (sign
);
19492 rtx sgn
= gen_reg_rtx (mode
);
19493 if (mask
== NULL_RTX
)
19495 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
19496 if (!VECTOR_MODE_P (mode
))
19498 /* We need to generate a scalar mode mask in this case. */
19499 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19500 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19501 mask
= gen_reg_rtx (mode
);
19502 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19506 mask
= gen_rtx_NOT (mode
, mask
);
19507 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
19508 gen_rtx_AND (mode
, mask
, sign
)));
19509 emit_insn (gen_rtx_SET (VOIDmode
, result
,
19510 gen_rtx_IOR (mode
, abs_value
, sgn
)));
19513 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19514 mask for masking out the sign-bit is stored in *SMASK, if that is
19517 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
19519 enum machine_mode mode
= GET_MODE (op0
);
19522 xa
= gen_reg_rtx (mode
);
19523 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
19524 if (!VECTOR_MODE_P (mode
))
19526 /* We need to generate a scalar mode mask in this case. */
19527 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19528 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19529 mask
= gen_reg_rtx (mode
);
19530 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19532 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
19533 gen_rtx_AND (mode
, op0
, mask
)));
19541 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19542 swapping the operands if SWAP_OPERANDS is true. The expanded
19543 code is a forward jump to a newly created label in case the
19544 comparison is true. The generated label rtx is returned. */
19546 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
19547 bool swap_operands
)
19558 label
= gen_label_rtx ();
19559 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
19560 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19561 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
19562 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
19563 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19564 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
19565 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19566 JUMP_LABEL (tmp
) = label
;
19571 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19572 using comparison code CODE. Operands are swapped for the comparison if
19573 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19575 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
19576 bool swap_operands
)
19578 enum machine_mode mode
= GET_MODE (op0
);
19579 rtx mask
= gen_reg_rtx (mode
);
19588 if (mode
== DFmode
)
19589 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
19590 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
19592 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
19593 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
19598 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19599 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19601 ix86_gen_TWO52 (enum machine_mode mode
)
19603 REAL_VALUE_TYPE TWO52r
;
19606 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
19607 TWO52
= const_double_from_real_value (TWO52r
, mode
);
19608 TWO52
= force_reg (mode
, TWO52
);
19613 /* Expand SSE sequence for computing lround from OP1 storing
19616 ix86_expand_lround (rtx op0
, rtx op1
)
19618 /* C code for the stuff we're doing below:
19619 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19622 enum machine_mode mode
= GET_MODE (op1
);
19623 const struct real_format
*fmt
;
19624 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
19627 /* load nextafter (0.5, 0.0) */
19628 fmt
= REAL_MODE_FORMAT (mode
);
19629 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
19630 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
19632 /* adj = copysign (0.5, op1) */
19633 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
19634 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
19636 /* adj = op1 + adj */
19637 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
19639 /* op0 = (imode)adj */
19640 expand_fix (op0
, adj
, 0);
19643 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
19646 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
19648 /* C code for the stuff we're doing below (for do_floor):
19650 xi -= (double)xi > op1 ? 1 : 0;
19653 enum machine_mode fmode
= GET_MODE (op1
);
19654 enum machine_mode imode
= GET_MODE (op0
);
19655 rtx ireg
, freg
, label
, tmp
;
19657 /* reg = (long)op1 */
19658 ireg
= gen_reg_rtx (imode
);
19659 expand_fix (ireg
, op1
, 0);
19661 /* freg = (double)reg */
19662 freg
= gen_reg_rtx (fmode
);
19663 expand_float (freg
, ireg
, 0);
19665 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19666 label
= ix86_expand_sse_compare_and_jump (UNLE
,
19667 freg
, op1
, !do_floor
);
19668 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
19669 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
19670 emit_move_insn (ireg
, tmp
);
19672 emit_label (label
);
19673 LABEL_NUSES (label
) = 1;
19675 emit_move_insn (op0
, ireg
);
19678 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19679 result in OPERAND0. */
19681 ix86_expand_rint (rtx operand0
, rtx operand1
)
19683 /* C code for the stuff we're doing below:
19684 xa = fabs (operand1);
19685 if (!isless (xa, 2**52))
19687 xa = xa + 2**52 - 2**52;
19688 return copysign (xa, operand1);
19690 enum machine_mode mode
= GET_MODE (operand0
);
19691 rtx res
, xa
, label
, TWO52
, mask
;
19693 res
= gen_reg_rtx (mode
);
19694 emit_move_insn (res
, operand1
);
19696 /* xa = abs (operand1) */
19697 xa
= ix86_expand_sse_fabs (res
, &mask
);
19699 /* if (!isless (xa, TWO52)) goto label; */
19700 TWO52
= ix86_gen_TWO52 (mode
);
19701 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19703 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
19704 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19706 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
19708 emit_label (label
);
19709 LABEL_NUSES (label
) = 1;
19711 emit_move_insn (operand0
, res
);
19714 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19717 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
19719 /* C code for the stuff we expand below.
19720 double xa = fabs (x), x2;
19721 if (!isless (xa, TWO52))
19723 xa = xa + TWO52 - TWO52;
19724 x2 = copysign (xa, x);
19733 enum machine_mode mode
= GET_MODE (operand0
);
19734 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
19736 TWO52
= ix86_gen_TWO52 (mode
);
19738 /* Temporary for holding the result, initialized to the input
19739 operand to ease control flow. */
19740 res
= gen_reg_rtx (mode
);
19741 emit_move_insn (res
, operand1
);
19743 /* xa = abs (operand1) */
19744 xa
= ix86_expand_sse_fabs (res
, &mask
);
19746 /* if (!isless (xa, TWO52)) goto label; */
19747 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19749 /* xa = xa + TWO52 - TWO52; */
19750 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
19751 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19753 /* xa = copysign (xa, operand1) */
19754 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
19756 /* generate 1.0 or -1.0 */
19757 one
= force_reg (mode
,
19758 const_double_from_real_value (do_floor
19759 ? dconst1
: dconstm1
, mode
));
19761 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19762 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
19763 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19764 gen_rtx_AND (mode
, one
, tmp
)));
19765 /* We always need to subtract here to preserve signed zero. */
19766 tmp
= expand_simple_binop (mode
, MINUS
,
19767 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
19768 emit_move_insn (res
, tmp
);
19770 emit_label (label
);
19771 LABEL_NUSES (label
) = 1;
19773 emit_move_insn (operand0
, res
);
19776 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19779 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
19781 /* C code for the stuff we expand below.
19782 double xa = fabs (x), x2;
19783 if (!isless (xa, TWO52))
19785 x2 = (double)(long)x;
19792 if (HONOR_SIGNED_ZEROS (mode))
19793 return copysign (x2, x);
19796 enum machine_mode mode
= GET_MODE (operand0
);
19797 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
19799 TWO52
= ix86_gen_TWO52 (mode
);
19801 /* Temporary for holding the result, initialized to the input
19802 operand to ease control flow. */
19803 res
= gen_reg_rtx (mode
);
19804 emit_move_insn (res
, operand1
);
19806 /* xa = abs (operand1) */
19807 xa
= ix86_expand_sse_fabs (res
, &mask
);
19809 /* if (!isless (xa, TWO52)) goto label; */
19810 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19812 /* xa = (double)(long)x */
19813 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
19814 expand_fix (xi
, res
, 0);
19815 expand_float (xa
, xi
, 0);
19818 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
19820 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19821 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
19822 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19823 gen_rtx_AND (mode
, one
, tmp
)));
19824 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
19825 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
19826 emit_move_insn (res
, tmp
);
19828 if (HONOR_SIGNED_ZEROS (mode
))
19829 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
19831 emit_label (label
);
19832 LABEL_NUSES (label
) = 1;
19834 emit_move_insn (operand0
, res
);
19837 /* Expand SSE sequence for computing round from OPERAND1 storing
19838 into OPERAND0. Sequence that works without relying on DImode truncation
19839 via cvttsd2siq that is only available on 64bit targets. */
19841 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
19843 /* C code for the stuff we expand below.
19844 double xa = fabs (x), xa2, x2;
19845 if (!isless (xa, TWO52))
19847 Using the absolute value and copying back sign makes
19848 -0.0 -> -0.0 correct.
19849 xa2 = xa + TWO52 - TWO52;
19854 else if (dxa > 0.5)
19856 x2 = copysign (xa2, x);
19859 enum machine_mode mode
= GET_MODE (operand0
);
19860 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
19862 TWO52
= ix86_gen_TWO52 (mode
);
19864 /* Temporary for holding the result, initialized to the input
19865 operand to ease control flow. */
19866 res
= gen_reg_rtx (mode
);
19867 emit_move_insn (res
, operand1
);
19869 /* xa = abs (operand1) */
19870 xa
= ix86_expand_sse_fabs (res
, &mask
);
19872 /* if (!isless (xa, TWO52)) goto label; */
19873 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19875 /* xa2 = xa + TWO52 - TWO52; */
19876 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
19877 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
19879 /* dxa = xa2 - xa; */
19880 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
19882 /* generate 0.5, 1.0 and -0.5 */
19883 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
19884 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
19885 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
19889 tmp
= gen_reg_rtx (mode
);
19890 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19891 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
19892 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19893 gen_rtx_AND (mode
, one
, tmp
)));
19894 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
19895 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19896 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
19897 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19898 gen_rtx_AND (mode
, one
, tmp
)));
19899 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
19901 /* res = copysign (xa2, operand1) */
19902 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
19904 emit_label (label
);
19905 LABEL_NUSES (label
) = 1;
19907 emit_move_insn (operand0
, res
);
19910 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19913 ix86_expand_trunc (rtx operand0
, rtx operand1
)
19915 /* C code for SSE variant we expand below.
19916 double xa = fabs (x), x2;
19917 if (!isless (xa, TWO52))
19919 x2 = (double)(long)x;
19920 if (HONOR_SIGNED_ZEROS (mode))
19921 return copysign (x2, x);
19924 enum machine_mode mode
= GET_MODE (operand0
);
19925 rtx xa
, xi
, TWO52
, label
, res
, mask
;
19927 TWO52
= ix86_gen_TWO52 (mode
);
19929 /* Temporary for holding the result, initialized to the input
19930 operand to ease control flow. */
19931 res
= gen_reg_rtx (mode
);
19932 emit_move_insn (res
, operand1
);
19934 /* xa = abs (operand1) */
19935 xa
= ix86_expand_sse_fabs (res
, &mask
);
19937 /* if (!isless (xa, TWO52)) goto label; */
19938 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19940 /* x = (double)(long)x */
19941 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
19942 expand_fix (xi
, res
, 0);
19943 expand_float (res
, xi
, 0);
19945 if (HONOR_SIGNED_ZEROS (mode
))
19946 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
19948 emit_label (label
);
19949 LABEL_NUSES (label
) = 1;
19951 emit_move_insn (operand0
, res
);
19954 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19957 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
19959 enum machine_mode mode
= GET_MODE (operand0
);
19960 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
19962 /* C code for SSE variant we expand below.
19963 double xa = fabs (x), x2;
19964 if (!isless (xa, TWO52))
19966 xa2 = xa + TWO52 - TWO52;
19970 x2 = copysign (xa2, x);
19974 TWO52
= ix86_gen_TWO52 (mode
);
19976 /* Temporary for holding the result, initialized to the input
19977 operand to ease control flow. */
19978 res
= gen_reg_rtx (mode
);
19979 emit_move_insn (res
, operand1
);
19981 /* xa = abs (operand1) */
19982 xa
= ix86_expand_sse_fabs (res
, &smask
);
19984 /* if (!isless (xa, TWO52)) goto label; */
19985 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19987 /* res = xa + TWO52 - TWO52; */
19988 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
19989 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
19990 emit_move_insn (res
, tmp
);
19993 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
19995 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19996 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
19997 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
19998 gen_rtx_AND (mode
, mask
, one
)));
19999 tmp
= expand_simple_binop (mode
, MINUS
,
20000 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
20001 emit_move_insn (res
, tmp
);
20003 /* res = copysign (res, operand1) */
20004 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
20006 emit_label (label
);
20007 LABEL_NUSES (label
) = 1;
20009 emit_move_insn (operand0
, res
);
20012 /* Expand SSE sequence for computing round from OPERAND1 storing
20015 ix86_expand_round (rtx operand0
, rtx operand1
)
20017 /* C code for the stuff we're doing below:
20018 double xa = fabs (x);
20019 if (!isless (xa, TWO52))
20021 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20022 return copysign (xa, x);
20024 enum machine_mode mode
= GET_MODE (operand0
);
20025 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
20026 const struct real_format
*fmt
;
20027 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
20029 /* Temporary for holding the result, initialized to the input
20030 operand to ease control flow. */
20031 res
= gen_reg_rtx (mode
);
20032 emit_move_insn (res
, operand1
);
20034 TWO52
= ix86_gen_TWO52 (mode
);
20035 xa
= ix86_expand_sse_fabs (res
, &mask
);
20036 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
20038 /* load nextafter (0.5, 0.0) */
20039 fmt
= REAL_MODE_FORMAT (mode
);
20040 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
20041 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
20043 /* xa = xa + 0.5 */
20044 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
20045 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
20047 /* xa = (double)(int64_t)xa */
20048 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
20049 expand_fix (xi
, xa
, 0);
20050 expand_float (xa
, xi
, 0);
20052 /* res = copysign (xa, operand1) */
20053 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
20055 emit_label (label
);
20056 LABEL_NUSES (label
) = 1;
20058 emit_move_insn (operand0
, res
);
20061 #include "gt-i386.h"