1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* NOTE(review): the "#endif" closing CHECK_STACK_LIMIT and the MODE_INDEX
   fallback arm (": 4)" for non-integer modes) appear to have been lost in
   extraction -- restore them from upstream i386.c before compiling.  */
/* Default stack-probe limit when the target configuration provides none;
   -1 means "no limit".  */
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
/* Cost table used when tuning for code size (-Os): entries are byte counts
   via COSTS_N_BYTES rather than cycle estimates.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. "large" insn, MOVE_RATIO, branch cost) and the
   closing "};" were lost in extraction -- restore from upstream i386.c.  */
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost
= { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the original 80386.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost
= { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the 80486.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
178 struct processor_costs i486_cost
= { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the Pentium (P5).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
231 struct processor_costs pentium_cost
= {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the Pentium Pro / P6 family.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
284 struct processor_costs pentiumpro_cost
= {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the AMD Geode.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
337 struct processor_costs geode_cost
= {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (2), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (4), /* HI */
344 COSTS_N_INSNS (7), /* SI */
345 COSTS_N_INSNS (7), /* DI */
346 COSTS_N_INSNS (7)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (23), /* HI */
350 COSTS_N_INSNS (39), /* SI */
351 COSTS_N_INSNS (39), /* DI */
352 COSTS_N_INSNS (39)}, /* other */
353 COSTS_N_INSNS (1), /* cost of movsx */
354 COSTS_N_INSNS (1), /* cost of movzx */
355 8, /* "large" insn */
357 1, /* cost for loading QImode using movzbl */
358 {1, 1, 1}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {1, 1, 1}, /* cost of storing integer registers */
362 1, /* cost of reg,reg fld/fst */
363 {1, 1, 1}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 6, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
368 1, /* cost of moving MMX register */
369 {1, 1}, /* cost of loading MMX registers
370 in SImode and DImode */
371 {1, 1}, /* cost of storing MMX registers
372 in SImode and DImode */
373 1, /* cost of moving SSE register */
374 {1, 1, 1}, /* cost of loading SSE registers
375 in SImode, DImode and TImode */
376 {1, 1, 1}, /* cost of storing SSE registers
377 in SImode, DImode and TImode */
378 1, /* MMX or SSE register to integer */
379 32, /* size of prefetch block */
380 1, /* number of parallel prefetches */
382 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
383 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
384 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
385 COSTS_N_INSNS (1), /* cost of FABS instruction. */
386 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
387 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the AMD K6.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
391 struct processor_costs k6_cost
= {
392 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (2), /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
397 COSTS_N_INSNS (3), /* HI */
398 COSTS_N_INSNS (3), /* SI */
399 COSTS_N_INSNS (3), /* DI */
400 COSTS_N_INSNS (3)}, /* other */
401 0, /* cost of multiply per each bit set */
402 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
403 COSTS_N_INSNS (18), /* HI */
404 COSTS_N_INSNS (18), /* SI */
405 COSTS_N_INSNS (18), /* DI */
406 COSTS_N_INSNS (18)}, /* other */
407 COSTS_N_INSNS (2), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */
411 3, /* cost for loading QImode using movzbl */
412 {4, 5, 4}, /* cost of loading integer registers
413 in QImode, HImode and SImode.
414 Relative to reg-reg move (2). */
415 {2, 3, 2}, /* cost of storing integer registers */
416 4, /* cost of reg,reg fld/fst */
417 {6, 6, 6}, /* cost of loading fp registers
418 in SFmode, DFmode and XFmode */
419 {4, 4, 4}, /* cost of storing fp registers
420 in SFmode, DFmode and XFmode */
421 2, /* cost of moving MMX register */
422 {2, 2}, /* cost of loading MMX registers
423 in SImode and DImode */
424 {2, 2}, /* cost of storing MMX registers
425 in SImode and DImode */
426 2, /* cost of moving SSE register */
427 {2, 2, 8}, /* cost of loading SSE registers
428 in SImode, DImode and TImode */
429 {2, 2, 8}, /* cost of storing SSE registers
430 in SImode, DImode and TImode */
431 6, /* MMX or SSE register to integer */
432 32, /* size of prefetch block */
433 1, /* number of parallel prefetches */
435 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
436 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
437 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
440 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the AMD Athlon.
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
444 struct processor_costs athlon_cost
= {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (2), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (5), /* HI */
451 COSTS_N_INSNS (5), /* SI */
452 COSTS_N_INSNS (5), /* DI */
453 COSTS_N_INSNS (5)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (26), /* HI */
457 COSTS_N_INSNS (42), /* SI */
458 COSTS_N_INSNS (74), /* DI */
459 COSTS_N_INSNS (74)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
464 4, /* cost for loading QImode using movzbl */
465 {3, 4, 3}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {3, 4, 3}, /* cost of storing integer registers */
469 4, /* cost of reg,reg fld/fst */
470 {4, 4, 12}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {6, 6, 8}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {4, 4}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {4, 4}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {4, 4, 6}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {4, 4, 5}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 5, /* MMX or SSE register to integer */
485 64, /* size of prefetch block */
486 6, /* number of parallel prefetches */
488 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
497 struct processor_costs k8_cost
= {
498 COSTS_N_INSNS (1), /* cost of an add instruction */
499 COSTS_N_INSNS (2), /* cost of a lea instruction */
500 COSTS_N_INSNS (1), /* variable shift costs */
501 COSTS_N_INSNS (1), /* constant shift costs */
502 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (4), /* HI */
504 COSTS_N_INSNS (3), /* SI */
505 COSTS_N_INSNS (4), /* DI */
506 COSTS_N_INSNS (5)}, /* other */
507 0, /* cost of multiply per each bit set */
508 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
509 COSTS_N_INSNS (26), /* HI */
510 COSTS_N_INSNS (42), /* SI */
511 COSTS_N_INSNS (74), /* DI */
512 COSTS_N_INSNS (74)}, /* other */
513 COSTS_N_INSNS (1), /* cost of movsx */
514 COSTS_N_INSNS (1), /* cost of movzx */
515 8, /* "large" insn */
517 4, /* cost for loading QImode using movzbl */
518 {3, 4, 3}, /* cost of loading integer registers
519 in QImode, HImode and SImode.
520 Relative to reg-reg move (2). */
521 {3, 4, 3}, /* cost of storing integer registers */
522 4, /* cost of reg,reg fld/fst */
523 {4, 4, 12}, /* cost of loading fp registers
524 in SFmode, DFmode and XFmode */
525 {6, 6, 8}, /* cost of storing fp registers
526 in SFmode, DFmode and XFmode */
527 2, /* cost of moving MMX register */
528 {3, 3}, /* cost of loading MMX registers
529 in SImode and DImode */
530 {4, 4}, /* cost of storing MMX registers
531 in SImode and DImode */
532 2, /* cost of moving SSE register */
533 {4, 3, 6}, /* cost of loading SSE registers
534 in SImode, DImode and TImode */
535 {4, 4, 5}, /* cost of storing SSE registers
536 in SImode, DImode and TImode */
537 5, /* MMX or SSE register to integer */
538 64, /* size of prefetch block */
539 6, /* number of parallel prefetches */
541 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
542 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
543 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
544 COSTS_N_INSNS (2), /* cost of FABS instruction. */
545 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
546 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cycle-count cost table for the Pentium 4 (NetBurst).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
550 struct processor_costs pentium4_cost
= {
551 COSTS_N_INSNS (1), /* cost of an add instruction */
552 COSTS_N_INSNS (3), /* cost of a lea instruction */
553 COSTS_N_INSNS (4), /* variable shift costs */
554 COSTS_N_INSNS (4), /* constant shift costs */
555 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
556 COSTS_N_INSNS (15), /* HI */
557 COSTS_N_INSNS (15), /* SI */
558 COSTS_N_INSNS (15), /* DI */
559 COSTS_N_INSNS (15)}, /* other */
560 0, /* cost of multiply per each bit set */
561 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
562 COSTS_N_INSNS (56), /* HI */
563 COSTS_N_INSNS (56), /* SI */
564 COSTS_N_INSNS (56), /* DI */
565 COSTS_N_INSNS (56)}, /* other */
566 COSTS_N_INSNS (1), /* cost of movsx */
567 COSTS_N_INSNS (1), /* cost of movzx */
568 16, /* "large" insn */
570 2, /* cost for loading QImode using movzbl */
571 {4, 5, 4}, /* cost of loading integer registers
572 in QImode, HImode and SImode.
573 Relative to reg-reg move (2). */
574 {2, 3, 2}, /* cost of storing integer registers */
575 2, /* cost of reg,reg fld/fst */
576 {2, 2, 6}, /* cost of loading fp registers
577 in SFmode, DFmode and XFmode */
578 {4, 4, 6}, /* cost of storing fp registers
579 in SFmode, DFmode and XFmode */
580 2, /* cost of moving MMX register */
581 {2, 2}, /* cost of loading MMX registers
582 in SImode and DImode */
583 {2, 2}, /* cost of storing MMX registers
584 in SImode and DImode */
585 12, /* cost of moving SSE register */
586 {12, 12, 12}, /* cost of loading SSE registers
587 in SImode, DImode and TImode */
588 {2, 2, 8}, /* cost of storing SSE registers
589 in SImode, DImode and TImode */
590 10, /* MMX or SSE register to integer */
591 64, /* size of prefetch block */
592 6, /* number of parallel prefetches */
594 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
595 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
596 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
597 COSTS_N_INSNS (2), /* cost of FABS instruction. */
598 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
599 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Cycle-count cost table for Nocona (64-bit Pentium 4 / Xeon).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
603 struct processor_costs nocona_cost
= {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 COSTS_N_INSNS (1), /* cost of a lea instruction */
606 COSTS_N_INSNS (1), /* variable shift costs */
607 COSTS_N_INSNS (1), /* constant shift costs */
608 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
609 COSTS_N_INSNS (10), /* HI */
610 COSTS_N_INSNS (10), /* SI */
611 COSTS_N_INSNS (10), /* DI */
612 COSTS_N_INSNS (10)}, /* other */
613 0, /* cost of multiply per each bit set */
614 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
615 COSTS_N_INSNS (66), /* HI */
616 COSTS_N_INSNS (66), /* SI */
617 COSTS_N_INSNS (66), /* DI */
618 COSTS_N_INSNS (66)}, /* other */
619 COSTS_N_INSNS (1), /* cost of movsx */
620 COSTS_N_INSNS (1), /* cost of movzx */
621 16, /* "large" insn */
623 4, /* cost for loading QImode using movzbl */
624 {4, 4, 4}, /* cost of loading integer registers
625 in QImode, HImode and SImode.
626 Relative to reg-reg move (2). */
627 {4, 4, 4}, /* cost of storing integer registers */
628 3, /* cost of reg,reg fld/fst */
629 {12, 12, 12}, /* cost of loading fp registers
630 in SFmode, DFmode and XFmode */
631 {4, 4, 4}, /* cost of storing fp registers
632 in SFmode, DFmode and XFmode */
633 6, /* cost of moving MMX register */
634 {12, 12}, /* cost of loading MMX registers
635 in SImode and DImode */
636 {12, 12}, /* cost of storing MMX registers
637 in SImode and DImode */
638 6, /* cost of moving SSE register */
639 {12, 12, 12}, /* cost of loading SSE registers
640 in SImode, DImode and TImode */
641 {12, 12, 12}, /* cost of storing SSE registers
642 in SImode, DImode and TImode */
643 8, /* MMX or SSE register to integer */
644 128, /* size of prefetch block */
645 8, /* number of parallel prefetches */
647 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
648 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
649 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
650 COSTS_N_INSNS (3), /* cost of FABS instruction. */
651 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
652 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Blended 64-bit tuning table (used for -mtune=generic on 64-bit).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
655 /* Generic64 should produce code tuned for Nocona and K8. */
657 struct processor_costs generic64_cost
= {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 /* On all chips taken into consideration lea is 2 cycles and more. With
660 this cost however our current implementation of synth_mult results in
661 use of unnecessary temporary registers causing regression on several
662 SPECfp benchmarks. */
663 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
664 COSTS_N_INSNS (1), /* variable shift costs */
665 COSTS_N_INSNS (1), /* constant shift costs */
666 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (4), /* HI */
668 COSTS_N_INSNS (3), /* SI */
669 COSTS_N_INSNS (4), /* DI */
670 COSTS_N_INSNS (2)}, /* other */
671 0, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (26), /* HI */
674 COSTS_N_INSNS (42), /* SI */
675 COSTS_N_INSNS (74), /* DI */
676 COSTS_N_INSNS (74)}, /* other */
677 COSTS_N_INSNS (1), /* cost of movsx */
678 COSTS_N_INSNS (1), /* cost of movzx */
679 8, /* "large" insn */
681 4, /* cost for loading QImode using movzbl */
682 {4, 4, 4}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {4, 4, 4}, /* cost of storing integer registers */
686 4, /* cost of reg,reg fld/fst */
687 {12, 12, 12}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {6, 6, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {8, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {8, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {8, 8, 8}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {8, 8, 8}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 5, /* MMX or SSE register to integer */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
705 is increased to perhaps more appropriate value of 5. */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Blended 32-bit tuning table (used for -mtune=generic on 32-bit).
   NOTE(review): gaps in the embedded upstream line numbers indicate some
   initializer fields (e.g. MOVE_RATIO, branch cost) and the closing "};"
   were lost in extraction -- restore from upstream i386.c.  */
715 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
717 struct processor_costs generic32_cost
= {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (4), /* HI */
724 COSTS_N_INSNS (3), /* SI */
725 COSTS_N_INSNS (4), /* DI */
726 COSTS_N_INSNS (2)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
737 4, /* cost for loading QImode using movzbl */
738 {4, 4, 4}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {4, 4, 4}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {12, 12, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {8, 8}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {8, 8}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {8, 8, 8}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {8, 8, 8}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of prefetch block */
759 6, /* number of parallel prefetches */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
769 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* value selects one
   PROCESSOR_* entry; the compound masks name groups of related CPUs.  */
#define m_386        (1 << PROCESSOR_I386)
#define m_486        (1 << PROCESSOR_I486)
#define m_PENT       (1 << PROCESSOR_PENTIUM)
#define m_PPRO       (1 << PROCESSOR_PENTIUMPRO)
#define m_GEODE      (1 << PROCESSOR_GEODE)
#define m_K6         (1 << PROCESSOR_K6)
#define m_K6_GEODE   (m_K6 | m_GEODE)
#define m_ATHLON     (1 << PROCESSOR_ATHLON)
#define m_PENT4      (1 << PROCESSOR_PENTIUM4)
#define m_K8         (1 << PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA     (1 << PROCESSOR_NOCONA)
#define m_GENERIC32  (1 << PROCESSOR_GENERIC32)
#define m_GENERIC64  (1 << PROCESSOR_GENERIC64)
#define m_GENERIC    (m_GENERIC32 | m_GENERIC64)
/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/Athlon/K8).  */

/* Each x86_* constant below is a bitmask of m_* processor bits; the
   corresponding code-generation strategy is used when the bit of the
   processor being tuned for is set.  */

/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
   generic because it is not working well with PPro base chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4
			    | m_NOCONA | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC
		     | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6
			      | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4
			    | m_NOCONA | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA
			 | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4
			  | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA
			  | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA
				       | m_PPRO | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA
				       | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA
				      | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA
					 | m_PPRO | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8
					   | m_PENT4 | m_NOCONA | m_GENERIC;
/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
   that thread 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful
   scheduling.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO
					   | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers.  Which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4
				    | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA
				| m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT
			 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The actual name tables come from the target headers (i386.h and
   subtarget headers).  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
900 /* Array of the smallest class containing reg number REGNO, indexed by
901 REGNO. Used by REGNO_REG_CLASS in i386.h. */
903 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
906 AREG
, DREG
, CREG
, BREG
,
908 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
910 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
911 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
914 /* flags, fpsr, fpcr, dirflag, frame */
915 NO_REGS
, NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
916 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
918 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
920 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
921 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
922 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* The "default" register map used in 32bit mode.  Maps gcc hard
   register numbers to debugger (DBX/stabs) register numbers; -1 means
   the register has no debugger number.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* gcc hard register numbers of the integer registers used to pass
   arguments in 64-bit mode, in argument order: rdi, rsi, rdx, rcx,
   r8, r9.  */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* gcc hard register numbers of the integer registers used for return
   values in 64-bit mode.  Note: regno 1 is %rdx (see the dwarf
   numbering comment below: "2 for %edx (gcc regno = 1)"); the
   original /*RDI*/ annotation on it was wrong.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  Maps gcc hard
   register numbers to debugger register numbers; -1 means no
   debugger number.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8,9,10,11,12,13,14,15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
962 /* Define the register numbers to be used in Dwarf debugging information.
963 The SVR4 reference port C compiler uses the following register numbers
964 in its Dwarf output code:
965 0 for %eax (gcc regno = 0)
966 1 for %ecx (gcc regno = 2)
967 2 for %edx (gcc regno = 1)
968 3 for %ebx (gcc regno = 3)
969 4 for %esp (gcc regno = 7)
970 5 for %ebp (gcc regno = 6)
971 6 for %esi (gcc regno = 4)
972 7 for %edi (gcc regno = 5)
973 The following three DWARF register numbers are never generated by
974 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
975 believes these numbers have these meanings.
976 8 for %eip (no gcc equivalent)
977 9 for %eflags (gcc regno = 17)
978 10 for %trapno (no gcc equivalent)
979 It is not at all clear how we should number the FP stack registers
980 for the x86 architecture. If the version of SDB on x86/svr4 were
981 a bit less brain dead with respect to floating-point then we would
982 have a precedent to follow with respect to DWARF register numbers
983 for x86 FP registers, but the SDB on x86/svr4 is so completely
984 broken with respect to FP registers that it is hardly worth thinking
985 of it as something to strive for compatibility with.
986 The version of x86/svr4 SDB I have at the moment does (partially)
987 seem to believe that DWARF register number 11 is associated with
988 the x86 register %st(0), but that's about all. Higher DWARF
989 register numbers don't seem to be associated with anything in
990 particular, and even for DWARF regno 11, SDB only seems to under-
991 stand that it should say that a variable lives in %st(0) (when
992 asked via an `=' command) if we said it was in DWARF regno 11,
993 but SDB still prints garbage when asked for the value of the
994 variable in question (via a `/' command).
995 (Also note that the labels SDB prints for various FP stack regs
996 when doing an `x' command are all wrong.)
997 Note that these problems generally don't affect the native SVR4
998 C compiler because it doesn't allow the use of -O with -g and
999 because when it is *not* optimizing, it allocates a memory
1000 location for each floating-point variable, and the memory
1001 location is what gets described in the DWARF AT_location
1002 attribute for the variable in question.
1003 Regardless of the severe mental illness of the x86/svr4 SDB, we
1004 do something sensible here and we use the following DWARF
1005 register numbers. Note that these are all stack-top-relative
1007 11 for %st(0) (gcc regno = 8)
1008 12 for %st(1) (gcc regno = 9)
1009 13 for %st(2) (gcc regno = 10)
1010 14 for %st(3) (gcc regno = 11)
1011 15 for %st(4) (gcc regno = 12)
1012 16 for %st(5) (gcc regno = 13)
1013 17 for %st(6) (gcc regno = 14)
1014 18 for %st(7) (gcc regno = 15)
/* DWARF register map following the SVR4 numbering scheme documented
   in the long comment above (e.g. DWARF 1 is %ecx = gcc regno 2,
   DWARF 9 is %eflags = gcc regno 17).  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1, -1,		/* arg, flags, fpsr, fpcr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1037 /* Define the structure for the machine field in struct function. */
1039 struct stack_local_entry
GTY(())
1041 unsigned short mode
;
1044 struct stack_local_entry
*next
;
1047 /* Structure describing stack frame layout.
1048 Stack grows downward:
1054 saved frame pointer if frame_pointer_needed
1055 <- HARD_FRAME_POINTER
1060 [va_arg registers] (
1061 > to_allocate <- FRAME_POINTER
1071 HOST_WIDE_INT frame
;
1073 int outgoing_arguments_size
;
1076 HOST_WIDE_INT to_allocate
;
1077 /* The offsets relative to ARG_POINTER. */
1078 HOST_WIDE_INT frame_pointer_offset
;
1079 HOST_WIDE_INT hard_frame_pointer_offset
;
1080 HOST_WIDE_INT stack_pointer_offset
;
1082 /* When save_regs_using_mov is set, emit prologue using
1083 move instead of push instructions. */
1084 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Selected asm syntax; defaults to AT&T.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Selected TLS access dialect; defaults to GNU.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1127 static bool ix86_handle_option (size_t, const char *, int);
1128 static void output_pic_addr_const (FILE *, rtx
, int);
1129 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1131 static const char *get_some_local_dynamic_name (void);
1132 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1133 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1134 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1136 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1137 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1139 static rtx
get_thread_pointer (int);
1140 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1141 static void get_pc_thunk_name (char [32], unsigned int);
1142 static rtx
gen_push (rtx
);
1143 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1144 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1145 static struct machine_function
* ix86_init_machine_status (void);
1146 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1147 static int ix86_nsaved_regs (void);
1148 static void ix86_emit_save_regs (void);
1149 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1150 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1151 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1152 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1153 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1154 static rtx
ix86_expand_aligntest (rtx
, int);
1155 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1156 static int ix86_issue_rate (void);
1157 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1158 static int ia32_multipass_dfa_lookahead (void);
1159 static void ix86_init_mmx_sse_builtins (void);
1160 static rtx
x86_this_parameter (tree
);
1161 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1162 HOST_WIDE_INT
, tree
);
1163 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1164 static void x86_file_start (void);
1165 static void ix86_reorg (void);
1166 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1167 static tree
ix86_build_builtin_va_list (void);
1168 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1170 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1171 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1172 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1174 static int ix86_address_cost (rtx
);
1175 static bool ix86_cannot_force_const_mem (rtx
);
1176 static rtx
ix86_delegitimize_address (rtx
);
1178 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1180 struct builtin_description
;
1181 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1183 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1185 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1186 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1187 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1188 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1189 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1190 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1191 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1192 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1193 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1194 static int ix86_fp_comparison_cost (enum rtx_code code
);
1195 static unsigned int ix86_select_alt_pic_regnum (void);
1196 static int ix86_save_reg (unsigned int, int);
1197 static void ix86_compute_frame_layout (struct ix86_frame
*);
1198 static int ix86_comp_type_attributes (tree
, tree
);
1199 static int ix86_function_regparm (tree
, tree
);
1200 const struct attribute_spec ix86_attribute_table
[];
1201 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1202 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1203 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1204 static bool contains_128bit_aligned_vector_p (tree
);
1205 static rtx
ix86_struct_value_rtx (tree
, int);
1206 static bool ix86_ms_bitfield_layout_p (tree
);
1207 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1208 static int extended_reg_mentioned_1 (rtx
*, void *);
1209 static bool ix86_rtx_costs (rtx
, int, int, int *);
1210 static int min_insn_size (rtx
);
1211 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1212 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1213 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1215 static void ix86_init_builtins (void);
1216 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1217 static const char *ix86_mangle_fundamental_type (tree
);
1218 static tree
ix86_stack_protect_fail (void);
1219 static rtx
ix86_internal_arg_pointer (void);
1220 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1222 /* This function is only used on Solaris. */
1223 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1226 /* Register class used for passing given 64bit part of the argument.
1227 These represent classes as documented by the PS ABI, with the exception
1228 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1229 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1231 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1232 whenever possible (upper half does contain padding).
1234 enum x86_64_reg_class
1237 X86_64_INTEGER_CLASS
,
1238 X86_64_INTEGERSI_CLASS
,
1245 X86_64_COMPLEX_X87_CLASS
,
/* Human-readable names for the x86_64_reg_class values, used for
   debug output; indexed by the enum value.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
1259 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1260 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1261 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1262 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1263 unsigned HOST_WIDE_INT align
)
1266 /* Initialize the GCC target structure. */
1267 #undef TARGET_ATTRIBUTE_TABLE
1268 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1269 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1270 # undef TARGET_MERGE_DECL_ATTRIBUTES
1271 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1274 #undef TARGET_COMP_TYPE_ATTRIBUTES
1275 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1277 #undef TARGET_INIT_BUILTINS
1278 #define TARGET_INIT_BUILTINS ix86_init_builtins
1279 #undef TARGET_EXPAND_BUILTIN
1280 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1282 #undef TARGET_ASM_FUNCTION_EPILOGUE
1283 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1285 #undef TARGET_ENCODE_SECTION_INFO
1286 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1287 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1289 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1292 #undef TARGET_ASM_OPEN_PAREN
1293 #define TARGET_ASM_OPEN_PAREN ""
1294 #undef TARGET_ASM_CLOSE_PAREN
1295 #define TARGET_ASM_CLOSE_PAREN ""
1297 #undef TARGET_ASM_ALIGNED_HI_OP
1298 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1299 #undef TARGET_ASM_ALIGNED_SI_OP
1300 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1302 #undef TARGET_ASM_ALIGNED_DI_OP
1303 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1306 #undef TARGET_ASM_UNALIGNED_HI_OP
1307 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1308 #undef TARGET_ASM_UNALIGNED_SI_OP
1309 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1310 #undef TARGET_ASM_UNALIGNED_DI_OP
1311 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1313 #undef TARGET_SCHED_ADJUST_COST
1314 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1315 #undef TARGET_SCHED_ISSUE_RATE
1316 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1317 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1318 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1319 ia32_multipass_dfa_lookahead
1321 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1322 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1325 #undef TARGET_HAVE_TLS
1326 #define TARGET_HAVE_TLS true
1328 #undef TARGET_CANNOT_FORCE_CONST_MEM
1329 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1330 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1331 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1333 #undef TARGET_DELEGITIMIZE_ADDRESS
1334 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1336 #undef TARGET_MS_BITFIELD_LAYOUT_P
1337 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1340 #undef TARGET_BINDS_LOCAL_P
1341 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1344 #undef TARGET_ASM_OUTPUT_MI_THUNK
1345 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1346 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1349 #undef TARGET_ASM_FILE_START
1350 #define TARGET_ASM_FILE_START x86_file_start
1352 #undef TARGET_DEFAULT_TARGET_FLAGS
1353 #define TARGET_DEFAULT_TARGET_FLAGS \
1355 | TARGET_64BIT_DEFAULT \
1356 | TARGET_SUBTARGET_DEFAULT \
1357 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1359 #undef TARGET_HANDLE_OPTION
1360 #define TARGET_HANDLE_OPTION ix86_handle_option
1362 #undef TARGET_RTX_COSTS
1363 #define TARGET_RTX_COSTS ix86_rtx_costs
1364 #undef TARGET_ADDRESS_COST
1365 #define TARGET_ADDRESS_COST ix86_address_cost
1367 #undef TARGET_FIXED_CONDITION_CODE_REGS
1368 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1369 #undef TARGET_CC_MODES_COMPATIBLE
1370 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1372 #undef TARGET_MACHINE_DEPENDENT_REORG
1373 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1375 #undef TARGET_BUILD_BUILTIN_VA_LIST
1376 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1378 #undef TARGET_MD_ASM_CLOBBERS
1379 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1381 #undef TARGET_PROMOTE_PROTOTYPES
1382 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1383 #undef TARGET_STRUCT_VALUE_RTX
1384 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1385 #undef TARGET_SETUP_INCOMING_VARARGS
1386 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1387 #undef TARGET_MUST_PASS_IN_STACK
1388 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1389 #undef TARGET_PASS_BY_REFERENCE
1390 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1391 #undef TARGET_INTERNAL_ARG_POINTER
1392 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1393 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1394 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1396 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1397 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1399 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1400 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1402 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1403 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1406 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1407 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1410 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1411 #undef TARGET_INSERT_ATTRIBUTES
1412 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1415 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1416 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1418 #undef TARGET_STACK_PROTECT_FAIL
1419 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1421 #undef TARGET_FUNCTION_VALUE
1422 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The target hook vector; TARGET_INITIALIZER picks up all the
   TARGET_* macro overrides defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
1427 /* The svr4 ABI for the i386 says that records and unions are returned
/* Default to returning small structs in memory (the svr4 i386 ABI)
   unless a subtarget header already chose otherwise.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1433 /* Implement TARGET_HANDLE_OPTION. */
1436 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1443 target_flags
&= ~MASK_3DNOW_A
;
1444 target_flags_explicit
|= MASK_3DNOW_A
;
1451 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1452 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1459 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
);
1460 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
;
1467 target_flags
&= ~MASK_SSE3
;
1468 target_flags_explicit
|= MASK_SSE3
;
1477 /* Sometimes certain combinations of command options do not make
1478 sense on a particular target machine. You can define a macro
1479 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1480 defined, is executed once just after all the command options have
1483 Don't use this macro to turn on various extra optimizations for
1484 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1487 override_options (void)
1490 int ix86_tune_defaulted
= 0;
1492 /* Comes from final.c -- no real reason to change it. */
1493 #define MAX_CODE_ALIGN 16
1497 const struct processor_costs
*cost
; /* Processor costs */
1498 const int target_enable
; /* Target flags to enable. */
1499 const int target_disable
; /* Target flags to disable. */
1500 const int align_loop
; /* Default alignments. */
1501 const int align_loop_max_skip
;
1502 const int align_jump
;
1503 const int align_jump_max_skip
;
1504 const int align_func
;
1506 const processor_target_table
[PROCESSOR_max
] =
1508 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1509 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1510 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1511 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1512 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1513 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1514 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1515 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1516 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1517 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1518 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1519 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16}
1522 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1525 const char *const name
; /* processor name or nickname. */
1526 const enum processor_type processor
;
1527 const enum pta_flags
1533 PTA_PREFETCH_SSE
= 16,
1540 const processor_alias_table
[] =
1542 {"i386", PROCESSOR_I386
, 0},
1543 {"i486", PROCESSOR_I486
, 0},
1544 {"i586", PROCESSOR_PENTIUM
, 0},
1545 {"pentium", PROCESSOR_PENTIUM
, 0},
1546 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1547 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1548 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1549 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1550 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1551 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1552 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1553 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1554 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1555 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1556 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1557 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1558 | PTA_MMX
| PTA_PREFETCH_SSE
},
1559 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1560 | PTA_MMX
| PTA_PREFETCH_SSE
},
1561 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1562 | PTA_MMX
| PTA_PREFETCH_SSE
},
1563 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1564 | PTA_MMX
| PTA_PREFETCH_SSE
},
1565 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1567 {"k6", PROCESSOR_K6
, PTA_MMX
},
1568 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1569 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1570 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1572 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1573 | PTA_3DNOW
| PTA_3DNOW_A
},
1574 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1575 | PTA_3DNOW_A
| PTA_SSE
},
1576 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1577 | PTA_3DNOW_A
| PTA_SSE
},
1578 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1579 | PTA_3DNOW_A
| PTA_SSE
},
1580 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1581 | PTA_SSE
| PTA_SSE2
},
1582 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1583 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1584 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1585 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1586 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1587 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1588 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1589 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1590 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1591 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1594 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1596 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1597 SUBTARGET_OVERRIDE_OPTIONS
;
1600 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1601 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1604 /* -fPIC is the default for x86_64. */
1605 if (TARGET_MACHO
&& TARGET_64BIT
)
1608 /* Set the default values for switches whose default depends on TARGET_64BIT
1609 in case they weren't overwritten by command line options. */
1612 /* Mach-O doesn't support omitting the frame pointer for now. */
1613 if (flag_omit_frame_pointer
== 2)
1614 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1615 if (flag_asynchronous_unwind_tables
== 2)
1616 flag_asynchronous_unwind_tables
= 1;
1617 if (flag_pcc_struct_return
== 2)
1618 flag_pcc_struct_return
= 0;
1622 if (flag_omit_frame_pointer
== 2)
1623 flag_omit_frame_pointer
= 0;
1624 if (flag_asynchronous_unwind_tables
== 2)
1625 flag_asynchronous_unwind_tables
= 0;
1626 if (flag_pcc_struct_return
== 2)
1627 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1630 /* Need to check -mtune=generic first. */
1631 if (ix86_tune_string
)
1633 if (!strcmp (ix86_tune_string
, "generic")
1634 || !strcmp (ix86_tune_string
, "i686")
1635 /* As special support for cross compilers we read -mtune=native
1636 as -mtune=generic. With native compilers we won't see the
1637 -mtune=native, as it was changed by the driver. */
1638 || !strcmp (ix86_tune_string
, "native"))
1641 ix86_tune_string
= "generic64";
1643 ix86_tune_string
= "generic32";
1645 else if (!strncmp (ix86_tune_string
, "generic", 7))
1646 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1650 if (ix86_arch_string
)
1651 ix86_tune_string
= ix86_arch_string
;
1652 if (!ix86_tune_string
)
1654 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1655 ix86_tune_defaulted
= 1;
1658 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1659 need to use a sensible tune option. */
1660 if (!strcmp (ix86_tune_string
, "generic")
1661 || !strcmp (ix86_tune_string
, "x86-64")
1662 || !strcmp (ix86_tune_string
, "i686"))
1665 ix86_tune_string
= "generic64";
1667 ix86_tune_string
= "generic32";
1670 if (!strcmp (ix86_tune_string
, "x86-64"))
1671 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1672 "-mtune=generic instead as appropriate.");
1674 if (!ix86_arch_string
)
1675 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1676 if (!strcmp (ix86_arch_string
, "generic"))
1677 error ("generic CPU can be used only for -mtune= switch");
1678 if (!strncmp (ix86_arch_string
, "generic", 7))
1679 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1681 if (ix86_cmodel_string
!= 0)
1683 if (!strcmp (ix86_cmodel_string
, "small"))
1684 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1685 else if (!strcmp (ix86_cmodel_string
, "medium"))
1686 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1688 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1689 else if (!strcmp (ix86_cmodel_string
, "32"))
1690 ix86_cmodel
= CM_32
;
1691 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1692 ix86_cmodel
= CM_KERNEL
;
1693 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1694 ix86_cmodel
= CM_LARGE
;
1696 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1700 ix86_cmodel
= CM_32
;
1702 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1704 if (ix86_asm_string
!= 0)
1707 && !strcmp (ix86_asm_string
, "intel"))
1708 ix86_asm_dialect
= ASM_INTEL
;
1709 else if (!strcmp (ix86_asm_string
, "att"))
1710 ix86_asm_dialect
= ASM_ATT
;
1712 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1714 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1715 error ("code model %qs not supported in the %s bit mode",
1716 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1717 if (ix86_cmodel
== CM_LARGE
)
1718 sorry ("code model %<large%> not supported yet");
1719 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1720 sorry ("%i-bit mode not compiled in",
1721 (target_flags
& MASK_64BIT
) ? 64 : 32);
1723 for (i
= 0; i
< pta_size
; i
++)
1724 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1726 ix86_arch
= processor_alias_table
[i
].processor
;
1727 /* Default cpu tuning to the architecture. */
1728 ix86_tune
= ix86_arch
;
1729 if (processor_alias_table
[i
].flags
& PTA_MMX
1730 && !(target_flags_explicit
& MASK_MMX
))
1731 target_flags
|= MASK_MMX
;
1732 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1733 && !(target_flags_explicit
& MASK_3DNOW
))
1734 target_flags
|= MASK_3DNOW
;
1735 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1736 && !(target_flags_explicit
& MASK_3DNOW_A
))
1737 target_flags
|= MASK_3DNOW_A
;
1738 if (processor_alias_table
[i
].flags
& PTA_SSE
1739 && !(target_flags_explicit
& MASK_SSE
))
1740 target_flags
|= MASK_SSE
;
1741 if (processor_alias_table
[i
].flags
& PTA_SSE2
1742 && !(target_flags_explicit
& MASK_SSE2
))
1743 target_flags
|= MASK_SSE2
;
1744 if (processor_alias_table
[i
].flags
& PTA_SSE3
1745 && !(target_flags_explicit
& MASK_SSE3
))
1746 target_flags
|= MASK_SSE3
;
1747 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1748 && !(target_flags_explicit
& MASK_SSSE3
))
1749 target_flags
|= MASK_SSSE3
;
1750 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1751 x86_prefetch_sse
= true;
1752 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1753 error ("CPU you selected does not support x86-64 "
1759 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1761 for (i
= 0; i
< pta_size
; i
++)
1762 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1764 ix86_tune
= processor_alias_table
[i
].processor
;
1765 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1767 if (ix86_tune_defaulted
)
1769 ix86_tune_string
= "x86-64";
1770 for (i
= 0; i
< pta_size
; i
++)
1771 if (! strcmp (ix86_tune_string
,
1772 processor_alias_table
[i
].name
))
1774 ix86_tune
= processor_alias_table
[i
].processor
;
1777 error ("CPU you selected does not support x86-64 "
1780 /* Intel CPUs have always interpreted SSE prefetch instructions as
1781 NOPs; so, we can enable SSE prefetch instructions even when
1782 -mtune (rather than -march) points us to a processor that has them.
1783 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1784 higher processors. */
1785 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1786 x86_prefetch_sse
= true;
1790 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1793 ix86_cost
= &size_cost
;
1795 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1796 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1797 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1799 /* Arrange to set up i386_stack_locals for all functions. */
1800 init_machine_status
= ix86_init_machine_status
;
1802 /* Validate -mregparm= value. */
1803 if (ix86_regparm_string
)
1805 i
= atoi (ix86_regparm_string
);
1806 if (i
< 0 || i
> REGPARM_MAX
)
1807 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1813 ix86_regparm
= REGPARM_MAX
;
1815 /* If the user has provided any of the -malign-* options,
1816 warn and use that value only if -falign-* is not set.
1817 Remove this code in GCC 3.2 or later. */
1818 if (ix86_align_loops_string
)
1820 warning (0, "-malign-loops is obsolete, use -falign-loops");
1821 if (align_loops
== 0)
1823 i
= atoi (ix86_align_loops_string
);
1824 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1825 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1827 align_loops
= 1 << i
;
1831 if (ix86_align_jumps_string
)
1833 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1834 if (align_jumps
== 0)
1836 i
= atoi (ix86_align_jumps_string
);
1837 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1838 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1840 align_jumps
= 1 << i
;
1844 if (ix86_align_funcs_string
)
1846 warning (0, "-malign-functions is obsolete, use -falign-functions");
1847 if (align_functions
== 0)
1849 i
= atoi (ix86_align_funcs_string
);
1850 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1851 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1853 align_functions
= 1 << i
;
1857 /* Default align_* from the processor table. */
1858 if (align_loops
== 0)
1860 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1861 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1863 if (align_jumps
== 0)
1865 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1866 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1868 if (align_functions
== 0)
1870 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1873 /* Validate -mbranch-cost= value, or provide default. */
1874 ix86_branch_cost
= ix86_cost
->branch_cost
;
1875 if (ix86_branch_cost_string
)
1877 i
= atoi (ix86_branch_cost_string
);
1879 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1881 ix86_branch_cost
= i
;
1883 if (ix86_section_threshold_string
)
1885 i
= atoi (ix86_section_threshold_string
);
1887 error ("-mlarge-data-threshold=%d is negative", i
);
1889 ix86_section_threshold
= i
;
1892 if (ix86_tls_dialect_string
)
1894 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1895 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1896 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
1897 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
1898 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1899 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1901 error ("bad value (%s) for -mtls-dialect= switch",
1902 ix86_tls_dialect_string
);
1905 /* Keep nonleaf frame pointers. */
1906 if (flag_omit_frame_pointer
)
1907 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1908 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1909 flag_omit_frame_pointer
= 1;
1911 /* If we're doing fast math, we don't care about comparison order
1912 wrt NaNs. This lets us use a shorter comparison sequence. */
1913 if (flag_finite_math_only
)
1914 target_flags
&= ~MASK_IEEE_FP
;
1916 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1917 since the insns won't need emulation. */
1918 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1919 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1921 /* Likewise, if the target doesn't have a 387, or we've specified
1922 software floating point, don't use 387 inline intrinsics. */
1924 target_flags
|= MASK_NO_FANCY_MATH_387
;
1926 /* Turn on SSE3 builtins for -mssse3. */
1928 target_flags
|= MASK_SSE3
;
1930 /* Turn on SSE2 builtins for -msse3. */
1932 target_flags
|= MASK_SSE2
;
1934 /* Turn on SSE builtins for -msse2. */
1936 target_flags
|= MASK_SSE
;
1938 /* Turn on MMX builtins for -msse. */
1941 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1942 x86_prefetch_sse
= true;
1945 /* Turn on MMX builtins for 3Dnow. */
1947 target_flags
|= MASK_MMX
;
1951 if (TARGET_ALIGN_DOUBLE
)
1952 error ("-malign-double makes no sense in the 64bit mode");
1954 error ("-mrtd calling convention not supported in the 64bit mode");
1956 /* Enable by default the SSE and MMX builtins. Do allow the user to
1957 explicitly disable any of these. In particular, disabling SSE and
1958 MMX for kernel code is extremely useful. */
1960 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1961 & ~target_flags_explicit
);
1965 /* i386 ABI does not specify red zone. It still makes sense to use it
1966 when programmer takes care to stack from being destroyed. */
1967 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1968 target_flags
|= MASK_NO_RED_ZONE
;
1971 /* Validate -mpreferred-stack-boundary= value, or provide default.
1972 The default of 128 bits is for Pentium III's SSE __m128. We can't
1973 change it because of optimize_size. Otherwise, we can't mix object
1974 files compiled with -Os and -On. */
1975 ix86_preferred_stack_boundary
= 128;
1976 if (ix86_preferred_stack_boundary_string
)
1978 i
= atoi (ix86_preferred_stack_boundary_string
);
1979 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1980 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1981 TARGET_64BIT
? 4 : 2);
1983 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1986 /* Accept -msseregparm only if at least SSE support is enabled. */
1987 if (TARGET_SSEREGPARM
1989 error ("-msseregparm used without SSE enabled");
1991 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
1993 if (ix86_fpmath_string
!= 0)
1995 if (! strcmp (ix86_fpmath_string
, "387"))
1996 ix86_fpmath
= FPMATH_387
;
1997 else if (! strcmp (ix86_fpmath_string
, "sse"))
2001 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2002 ix86_fpmath
= FPMATH_387
;
2005 ix86_fpmath
= FPMATH_SSE
;
2007 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2008 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2012 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2013 ix86_fpmath
= FPMATH_387
;
2015 else if (!TARGET_80387
)
2017 warning (0, "387 instruction set disabled, using SSE arithmetics");
2018 ix86_fpmath
= FPMATH_SSE
;
2021 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2024 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2027 /* If the i387 is disabled, then do not return values in it. */
2029 target_flags
&= ~MASK_FLOAT_RETURNS
;
2031 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2032 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2034 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2036 /* ??? Unwind info is not correct around the CFG unless either a frame
2037 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2038 unwind info generation to be aware of the CFG and propagating states
2040 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2041 || flag_exceptions
|| flag_non_call_exceptions
)
2042 && flag_omit_frame_pointer
2043 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2045 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2046 warning (0, "unwind tables currently require either a frame pointer "
2047 "or -maccumulate-outgoing-args for correctness");
2048 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2051 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2054 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2055 p
= strchr (internal_label_prefix
, 'X');
2056 internal_label_prefix_len
= p
- internal_label_prefix
;
2060 /* When scheduling description is not available, disable scheduler pass
2061 so it won't slow down the compilation and make x87 code slower. */
2062 if (!TARGET_SCHEDULE
)
2063 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2066 /* Switch to the appropriate section for output of DECL.
2067 DECL is either a `VAR_DECL' node or a constant of some sort.
2068 RELOC indicates whether forming the initial value of DECL requires
2069 link-time relocations. */
/* NOTE(review): extraction appears to have dropped lines of this function
   (the return-type line, braces, several `case`/`break` lines between the
   embedded original line numbers 2080-2129); the fragments below are kept
   byte-identical.  For decls placed in large data (medium code model), this
   routine picks an ".ldata"-prefixed section; otherwise it defers to
   default_elf_select_section.  */
2072 x86_64_elf_select_section (tree decl
, int reloc
,
2073 unsigned HOST_WIDE_INT align
)
/* Only the medium code models split large data into .l* sections.  */
2075 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2076 && ix86_in_large_data_p (decl
))
2078 const char *sname
= NULL
;
2079 unsigned int flags
= SECTION_WRITE
;
2080 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2085 case SECCAT_DATA_REL
:
2086 sname
= ".ldata.rel";
2088 case SECCAT_DATA_REL_LOCAL
:
2089 sname
= ".ldata.rel.local";
2091 case SECCAT_DATA_REL_RO
:
2092 sname
= ".ldata.rel.ro";
2094 case SECCAT_DATA_REL_RO_LOCAL
:
2095 sname
= ".ldata.rel.ro.local";
/* BSS-categorized decls additionally get the SECTION_BSS flag.  */
2099 flags
|= SECTION_BSS
;
2102 case SECCAT_RODATA_MERGE_STR
:
2103 case SECCAT_RODATA_MERGE_STR_INIT
:
2104 case SECCAT_RODATA_MERGE_CONST
:
2108 case SECCAT_SRODATA
:
2115 /* We don't split these for medium model.  Place them into
2116 default sections and hope for best. */
2121 /* We might get called with string constants, but get_named_section
2122 doesn't like them as they are not DECLs.  Also, we need to set
2123 flags in that case. */
2125 return get_section (sname
, flags
, NULL
);
2126 return get_named_section (decl
, sname
, reloc
);
/* Not large-model data: use the generic ELF section selection.  */
2129 return default_elf_select_section (decl
, reloc
, align
);
2132 /* Build up a unique section name, expressed as a
2133 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2134 RELOC indicates whether the initial value of EXP requires
2135 link-time relocations. */
/* NOTE(review): extraction appears to have dropped lines within this
   function (braces, `break;`s, the declarations of `plen'/`name'/`nlen'/
   `string', and a closing brace); the visible fragments are kept
   byte-identical.  Mirrors x86_64_elf_select_section: large-model data gets
   an ".l"-prefixed (or .gnu.linkonce.l*) unique section name.  */
2138 x86_64_elf_unique_section (tree decl
, int reloc
)
2140 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2141 && ix86_in_large_data_p (decl
))
2143 const char *prefix
= NULL
;
2144 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2145 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2147 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2150 case SECCAT_DATA_REL
:
2151 case SECCAT_DATA_REL_LOCAL
:
2152 case SECCAT_DATA_REL_RO
:
2153 case SECCAT_DATA_REL_RO_LOCAL
:
2154 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2157 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2160 case SECCAT_RODATA_MERGE_STR
:
2161 case SECCAT_RODATA_MERGE_STR_INIT
:
2162 case SECCAT_RODATA_MERGE_CONST
:
2163 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2165 case SECCAT_SRODATA
:
2172 /* We don't split these for medium model.  Place them into
2173 default sections and hope for best. */
/* Concatenate the chosen prefix with the stripped assembler name
   into a stack buffer, then record it as the decl's section name.  */
2181 plen
= strlen (prefix
);
2183 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2184 name
= targetm
.strip_name_encoding (name
);
2185 nlen
= strlen (name
);
2187 string
= alloca (nlen
+ plen
+ 1);
2188 memcpy (string
, prefix
, plen
);
/* nlen + 1 copies the trailing NUL as well.  */
2189 memcpy (string
+ plen
, name
, nlen
+ 1);
2191 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Fallback for everything that is not medium-model large data.  */
2195 default_unique_section (decl
, reloc
);
2198 #ifdef COMMON_ASM_OP
2199 /* This says how to output assembler code to declare an
2200 uninitialized external linkage data object.
2202 For medium model x86-64 we need to use .largecomm opcode for
2205 x86_elf_aligned_common (FILE *file
,
2206 const char *name
, unsigned HOST_WIDE_INT size
,
/* NOTE(review): the return type, the `align' parameter line, braces and an
   `else' appear to have been dropped by extraction; fragments are kept
   byte-identical.  Emits ".largecomm" for medium-model objects above the
   -mlarge-data-threshold, otherwise the ordinary COMMON_ASM_OP.  */
2209 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2210 && size
> (unsigned int)ix86_section_threshold
)
2211 fprintf (file
, ".largecomm\t");
2213 fprintf (file
, "%s", COMMON_ASM_OP
);
2214 assemble_name (file
, name
);
/* Alignment is emitted in bytes, hence the BITS_PER_UNIT division.  */
2215 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2216 size
, align
/ BITS_PER_UNIT
);
2219 /* Utility function for targets to use in implementing
2220 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): return type, parameter tail, braces and an `else' appear to
   be missing from this extracted chunk; fragments kept byte-identical.
   Switches to .lbss for medium-model objects over the large-data threshold,
   otherwise to the regular BSS section, then emits alignment, the object
   label, and the skip directive reserving its storage.  */
2223 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2224 const char *name
, unsigned HOST_WIDE_INT size
,
2227 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2228 && size
> (unsigned int)ix86_section_threshold
)
2229 switch_to_section (get_named_section (decl
, ".lbss", 0));
2231 switch_to_section (bss_section
);
2232 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2233 #ifdef ASM_DECLARE_OBJECT_NAME
2234 last_assemble_variable_decl
= decl
;
2235 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2237 /* Standard thing is just output label for the object. */
2238 ASM_OUTPUT_LABEL (file
, name
);
2239 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte even for zero-sized objects.  */
2240 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Set default optimization-dependent options before the command line is
   parsed; TARGET_64BIT is not known yet, so tristate flags are set to 2
   ("unset") and resolved later in override_options.
   NOTE(review): the return type line, braces and `if (level > 1)' /
   TARGET_MACHO conditions appear to have been dropped by extraction;
   fragments kept byte-identical.  */
2245 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2247 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2248 make the problem with not enough registers even worse. */
2249 #ifdef INSN_SCHEDULING
2251 flag_schedule_insns
= 0;
2255 /* The Darwin libraries never set errno, so we might as well
2256 avoid calling them when that's the only reason we would. */
2257 flag_errno_math
= 0;
2259 /* The default values of these switches depend on the TARGET_64BIT
2260 that is not known at this moment.  Mark these values with 2 and
2261 let the user override these.  In case there is no command line option
2262 specifying them, we will set the defaults in override_options. */
2264 flag_omit_frame_pointer
= 2;
2265 flag_pcc_struct_return
= 2;
2266 flag_asynchronous_unwind_tables
= 2;
2267 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2268 SUBTARGET_OPTIMIZATION_OPTIONS
;
2272 /* Table of valid machine attributes. */
/* NOTE(review): the opening `{', some #endif lines and the closing `};'
   appear to be missing from this extracted chunk; entries kept
   byte-identical.  Each row follows the attribute_spec layout noted below;
   the table is terminated by the all-NULL sentinel entry.  */
2273 const struct attribute_spec ix86_attribute_table
[] =
2275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2276 /* Stdcall attribute says callee is responsible for popping arguments
2277 if they are not variable. */
2278 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2279 /* Fastcall attribute says callee is responsible for popping arguments
2280 if they are not variable. */
2281 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2282 /* Cdecl attribute says the callee is a normal C declaration */
2283 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2284 /* Regparm attribute specifies how many integer arguments are to be
2285 passed in registers. */
2286 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2287 /* Sseregparm attribute says we are using x86_64 calling conventions
2288 for FP arguments. */
2289 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2290 /* force_align_arg_pointer says this function realigns the stack at entry. */
2291 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2292 false, true, true, ix86_handle_cconv_attribute
},
2293 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2294 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2295 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2296 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2298 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2299 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2300 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2301 SUBTARGET_ATTRIBUTE_TABLE
,
/* Sentinel: terminates the attribute table.  */
2303 { NULL
, 0, 0, false, false, false, NULL
}
2306 /* Decide whether we can make a sibling call to a function.  DECL is the
2307 declaration of the function being targeted by the call and EXP is the
2308 CALL_EXPR representing the call. */
/* NOTE(review): the return type, braces, local declarations (`a', `b',
   `func', `type') and several `return false;'/`return true;' lines appear
   to have been dropped by extraction; fragments kept byte-identical.
   Rejects sibcalls when PIC needs %ebx live, when return-value locations
   differ, when an indirect call would have no free call-clobbered register,
   and when the caller realigns its stack.  */
2311 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2316 /* If we are generating position-independent code, we cannot sibcall
2317 optimize any indirect call, or a direct call to a global function,
2318 as the PLT requires %ebx be live. */
2319 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
/* Extract the function type from the CALL_EXPR's callee operand.  */
2326 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2327 if (POINTER_TYPE_P (func
))
2328 func
= TREE_TYPE (func
);
2331 /* Check that the return value locations are the same.  Like
2332 if we are returning floats on the 80387 register stack, we cannot
2333 make a sibcall from a function that doesn't return a float to a
2334 function that does or, conversely, from a function that does return
2335 a float to a function that doesn't; the necessary stack adjustment
2336 would not be executed.  This is also the place we notice
2337 differences in the return value ABI.  Note that it is ok for one
2338 of the functions to have void return type as long as the return
2339 value of the other is passed in a register. */
2340 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2341 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2343 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2345 if (!rtx_equal_p (a
, b
))
2348 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2350 else if (!rtx_equal_p (a
, b
))
2353 /* If this call is indirect, we'll need to be able to use a call-clobbered
2354 register for the address of the target function.  Make sure that all
2355 such registers are not used for passing parameters. */
2356 if (!decl
&& !TARGET_64BIT
)
2360 /* We're looking at the CALL_EXPR, we need the type of the function. */
2361 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2362 type
= TREE_TYPE (type
); /* pointer type */
2363 type
= TREE_TYPE (type
); /* function type */
/* With regparm >= 3 every call-clobbered register carries an argument.  */
2365 if (ix86_function_regparm (type
, NULL
) >= 3)
2367 /* ??? Need to count the actual number of registers to be used,
2368 not the possible number of registers.  Fix later. */
2373 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2374 /* Dllimport'd functions are also called indirectly. */
2375 if (decl
&& DECL_DLLIMPORT_P (decl
)
2376 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2380 /* If we forced aligned the stack, then sibcalling would unalign the
2381 stack, which may break the called function. */
2382 if (cfun
->machine
->force_align_arg_pointer
)
2385 /* Otherwise okay.  That also includes certain types of indirect calls. */
2389 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2390 calling convention attributes;
2391 arguments as in struct attribute_spec.handler. */
/* NOTE(review): the return type, several parameter lines (`args',
   `no_add_attrs'), braces, `return NULL_TREE;' lines and a TARGET_64BIT
   branch appear to have been dropped by extraction; fragments kept
   byte-identical.  Validates each calling-convention attribute and
   diagnoses incompatible combinations.  */
2394 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2396 int flags ATTRIBUTE_UNUSED
,
/* These attributes only make sense on function types/decls.  */
2399 if (TREE_CODE (*node
) != FUNCTION_TYPE
2400 && TREE_CODE (*node
) != METHOD_TYPE
2401 && TREE_CODE (*node
) != FIELD_DECL
2402 && TREE_CODE (*node
) != TYPE_DECL
)
2404 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2405 IDENTIFIER_POINTER (name
));
2406 *no_add_attrs
= true;
2410 /* Can combine regparm with all attributes but fastcall. */
2411 if (is_attribute_p ("regparm", name
))
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2417 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument, which must be an integer constant
   no larger than REGPARM_MAX.  */
2420 cst
= TREE_VALUE (args
);
2421 if (TREE_CODE (cst
) != INTEGER_CST
)
2423 warning (OPT_Wattributes
,
2424 "%qs attribute requires an integer constant argument",
2425 IDENTIFIER_POINTER (name
));
2426 *no_add_attrs
= true;
2428 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2430 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2431 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2432 *no_add_attrs
= true;
/* A stack-realigning function needs one register for itself, so it
   cannot pass arguments in all REGPARM_MAX registers.  */
2436 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2437 TYPE_ATTRIBUTES (*node
))
2438 && compare_tree_int (cst
, REGPARM_MAX
-1))
2440 error ("%s functions limited to %d register parameters",
2441 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2449 warning (OPT_Wattributes
, "%qs attribute ignored",
2450 IDENTIFIER_POINTER (name
));
2451 *no_add_attrs
= true;
2455 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2456 if (is_attribute_p ("fastcall", name
))
2458 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2460 error ("fastcall and cdecl attributes are not compatible");
2462 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2464 error ("fastcall and stdcall attributes are not compatible");
2466 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2468 error ("fastcall and regparm attributes are not compatible");
2472 /* Can combine stdcall with fastcall (redundant), regparm and
2474 else if (is_attribute_p ("stdcall", name
))
2476 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2478 error ("stdcall and cdecl attributes are not compatible");
2480 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2482 error ("stdcall and fastcall attributes are not compatible");
2486 /* Can combine cdecl with regparm and sseregparm. */
2487 else if (is_attribute_p ("cdecl", name
))
2489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2491 error ("stdcall and cdecl attributes are not compatible");
2493 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2495 error ("fastcall and cdecl attributes are not compatible");
2499 /* Can combine sseregparm with all attributes. */
2504 /* Return 0 if the attributes for two types are incompatible, 1 if they
2505 are compatible, and 2 if they are nearly compatible (which causes a
2506 warning to be generated). */
/* NOTE(review): the return type, braces, `return 0;'/`return 1;' lines and
   the early-out for non-function types appear to be missing from this
   extracted chunk; fragments kept byte-identical.  Compares fastcall,
   regparm count, sseregparm and cdecl-vs-stdcall between the two types.  */
2509 ix86_comp_type_attributes (tree type1
, tree type2
)
2511 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the "non-default"
   attribute to look for flips accordingly.  */
2512 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2514 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2517 /* Check for mismatched fastcall/regparm types. */
2518 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2519 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2520 || (ix86_function_regparm (type1
, NULL
)
2521 != ix86_function_regparm (type2
, NULL
)))
2524 /* Check for mismatched sseregparm types. */
2525 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2526 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2529 /* Check for mismatched return types (cdecl vs stdcall). */
2530 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2531 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2537 /* Return the regparm value for a function with the indicated TYPE and DECL.
2538 DECL may be NULL when calling function indirectly
2539 or considering a libcall. */
/* NOTE(review): the return type, braces, an `attr' declaration, a
   TARGET_64BIT early return and a `return 2' for fastcall appear to have
   been dropped by extraction; fragments kept byte-identical.  Computes how
   many integer arguments go in registers: explicit regparm/fastcall
   attributes win; otherwise local (non-exported) functions may be promoted
   to register passing when compiling the whole unit at once.  */
2542 ix86_function_regparm (tree type
, tree decl
)
2545 int regparm
= ix86_regparm
;
2546 bool user_convention
= false;
2550 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2553 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2554 user_convention
= true;
2557 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2560 user_convention
= true;
2563 /* Use register calling convention for local functions when possible. */
2564 if (!TARGET_64BIT
&& !user_convention
&& decl
2565 && flag_unit_at_a_time
&& !profile_flag
)
2567 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2570 int local_regparm
, globals
= 0, regno
;
2572 /* Make sure no regparm register is taken by a global register
2574 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2575 if (global_regs
[local_regparm
])
2577 /* We can't use regparm(3) for nested functions as these use
2578 static chain pointer in third argument. */
2579 if (local_regparm
== 3
2580 && decl_function_context (decl
)
2581 && !DECL_NO_STATIC_CHAIN (decl
))
2583 /* If the function realigns its stack pointer, the
2584 prologue will clobber %ecx.  If we've already
2585 generated code for the callee, the callee
2586 DECL_STRUCT_FUNCTION is gone, so we fall back to
2587 scanning the attributes for the self-realigning
2589 if ((DECL_STRUCT_FUNCTION (decl
)
2590 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2591 || (!DECL_STRUCT_FUNCTION (decl
)
2592 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2593 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2595 /* Each global register variable increases register pressure,
2596 so the more global reg vars there are, the smaller regparm
2597 optimization use, unless requested by the user explicitly. */
2598 for (regno
= 0; regno
< 6; regno
++)
2599 if (global_regs
[regno
])
= globals
< local_regparm
? local_regparm
- globals
: 0;
2604 if (local_regparm
> regparm
)
2605 regparm
= local_regparm
;
2612 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2613 in SSE registers for a function with the indicated TYPE and DECL.
2614 DECL may be NULL when calling function indirectly
2615 or considering a libcall.  Otherwise return 0. */
/* NOTE(review): the return type, braces, the TARGET_SSE check inside the
   sseregparm branch, `return 2'/`return 0' lines and an `i->local' test
   appear to be missing from this extracted chunk; fragments kept
   byte-identical.  */
2618 ix86_function_sseregparm (tree type
, tree decl
)
2620 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2621 by the sseregparm attribute. */
2622 if (TARGET_SSEREGPARM
2624 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
/* Diagnose sseregparm requested while SSE code generation is off;
   prefer naming the decl when one is available.  */
2629 error ("Calling %qD with attribute sseregparm without "
2630 "SSE/SSE2 enabled", decl
);
2632 error ("Calling %qT with attribute sseregparm without "
2633 "SSE/SSE2 enabled", type
);
2640 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2641 in SSE registers even for 32-bit mode and not just 3, but up to
2642 8 SSE arguments in registers. */
2643 if (!TARGET_64BIT
&& decl
2644 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2646 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2648 return TARGET_SSE2
? 2 : 1;
2654 /* Return true if EAX is live at the start of the function.  Used by
2655 ix86_expand_prologue to determine if we need special help before
2656 calling allocate_stack_worker. */
/* NOTE(review): the return type line and braces appear to be missing from
   this extracted chunk; the body is kept byte-identical.  */
2659 ix86_eax_live_at_start_p (void)
2661 /* Cheat.  Don't bother working forward from ix86_function_regparm
2662 to the function type to whether an actual argument is located in
2663 eax.  Instead just look at cfg info, which is still close enough
2664 to correct at this point.  This gives false positives for broken
2665 functions that might use uninitialized data that happens to be
2666 allocated in eax, but who cares? */
/* Register 0 is %eax on this target.  */
2667 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
2670 /* Value is the number of bytes of arguments automatically
2671 popped when returning from a subroutine call.
2672 FUNDECL is the declaration node of the function (as a tree),
2673 FUNTYPE is the data type of the function (as a tree),
2674 or for a library call it is an identifier node for the subroutine name.
2675 SIZE is the number of bytes of arguments passed on the stack.
2677 On the 80386, the RTD insn may be used to pop them if the number
2678 of args is fixed, but if the number is variable then the caller
2679 must pop them all.  RTD can't be used for library calls now
2680 because the library is compiled with the Unix compiler.
2681 Use of RTD is a selectable option, since it is incompatible with
2682 standard Unix calling sequences.  If the option is not selected,
2683 the caller must always pop the args.
2685 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): the return type, braces, several `return size;'/`return 0;'
   lines and part of the varargs condition appear to have been dropped by
   extraction; fragments kept byte-identical.  */
2688 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2690 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2692 /* Cdecl functions override -mrtd, and never pop the stack. */
2693 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2695 /* Stdcall and fastcall functions will pop the stack if not
2697 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2698 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
/* Only pop when the argument list is fixed (no trailing "...").  */
2702 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2703 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2704 == void_type_node
)))
2708 /* Lose any fake structure return argument if it is passed on the stack. */
2709 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2711 && !KEEP_AGGREGATE_RETURN_POINTER
)
2713 int nregs
= ix86_function_regparm (funtype
, fundecl
);
/* The hidden return-pointer slot is one pointer wide.  */
2716 return GET_MODE_SIZE (Pmode
);
2722 /* Argument support functions. */
2724 /* Return true when register may be used to pass function parameters. */
2726 ix86_function_arg_regno_p (int regno
)
2730 return (regno
< REGPARM_MAX
2731 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2732 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2733 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2734 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2736 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2737 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2739 /* RAX is used as hidden argument to va_arg functions. */
2742 for (i
= 0; i
< REGPARM_MAX
; i
++)
2743 if (regno
== x86_64_int_parameter_registers
[i
])
2748 /* Return if we do not know how to pass TYPE solely in registers. */
2751 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2753 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2756 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2757 The layout_type routine is crafty and tries to trick us into passing
2758 currently unsupported vector types on the stack by using TImode. */
2759 return (!TARGET_64BIT
&& mode
== TImode
2760 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2763 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2764 for a call to a function whose data type is FNTYPE.
2765 For a library call, FNTYPE is 0. */
2768 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2769 tree fntype
, /* tree ptr for function decl */
2770 rtx libname
, /* SYMBOL_REF of library name or 0 */
2773 static CUMULATIVE_ARGS zero_cum
;
2774 tree param
, next_param
;
2776 if (TARGET_DEBUG_ARG
)
2778 fprintf (stderr
, "\ninit_cumulative_args (");
2780 fprintf (stderr
, "fntype code = %s, ret code = %s",
2781 tree_code_name
[(int) TREE_CODE (fntype
)],
2782 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2784 fprintf (stderr
, "no fntype");
2787 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2792 /* Set up the number of registers to use for passing arguments. */
2793 cum
->nregs
= ix86_regparm
;
2795 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2797 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2798 cum
->warn_sse
= true;
2799 cum
->warn_mmx
= true;
2800 cum
->maybe_vaarg
= false;
2802 /* Use ecx and edx registers if function has fastcall attribute,
2803 else look for regparm information. */
2804 if (fntype
&& !TARGET_64BIT
)
2806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2812 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2815 /* Set up the number of SSE registers used for passing SFmode
2816 and DFmode arguments. Warn for mismatching ABI. */
2817 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2819 /* Determine if this function has variable arguments. This is
2820 indicated by the last argument being 'void_type_mode' if there
2821 are no variable arguments. If there are variable arguments, then
2822 we won't pass anything in registers in 32-bit mode. */
2824 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2826 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2827 param
!= 0; param
= next_param
)
2829 next_param
= TREE_CHAIN (param
);
2830 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2840 cum
->float_in_sse
= 0;
2842 cum
->maybe_vaarg
= true;
2846 if ((!fntype
&& !libname
)
2847 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2848 cum
->maybe_vaarg
= true;
2850 if (TARGET_DEBUG_ARG
)
2851 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2856 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2857 But in the case of vector types, it is some vector mode.
2859 When we have only some of our vector isa extensions enabled, then there
2860 are some modes for which vector_mode_supported_p is false. For these
2861 modes, the generic vector support in gcc will choose some non-vector mode
2862 in order to implement the type. By computing the natural mode, we'll
2863 select the proper ABI location for the operand and not depend on whatever
2864 the middle-end decides to do with these vector types. */
2866 static enum machine_mode
2867 type_natural_mode (tree type
)
2869 enum machine_mode mode
= TYPE_MODE (type
);
2871 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2873 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2874 if ((size
== 8 || size
== 16)
2875 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2876 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2878 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2880 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2881 mode
= MIN_MODE_VECTOR_FLOAT
;
2883 mode
= MIN_MODE_VECTOR_INT
;
2885 /* Get the mode which has this inner mode and number of units. */
2886 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2887 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2888 && GET_MODE_INNER (mode
) == innermode
)
2898 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2899 this may not agree with the mode that the type system has chosen for the
2900 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2901 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2904 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2909 if (orig_mode
!= BLKmode
)
2910 tmp
= gen_rtx_REG (orig_mode
, regno
);
2913 tmp
= gen_rtx_REG (mode
, regno
);
2914 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2915 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2921 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2922 of this code is to classify each 8bytes of incoming argument by the register
2923 class and assign registers accordingly. */
2925 /* Return the union class of CLASS1 and CLASS2.
2926 See the x86-64 PS ABI for details. */
2928 static enum x86_64_reg_class
2929 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2931 /* Rule #1: If both classes are equal, this is the resulting class. */
2932 if (class1
== class2
)
2935 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2937 if (class1
== X86_64_NO_CLASS
)
2939 if (class2
== X86_64_NO_CLASS
)
2942 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2943 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2944 return X86_64_MEMORY_CLASS
;
2946 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2947 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2948 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2949 return X86_64_INTEGERSI_CLASS
;
2950 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2951 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2952 return X86_64_INTEGER_CLASS
;
2954 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2956 if (class1
== X86_64_X87_CLASS
2957 || class1
== X86_64_X87UP_CLASS
2958 || class1
== X86_64_COMPLEX_X87_CLASS
2959 || class2
== X86_64_X87_CLASS
2960 || class2
== X86_64_X87UP_CLASS
2961 || class2
== X86_64_COMPLEX_X87_CLASS
)
2962 return X86_64_MEMORY_CLASS
;
2964 /* Rule #6: Otherwise class SSE is used. */
2965 return X86_64_SSE_CLASS
;
2968 /* Classify the argument of type TYPE and mode MODE.
2969 CLASSES will be filled by the register class used to pass each word
2970 of the operand. The number of words is returned. In case the parameter
2971 should be passed in memory, 0 is returned. As a special case for zero
2972 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2974 BIT_OFFSET is used internally for handling records and specifies offset
2975 of the offset in bits modulo 256 to avoid overflow cases.
2977 See the x86-64 PS ABI for details.
2981 classify_argument (enum machine_mode mode
, tree type
,
2982 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2984 HOST_WIDE_INT bytes
=
2985 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2986 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2988 /* Variable sized entities are always passed/returned in memory. */
2992 if (mode
!= VOIDmode
2993 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2996 if (type
&& AGGREGATE_TYPE_P (type
))
3000 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3002 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3006 for (i
= 0; i
< words
; i
++)
3007 classes
[i
] = X86_64_NO_CLASS
;
3009 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3010 signalize memory class, so handle it as special case. */
3013 classes
[0] = X86_64_NO_CLASS
;
3017 /* Classify each field of record and merge classes. */
3018 switch (TREE_CODE (type
))
3021 /* And now merge the fields of structure. */
3022 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3024 if (TREE_CODE (field
) == FIELD_DECL
)
3028 if (TREE_TYPE (field
) == error_mark_node
)
3031 /* Bitfields are always classified as integer. Handle them
3032 early, since later code would consider them to be
3033 misaligned integers. */
3034 if (DECL_BIT_FIELD (field
))
3036 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3037 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3038 + tree_low_cst (DECL_SIZE (field
), 0)
3041 merge_classes (X86_64_INTEGER_CLASS
,
3046 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3047 TREE_TYPE (field
), subclasses
,
3048 (int_bit_position (field
)
3049 + bit_offset
) % 256);
3052 for (i
= 0; i
< num
; i
++)
3055 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3057 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3065 /* Arrays are handled as small records. */
3068 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3069 TREE_TYPE (type
), subclasses
, bit_offset
);
3073 /* The partial classes are now full classes. */
3074 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3075 subclasses
[0] = X86_64_SSE_CLASS
;
3076 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3077 subclasses
[0] = X86_64_INTEGER_CLASS
;
3079 for (i
= 0; i
< words
; i
++)
3080 classes
[i
] = subclasses
[i
% num
];
3085 case QUAL_UNION_TYPE
:
3086 /* Unions are similar to RECORD_TYPE but offset is always 0.
3088 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3090 if (TREE_CODE (field
) == FIELD_DECL
)
3094 if (TREE_TYPE (field
) == error_mark_node
)
3097 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3098 TREE_TYPE (field
), subclasses
,
3102 for (i
= 0; i
< num
; i
++)
3103 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3112 /* Final merger cleanup. */
3113 for (i
= 0; i
< words
; i
++)
3115 /* If one class is MEMORY, everything should be passed in
3117 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3120 /* The X86_64_SSEUP_CLASS should be always preceded by
3121 X86_64_SSE_CLASS. */
3122 if (classes
[i
] == X86_64_SSEUP_CLASS
3123 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3124 classes
[i
] = X86_64_SSE_CLASS
;
3126 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3127 if (classes
[i
] == X86_64_X87UP_CLASS
3128 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3129 classes
[i
] = X86_64_SSE_CLASS
;
3134 /* Compute alignment needed. We align all types to natural boundaries with
3135 exception of XFmode that is aligned to 64bits. */
3136 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3138 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3141 mode_alignment
= 128;
3142 else if (mode
== XCmode
)
3143 mode_alignment
= 256;
3144 if (COMPLEX_MODE_P (mode
))
3145 mode_alignment
/= 2;
3146 /* Misaligned fields are always returned in memory. */
3147 if (bit_offset
% mode_alignment
)
3151 /* for V1xx modes, just use the base mode */
3152 if (VECTOR_MODE_P (mode
)
3153 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3154 mode
= GET_MODE_INNER (mode
);
3156 /* Classification of atomic types. */
3161 classes
[0] = X86_64_SSE_CLASS
;
3164 classes
[0] = X86_64_SSE_CLASS
;
3165 classes
[1] = X86_64_SSEUP_CLASS
;
3174 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3175 classes
[0] = X86_64_INTEGERSI_CLASS
;
3177 classes
[0] = X86_64_INTEGER_CLASS
;
3181 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3186 if (!(bit_offset
% 64))
3187 classes
[0] = X86_64_SSESF_CLASS
;
3189 classes
[0] = X86_64_SSE_CLASS
;
3192 classes
[0] = X86_64_SSEDF_CLASS
;
3195 classes
[0] = X86_64_X87_CLASS
;
3196 classes
[1] = X86_64_X87UP_CLASS
;
3199 classes
[0] = X86_64_SSE_CLASS
;
3200 classes
[1] = X86_64_SSEUP_CLASS
;
3203 classes
[0] = X86_64_SSE_CLASS
;
3206 classes
[0] = X86_64_SSEDF_CLASS
;
3207 classes
[1] = X86_64_SSEDF_CLASS
;
3210 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3213 /* This modes is larger than 16 bytes. */
3221 classes
[0] = X86_64_SSE_CLASS
;
3222 classes
[1] = X86_64_SSEUP_CLASS
;
3228 classes
[0] = X86_64_SSE_CLASS
;
3234 gcc_assert (VECTOR_MODE_P (mode
));
3239 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3241 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3242 classes
[0] = X86_64_INTEGERSI_CLASS
;
3244 classes
[0] = X86_64_INTEGER_CLASS
;
3245 classes
[1] = X86_64_INTEGER_CLASS
;
3246 return 1 + (bytes
> 8);
3250 /* Examine the argument and return set number of register required in each
3251 class. Return 0 iff parameter should be passed in memory. */
3253 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3254 int *int_nregs
, int *sse_nregs
)
3256 enum x86_64_reg_class
class[MAX_CLASSES
];
3257 int n
= classify_argument (mode
, type
, class, 0);
3263 for (n
--; n
>= 0; n
--)
3266 case X86_64_INTEGER_CLASS
:
3267 case X86_64_INTEGERSI_CLASS
:
3270 case X86_64_SSE_CLASS
:
3271 case X86_64_SSESF_CLASS
:
3272 case X86_64_SSEDF_CLASS
:
3275 case X86_64_NO_CLASS
:
3276 case X86_64_SSEUP_CLASS
:
3278 case X86_64_X87_CLASS
:
3279 case X86_64_X87UP_CLASS
:
3283 case X86_64_COMPLEX_X87_CLASS
:
3284 return in_return
? 2 : 0;
3285 case X86_64_MEMORY_CLASS
:
3291 /* Construct container for the argument used by GCC interface. See
3292 FUNCTION_ARG for the detailed description. */
3295 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3296 tree type
, int in_return
, int nintregs
, int nsseregs
,
3297 const int *intreg
, int sse_regno
)
3299 /* The following variables hold the static issued_error state. */
3300 static bool issued_sse_arg_error
;
3301 static bool issued_sse_ret_error
;
3302 static bool issued_x87_ret_error
;
3304 enum machine_mode tmpmode
;
3306 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3307 enum x86_64_reg_class
class[MAX_CLASSES
];
3311 int needed_sseregs
, needed_intregs
;
3312 rtx exp
[MAX_CLASSES
];
3315 n
= classify_argument (mode
, type
, class, 0);
3316 if (TARGET_DEBUG_ARG
)
3319 fprintf (stderr
, "Memory class\n");
3322 fprintf (stderr
, "Classes:");
3323 for (i
= 0; i
< n
; i
++)
3325 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3327 fprintf (stderr
, "\n");
3332 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3335 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3338 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3339 some less clueful developer tries to use floating-point anyway. */
3340 if (needed_sseregs
&& !TARGET_SSE
)
3344 if (!issued_sse_ret_error
)
3346 error ("SSE register return with SSE disabled");
3347 issued_sse_ret_error
= true;
3350 else if (!issued_sse_arg_error
)
3352 error ("SSE register argument with SSE disabled");
3353 issued_sse_arg_error
= true;
3358 /* Likewise, error if the ABI requires us to return values in the
3359 x87 registers and the user specified -mno-80387. */
3360 if (!TARGET_80387
&& in_return
)
3361 for (i
= 0; i
< n
; i
++)
3362 if (class[i
] == X86_64_X87_CLASS
3363 || class[i
] == X86_64_X87UP_CLASS
3364 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3366 if (!issued_x87_ret_error
)
3368 error ("x87 register return with x87 disabled");
3369 issued_x87_ret_error
= true;
3374 /* First construct simple cases. Avoid SCmode, since we want to use
3375 single register to pass this type. */
3376 if (n
== 1 && mode
!= SCmode
)
3379 case X86_64_INTEGER_CLASS
:
3380 case X86_64_INTEGERSI_CLASS
:
3381 return gen_rtx_REG (mode
, intreg
[0]);
3382 case X86_64_SSE_CLASS
:
3383 case X86_64_SSESF_CLASS
:
3384 case X86_64_SSEDF_CLASS
:
3385 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3386 case X86_64_X87_CLASS
:
3387 case X86_64_COMPLEX_X87_CLASS
:
3388 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3389 case X86_64_NO_CLASS
:
3390 /* Zero sized array, struct or class. */
3395 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3397 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3399 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3400 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3401 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3402 && class[1] == X86_64_INTEGER_CLASS
3403 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3404 && intreg
[0] + 1 == intreg
[1])
3405 return gen_rtx_REG (mode
, intreg
[0]);
3407 /* Otherwise figure out the entries of the PARALLEL. */
3408 for (i
= 0; i
< n
; i
++)
3412 case X86_64_NO_CLASS
:
3414 case X86_64_INTEGER_CLASS
:
3415 case X86_64_INTEGERSI_CLASS
:
3416 /* Merge TImodes on aligned occasions here too. */
3417 if (i
* 8 + 8 > bytes
)
3418 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3419 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3423 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3424 if (tmpmode
== BLKmode
)
3426 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3427 gen_rtx_REG (tmpmode
, *intreg
),
3431 case X86_64_SSESF_CLASS
:
3432 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3433 gen_rtx_REG (SFmode
,
3434 SSE_REGNO (sse_regno
)),
3438 case X86_64_SSEDF_CLASS
:
3439 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3440 gen_rtx_REG (DFmode
,
3441 SSE_REGNO (sse_regno
)),
3445 case X86_64_SSE_CLASS
:
3446 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3450 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3451 gen_rtx_REG (tmpmode
,
3452 SSE_REGNO (sse_regno
)),
3454 if (tmpmode
== TImode
)
3463 /* Empty aligned struct, union or class. */
3467 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3468 for (i
= 0; i
< nexps
; i
++)
3469 XVECEXP (ret
, 0, i
) = exp
[i
];
3473 /* Update the data in CUM to advance over an argument
3474 of mode MODE and data type TYPE.
3475 (TYPE is null for libcalls where that information may not be available.) */
3478 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3479 tree type
, int named
)
3482 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3483 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3486 mode
= type_natural_mode (type
);
3488 if (TARGET_DEBUG_ARG
)
3489 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3490 "mode=%s, named=%d)\n\n",
3491 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3492 GET_MODE_NAME (mode
), named
);
3496 int int_nregs
, sse_nregs
;
3497 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3498 cum
->words
+= words
;
3499 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3501 cum
->nregs
-= int_nregs
;
3502 cum
->sse_nregs
-= sse_nregs
;
3503 cum
->regno
+= int_nregs
;
3504 cum
->sse_regno
+= sse_nregs
;
3507 cum
->words
+= words
;
3525 cum
->words
+= words
;
3526 cum
->nregs
-= words
;
3527 cum
->regno
+= words
;
3529 if (cum
->nregs
<= 0)
3537 if (cum
->float_in_sse
< 2)
3540 if (cum
->float_in_sse
< 1)
3551 if (!type
|| !AGGREGATE_TYPE_P (type
))
3553 cum
->sse_words
+= words
;
3554 cum
->sse_nregs
-= 1;
3555 cum
->sse_regno
+= 1;
3556 if (cum
->sse_nregs
<= 0)
3568 if (!type
|| !AGGREGATE_TYPE_P (type
))
3570 cum
->mmx_words
+= words
;
3571 cum
->mmx_nregs
-= 1;
3572 cum
->mmx_regno
+= 1;
3573 if (cum
->mmx_nregs
<= 0)
3584 /* Define where to put the arguments to a function.
3585 Value is zero to push the argument on the stack,
3586 or a hard register in which to store the argument.
3588 MODE is the argument's machine mode.
3589 TYPE is the data type of the argument (as a tree).
3590 This is null for libcalls where that information may
3592 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3593 the preceding args and about the function being called.
3594 NAMED is nonzero if this argument is a named parameter
3595 (otherwise it is an extra parameter matching an ellipsis). */
3598 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3599 tree type
, int named
)
3601 enum machine_mode mode
= orig_mode
;
3604 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3605 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3606 static bool warnedsse
, warnedmmx
;
3608 /* To simplify the code below, represent vector types with a vector mode
3609 even if MMX/SSE are not active. */
3610 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3611 mode
= type_natural_mode (type
);
3613 /* Handle a hidden AL argument containing number of registers for varargs
3614 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3616 if (mode
== VOIDmode
)
3619 return GEN_INT (cum
->maybe_vaarg
3620 ? (cum
->sse_nregs
< 0
3628 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3630 &x86_64_int_parameter_registers
[cum
->regno
],
3635 /* For now, pass fp/complex values on the stack. */
3647 if (words
<= cum
->nregs
)
3649 int regno
= cum
->regno
;
3651 /* Fastcall allocates the first two DWORD (SImode) or
3652 smaller arguments to ECX and EDX. */
3655 if (mode
== BLKmode
|| mode
== DImode
)
3658 /* ECX not EAX is the first allocated register. */
3662 ret
= gen_rtx_REG (mode
, regno
);
3666 if (cum
->float_in_sse
< 2)
3669 if (cum
->float_in_sse
< 1)
3679 if (!type
|| !AGGREGATE_TYPE_P (type
))
3681 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3684 warning (0, "SSE vector argument without SSE enabled "
3688 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3689 cum
->sse_regno
+ FIRST_SSE_REG
);
3696 if (!type
|| !AGGREGATE_TYPE_P (type
))
3698 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3701 warning (0, "MMX vector argument without MMX enabled "
3705 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3706 cum
->mmx_regno
+ FIRST_MMX_REG
);
3711 if (TARGET_DEBUG_ARG
)
3714 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3715 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3718 print_simple_rtl (stderr
, ret
);
3720 fprintf (stderr
, ", stack");
3722 fprintf (stderr
, " )\n");
3728 /* A C expression that indicates when an argument must be passed by
3729 reference. If nonzero for an argument, a copy of that argument is
3730 made in memory and a pointer to the argument is passed instead of
3731 the argument itself. The pointer is passed in whatever way is
3732 appropriate for passing a pointer to that type. */
3735 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3736 enum machine_mode mode ATTRIBUTE_UNUSED
,
3737 tree type
, bool named ATTRIBUTE_UNUSED
)
3742 if (type
&& int_size_in_bytes (type
) == -1)
3744 if (TARGET_DEBUG_ARG
)
3745 fprintf (stderr
, "function_arg_pass_by_reference\n");
3752 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3753 ABI. Only called if TARGET_SSE. */
3755 contains_128bit_aligned_vector_p (tree type
)
3757 enum machine_mode mode
= TYPE_MODE (type
);
3758 if (SSE_REG_MODE_P (mode
)
3759 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3761 if (TYPE_ALIGN (type
) < 128)
3764 if (AGGREGATE_TYPE_P (type
))
3766 /* Walk the aggregates recursively. */
3767 switch (TREE_CODE (type
))
3771 case QUAL_UNION_TYPE
:
3775 /* Walk all the structure fields. */
3776 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3778 if (TREE_CODE (field
) == FIELD_DECL
3779 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3786 /* Just for use if some languages passes arrays by value. */
3787 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3798 /* Gives the alignment boundary, in bits, of an argument with the
3799 specified mode and type. */
3802 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3806 align
= TYPE_ALIGN (type
);
3808 align
= GET_MODE_ALIGNMENT (mode
);
3809 if (align
< PARM_BOUNDARY
)
3810 align
= PARM_BOUNDARY
;
3813 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3814 make an exception for SSE modes since these require 128bit
3817 The handling here differs from field_alignment. ICC aligns MMX
3818 arguments to 4 byte boundaries, while structure fields are aligned
3819 to 8 byte boundaries. */
3821 align
= PARM_BOUNDARY
;
3824 if (!SSE_REG_MODE_P (mode
))
3825 align
= PARM_BOUNDARY
;
3829 if (!contains_128bit_aligned_vector_p (type
))
3830 align
= PARM_BOUNDARY
;
3838 /* Return true if N is a possible register number of function value. */
3840 ix86_function_value_regno_p (int regno
)
3843 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
3844 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
3848 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
3854 /* Define how to find the value returned by a function.
3855 VALTYPE is the data type of the value (as a tree).
3856 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3857 otherwise, FUNC is 0. */
3859 ix86_function_value (tree valtype
, tree fntype_or_decl
,
3860 bool outgoing ATTRIBUTE_UNUSED
)
3862 enum machine_mode natmode
= type_natural_mode (valtype
);
3866 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
3867 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
3868 x86_64_int_return_registers
, 0);
3869 /* For zero sized structures, construct_container return NULL, but we
3870 need to keep rest of compiler happy by returning meaningful value. */
3872 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
3877 tree fn
= NULL_TREE
, fntype
;
3879 && DECL_P (fntype_or_decl
))
3880 fn
= fntype_or_decl
;
3881 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
3882 return gen_rtx_REG (TYPE_MODE (valtype
),
3883 ix86_value_regno (natmode
, fn
, fntype
));
3887 /* Return true iff type is returned in memory. */
3889 ix86_return_in_memory (tree type
)
3891 int needed_intregs
, needed_sseregs
, size
;
3892 enum machine_mode mode
= type_natural_mode (type
);
3895 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
3897 if (mode
== BLKmode
)
3900 size
= int_size_in_bytes (type
);
3902 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3905 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3907 /* User-created vectors small enough to fit in EAX. */
3911 /* MMX/3dNow values are returned in MM0,
3912 except when it doesn't exits. */
3914 return (TARGET_MMX
? 0 : 1);
3916 /* SSE values are returned in XMM0, except when it doesn't exist. */
3918 return (TARGET_SSE
? 0 : 1);
3932 /* When returning SSE vector types, we have a choice of either
3933 (1) being abi incompatible with a -march switch, or
3934 (2) generating an error.
3935 Given no good solution, I think the safest thing is one warning.
3936 The user won't be able to use -Werror, but....
3938 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3939 called in response to actually generating a caller or callee that
3940 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3941 via aggregate_value_p for general type probing from tree-ssa. */
3944 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3946 static bool warnedsse
, warnedmmx
;
3950 /* Look at the return type of the function, not the function type. */
3951 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3953 if (!TARGET_SSE
&& !warnedsse
)
3956 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3959 warning (0, "SSE vector return without SSE enabled "
3964 if (!TARGET_MMX
&& !warnedmmx
)
3966 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3969 warning (0, "MMX vector return without MMX enabled "
3978 /* Define how to find the value returned by a library function
3979 assuming the value has mode MODE. */
3981 ix86_libcall_value (enum machine_mode mode
)
3995 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3998 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4002 return gen_rtx_REG (mode
, 0);
4006 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4009 /* Given a mode, return the register to use for a return value. */
4012 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4014 gcc_assert (!TARGET_64BIT
);
4016 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4017 we normally prevent this case when mmx is not available. However
4018 some ABIs may require the result to be returned like DImode. */
4019 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4020 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4022 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4023 we prevent this case when sse is not available. However some ABIs
4024 may require the result to be returned like integer TImode. */
4025 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4026 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4028 /* Decimal floating point values can go in %eax, unlike other float modes. */
4029 if (DECIMAL_FLOAT_MODE_P (mode
))
4032 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4033 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4036 /* Floating point return values in %st(0), except for local functions when
4037 SSE math is enabled or for functions with sseregparm attribute. */
4038 if ((func
|| fntype
)
4039 && (mode
== SFmode
|| mode
== DFmode
))
4041 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4042 if ((sse_level
>= 1 && mode
== SFmode
)
4043 || (sse_level
== 2 && mode
== DFmode
))
4044 return FIRST_SSE_REG
;
4047 return FIRST_FLOAT_REG
;
4050 /* Create the va_list data type. */
4053 ix86_build_builtin_va_list (void)
4055 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4057 /* For i386 we use plain pointer to argument area. */
4059 return build_pointer_type (char_type_node
);
4061 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4062 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4064 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4065 unsigned_type_node
);
4066 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4067 unsigned_type_node
);
4068 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4070 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4073 va_list_gpr_counter_field
= f_gpr
;
4074 va_list_fpr_counter_field
= f_fpr
;
4076 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4077 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4078 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4079 DECL_FIELD_CONTEXT (f_sav
) = record
;
4081 TREE_CHAIN (record
) = type_decl
;
4082 TYPE_NAME (record
) = type_decl
;
4083 TYPE_FIELDS (record
) = f_gpr
;
4084 TREE_CHAIN (f_gpr
) = f_fpr
;
4085 TREE_CHAIN (f_fpr
) = f_ovf
;
4086 TREE_CHAIN (f_ovf
) = f_sav
;
4088 layout_type (record
);
4090 /* The correct type is an array type of one element. */
4091 return build_array_type (record
, build_index_type (size_zero_node
));
4094 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4097 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4098 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4101 CUMULATIVE_ARGS next_cum
;
4102 rtx save_area
= NULL_RTX
, mem
;
4115 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4118 /* Indicate to allocate space on the stack for varargs save area. */
4119 ix86_save_varrargs_registers
= 1;
4121 cfun
->stack_alignment_needed
= 128;
4123 fntype
= TREE_TYPE (current_function_decl
);
4124 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4125 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4126 != void_type_node
));
4128 /* For varargs, we do not want to skip the dummy va_dcl argument.
4129 For stdargs, we do want to skip the last named argument. */
4132 function_arg_advance (&next_cum
, mode
, type
, 1);
4135 save_area
= frame_pointer_rtx
;
4137 set
= get_varargs_alias_set ();
4139 for (i
= next_cum
.regno
;
4141 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4144 mem
= gen_rtx_MEM (Pmode
,
4145 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4146 MEM_NOTRAP_P (mem
) = 1;
4147 set_mem_alias_set (mem
, set
);
4148 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4149 x86_64_int_parameter_registers
[i
]));
4152 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4154 /* Now emit code to save SSE registers. The AX parameter contains number
4155 of SSE parameter registers used to call this function. We use
4156 sse_prologue_save insn template that produces computed jump across
4157 SSE saves. We need some preparation work to get this working. */
4159 label
= gen_label_rtx ();
4160 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4162 /* Compute address to jump to :
4163 label - 5*eax + nnamed_sse_arguments*5 */
4164 tmp_reg
= gen_reg_rtx (Pmode
);
4165 nsse_reg
= gen_reg_rtx (Pmode
);
4166 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4167 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4168 gen_rtx_MULT (Pmode
, nsse_reg
,
4170 if (next_cum
.sse_regno
)
4173 gen_rtx_CONST (DImode
,
4174 gen_rtx_PLUS (DImode
,
4176 GEN_INT (next_cum
.sse_regno
* 4))));
4178 emit_move_insn (nsse_reg
, label_ref
);
4179 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4181 /* Compute address of memory block we save into. We always use pointer
4182 pointing 127 bytes after first byte to store - this is needed to keep
4183 instruction size limited by 4 bytes. */
4184 tmp_reg
= gen_reg_rtx (Pmode
);
4185 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4186 plus_constant (save_area
,
4187 8 * REGPARM_MAX
+ 127)));
4188 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4189 MEM_NOTRAP_P (mem
) = 1;
4190 set_mem_alias_set (mem
, set
);
4191 set_mem_align (mem
, BITS_PER_WORD
);
4193 /* And finally do the dirty job! */
4194 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4195 GEN_INT (next_cum
.sse_regno
), label
));
4200 /* Implement va_start. */
4203 ix86_va_start (tree valist
, rtx nextarg
)
4205 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4206 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4207 tree gpr
, fpr
, ovf
, sav
, t
;
4210 /* Only 64bit target needs something special. */
4213 std_expand_builtin_va_start (valist
, nextarg
);
4217 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4218 f_fpr
= TREE_CHAIN (f_gpr
);
4219 f_ovf
= TREE_CHAIN (f_fpr
);
4220 f_sav
= TREE_CHAIN (f_ovf
);
4222 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4223 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4224 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4225 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4226 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4228 /* Count number of gp and fp argument registers used. */
4229 words
= current_function_args_info
.words
;
4230 n_gpr
= current_function_args_info
.regno
;
4231 n_fpr
= current_function_args_info
.sse_regno
;
4233 if (TARGET_DEBUG_ARG
)
4234 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4235 (int) words
, (int) n_gpr
, (int) n_fpr
);
4237 if (cfun
->va_list_gpr_size
)
4239 type
= TREE_TYPE (gpr
);
4240 t
= build2 (MODIFY_EXPR
, type
, gpr
,
4241 build_int_cst (type
, n_gpr
* 8));
4242 TREE_SIDE_EFFECTS (t
) = 1;
4243 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4246 if (cfun
->va_list_fpr_size
)
4248 type
= TREE_TYPE (fpr
);
4249 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4250 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4251 TREE_SIDE_EFFECTS (t
) = 1;
4252 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4255 /* Find the overflow area. */
4256 type
= TREE_TYPE (ovf
);
4257 t
= make_tree (type
, virtual_incoming_args_rtx
);
4259 t
= build2 (PLUS_EXPR
, type
, t
,
4260 build_int_cst (type
, words
* UNITS_PER_WORD
));
4261 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4262 TREE_SIDE_EFFECTS (t
) = 1;
4263 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4265 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4267 /* Find the register save area.
4268 Prologue of the function save it right above stack frame. */
4269 type
= TREE_TYPE (sav
);
4270 t
= make_tree (type
, frame_pointer_rtx
);
4271 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4272 TREE_SIDE_EFFECTS (t
) = 1;
4273 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4277 /* Implement va_arg. */
4280 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4282 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4283 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4284 tree gpr
, fpr
, ovf
, sav
, t
;
4286 tree lab_false
, lab_over
= NULL_TREE
;
4291 enum machine_mode nat_mode
;
4293 /* Only 64bit target needs something special. */
4295 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4297 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4298 f_fpr
= TREE_CHAIN (f_gpr
);
4299 f_ovf
= TREE_CHAIN (f_fpr
);
4300 f_sav
= TREE_CHAIN (f_ovf
);
4302 valist
= build_va_arg_indirect_ref (valist
);
4303 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4304 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4305 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4306 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4308 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4310 type
= build_pointer_type (type
);
4311 size
= int_size_in_bytes (type
);
4312 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4314 nat_mode
= type_natural_mode (type
);
4315 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4316 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4318 /* Pull the value out of the saved registers. */
4320 addr
= create_tmp_var (ptr_type_node
, "addr");
4321 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4325 int needed_intregs
, needed_sseregs
;
4327 tree int_addr
, sse_addr
;
4329 lab_false
= create_artificial_label ();
4330 lab_over
= create_artificial_label ();
4332 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4334 need_temp
= (!REG_P (container
)
4335 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4336 || TYPE_ALIGN (type
) > 128));
4338 /* In case we are passing structure, verify that it is consecutive block
4339 on the register save area. If not we need to do moves. */
4340 if (!need_temp
&& !REG_P (container
))
4342 /* Verify that all registers are strictly consecutive */
4343 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4347 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4349 rtx slot
= XVECEXP (container
, 0, i
);
4350 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4351 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4359 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4361 rtx slot
= XVECEXP (container
, 0, i
);
4362 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4363 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4375 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4376 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4377 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4378 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4381 /* First ensure that we fit completely in registers. */
4384 t
= build_int_cst (TREE_TYPE (gpr
),
4385 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4386 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4387 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4388 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4389 gimplify_and_add (t
, pre_p
);
4393 t
= build_int_cst (TREE_TYPE (fpr
),
4394 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4396 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4397 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4398 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4399 gimplify_and_add (t
, pre_p
);
4402 /* Compute index to start of area used for integer regs. */
4405 /* int_addr = gpr + sav; */
4406 t
= fold_convert (ptr_type_node
, gpr
);
4407 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4408 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
4409 gimplify_and_add (t
, pre_p
);
4413 /* sse_addr = fpr + sav; */
4414 t
= fold_convert (ptr_type_node
, fpr
);
4415 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4416 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
4417 gimplify_and_add (t
, pre_p
);
4422 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4425 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4426 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4427 gimplify_and_add (t
, pre_p
);
4429 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4431 rtx slot
= XVECEXP (container
, 0, i
);
4432 rtx reg
= XEXP (slot
, 0);
4433 enum machine_mode mode
= GET_MODE (reg
);
4434 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4435 tree addr_type
= build_pointer_type (piece_type
);
4438 tree dest_addr
, dest
;
4440 if (SSE_REGNO_P (REGNO (reg
)))
4442 src_addr
= sse_addr
;
4443 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4447 src_addr
= int_addr
;
4448 src_offset
= REGNO (reg
) * 8;
4450 src_addr
= fold_convert (addr_type
, src_addr
);
4451 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4452 size_int (src_offset
)));
4453 src
= build_va_arg_indirect_ref (src_addr
);
4455 dest_addr
= fold_convert (addr_type
, addr
);
4456 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4457 size_int (INTVAL (XEXP (slot
, 1)))));
4458 dest
= build_va_arg_indirect_ref (dest_addr
);
4460 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
4461 gimplify_and_add (t
, pre_p
);
4467 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4468 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4469 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
4470 gimplify_and_add (t
, pre_p
);
4474 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4475 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4476 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
4477 gimplify_and_add (t
, pre_p
);
4480 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4481 gimplify_and_add (t
, pre_p
);
4483 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4484 append_to_statement_list (t
, pre_p
);
4487 /* ... otherwise out of the overflow area. */
4489 /* Care for on-stack alignment if needed. */
4490 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4491 || integer_zerop (TYPE_SIZE (type
)))
4495 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4496 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4497 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4498 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4499 build_int_cst (TREE_TYPE (t
), -align
));
4501 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4503 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4504 gimplify_and_add (t2
, pre_p
);
4506 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4507 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4508 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
4509 gimplify_and_add (t
, pre_p
);
4513 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4514 append_to_statement_list (t
, pre_p
);
4517 ptrtype
= build_pointer_type (type
);
4518 addr
= fold_convert (ptrtype
, addr
);
4521 addr
= build_va_arg_indirect_ref (addr
);
4522 return build_va_arg_indirect_ref (addr
);
4525 /* Return nonzero if OPNUM's MEM should be matched
4526 in movabs* patterns. */
4529 ix86_check_movabs (rtx insn
, int opnum
)
4533 set
= PATTERN (insn
);
4534 if (GET_CODE (set
) == PARALLEL
)
4535 set
= XVECEXP (set
, 0, 0);
4536 gcc_assert (GET_CODE (set
) == SET
);
4537 mem
= XEXP (set
, opnum
);
4538 while (GET_CODE (mem
) == SUBREG
)
4539 mem
= SUBREG_REG (mem
);
4540 gcc_assert (GET_CODE (mem
) == MEM
);
4541 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4544 /* Initialize the table of extra 80387 mathematical constants. */
4547 init_ext_80387_constants (void)
4549 static const char * cst
[5] =
4551 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4552 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4553 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4554 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4555 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4559 for (i
= 0; i
< 5; i
++)
4561 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4562 /* Ensure each constant is rounded to XFmode precision. */
4563 real_convert (&ext_80387_constants_table
[i
],
4564 XFmode
, &ext_80387_constants_table
[i
]);
4567 ext_80387_constants_init
= 1;
4570 /* Return true if the constant is something that can be loaded with
4571 a special instruction. */
4574 standard_80387_constant_p (rtx x
)
4576 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4579 if (x
== CONST0_RTX (GET_MODE (x
)))
4581 if (x
== CONST1_RTX (GET_MODE (x
)))
4584 /* For XFmode constants, try to find a special 80387 instruction when
4585 optimizing for size or on those CPUs that benefit from them. */
4586 if (GET_MODE (x
) == XFmode
4587 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4592 if (! ext_80387_constants_init
)
4593 init_ext_80387_constants ();
4595 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4596 for (i
= 0; i
< 5; i
++)
4597 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4604 /* Return the opcode of the special instruction to be used to load
4608 standard_80387_constant_opcode (rtx x
)
4610 switch (standard_80387_constant_p (x
))
4631 /* Return the CONST_DOUBLE representing the 80387 constant that is
4632 loaded by the specified special instruction. The argument IDX
4633 matches the return value from standard_80387_constant_p. */
4636 standard_80387_constant_rtx (int idx
)
4640 if (! ext_80387_constants_init
)
4641 init_ext_80387_constants ();
4657 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4661 /* Return 1 if mode is a valid mode for sse. */
4663 standard_sse_mode_p (enum machine_mode mode
)
4680 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4683 standard_sse_constant_p (rtx x
)
4685 enum machine_mode mode
= GET_MODE (x
);
4687 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4689 if (vector_all_ones_operand (x
, mode
)
4690 && standard_sse_mode_p (mode
))
4691 return TARGET_SSE2
? 2 : -1;
4696 /* Return the opcode of the special instruction to be used to load
4700 standard_sse_constant_opcode (rtx insn
, rtx x
)
4702 switch (standard_sse_constant_p (x
))
4705 if (get_attr_mode (insn
) == MODE_V4SF
)
4706 return "xorps\t%0, %0";
4707 else if (get_attr_mode (insn
) == MODE_V2DF
)
4708 return "xorpd\t%0, %0";
4710 return "pxor\t%0, %0";
4712 return "pcmpeqd\t%0, %0";
4717 /* Returns 1 if OP contains a symbol reference */
4720 symbolic_reference_mentioned_p (rtx op
)
4725 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4728 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4729 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4735 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4736 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4740 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4747 /* Return 1 if it is appropriate to emit `ret' instructions in the
4748 body of a function. Do this only if the epilogue is simple, needing a
4749 couple of insns. Prior to reloading, we can't tell how many registers
4750 must be saved, so return 0 then. Return 0 if there is no frame
4751 marker to de-allocate. */
4754 ix86_can_use_return_insn_p (void)
4756 struct ix86_frame frame
;
4758 if (! reload_completed
|| frame_pointer_needed
)
4761 /* Don't allow more than 32 pop, since that's all we can do
4762 with one instruction. */
4763 if (current_function_pops_args
4764 && current_function_args_size
>= 32768)
4767 ix86_compute_frame_layout (&frame
);
4768 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4771 /* Value should be nonzero if functions must have frame pointers.
4772 Zero means the frame pointer need not be set up (and parms may
4773 be accessed via the stack pointer) in functions that seem suitable. */
4776 ix86_frame_pointer_required (void)
4778 /* If we accessed previous frames, then the generated code expects
4779 to be able to access the saved ebp value in our frame. */
4780 if (cfun
->machine
->accesses_prev_frame
)
4783 /* Several x86 os'es need a frame pointer for other reasons,
4784 usually pertaining to setjmp. */
4785 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4788 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4789 the frame pointer by default. Turn it back on now if we've not
4790 got a leaf function. */
4791 if (TARGET_OMIT_LEAF_FRAME_POINTER
4792 && (!current_function_is_leaf
4793 || ix86_current_function_calls_tls_descriptor
))
4796 if (current_function_profile
)
4802 /* Record that the current function accesses previous call frames. */
4805 ix86_setup_frame_addresses (void)
4807 cfun
->machine
->accesses_prev_frame
= 1;
4810 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4811 # define USE_HIDDEN_LINKONCE 1
4813 # define USE_HIDDEN_LINKONCE 0
4816 static int pic_labels_used
;
4818 /* Fills in the label name that should be used for a pc thunk for
4819 the given register. */
4822 get_pc_thunk_name (char name
[32], unsigned int regno
)
4824 gcc_assert (!TARGET_64BIT
);
4826 if (USE_HIDDEN_LINKONCE
)
4827 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4829 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4833 /* This function generates code for -fpic that loads %ebx with
4834 the return address of the caller and then returns. */
4837 ix86_file_end (void)
4842 for (regno
= 0; regno
< 8; ++regno
)
4846 if (! ((pic_labels_used
>> regno
) & 1))
4849 get_pc_thunk_name (name
, regno
);
4854 switch_to_section (darwin_sections
[text_coal_section
]);
4855 fputs ("\t.weak_definition\t", asm_out_file
);
4856 assemble_name (asm_out_file
, name
);
4857 fputs ("\n\t.private_extern\t", asm_out_file
);
4858 assemble_name (asm_out_file
, name
);
4859 fputs ("\n", asm_out_file
);
4860 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4864 if (USE_HIDDEN_LINKONCE
)
4868 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4870 TREE_PUBLIC (decl
) = 1;
4871 TREE_STATIC (decl
) = 1;
4872 DECL_ONE_ONLY (decl
) = 1;
4874 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4875 switch_to_section (get_named_section (decl
, NULL
, 0));
4877 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4878 fputs ("\t.hidden\t", asm_out_file
);
4879 assemble_name (asm_out_file
, name
);
4880 fputc ('\n', asm_out_file
);
4881 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4885 switch_to_section (text_section
);
4886 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4889 xops
[0] = gen_rtx_REG (SImode
, regno
);
4890 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4892 output_asm_insn ("ret", xops
);
4895 if (NEED_INDICATE_EXEC_STACK
)
4896 file_end_indicate_exec_stack ();
4899 /* Emit code for the SET_GOT patterns. */
4902 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
4907 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4909 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4911 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
4914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4916 output_asm_insn ("call\t%a2", xops
);
4919 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4920 is what will be referenced by the Mach-O PIC subsystem. */
4922 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4925 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4926 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4929 output_asm_insn ("pop{l}\t%0", xops
);
4934 get_pc_thunk_name (name
, REGNO (dest
));
4935 pic_labels_used
|= 1 << REGNO (dest
);
4937 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4938 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4939 output_asm_insn ("call\t%X2", xops
);
4940 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4941 is what will be referenced by the Mach-O PIC subsystem. */
4944 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4946 targetm
.asm_out
.internal_label (asm_out_file
, "L",
4947 CODE_LABEL_NUMBER (label
));
4954 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4955 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4957 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
4962 /* Generate an "push" pattern for input ARG. */
4967 return gen_rtx_SET (VOIDmode
,
4969 gen_rtx_PRE_DEC (Pmode
,
4970 stack_pointer_rtx
)),
4974 /* Return >= 0 if there is an unused call-clobbered register available
4975 for the entire function. */
4978 ix86_select_alt_pic_regnum (void)
4980 if (current_function_is_leaf
&& !current_function_profile
4981 && !ix86_current_function_calls_tls_descriptor
)
4984 for (i
= 2; i
>= 0; --i
)
4985 if (!regs_ever_live
[i
])
4989 return INVALID_REGNUM
;
4992 /* Return 1 if we need to save REGNO. */
4994 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
4996 if (pic_offset_table_rtx
4997 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4998 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4999 || current_function_profile
5000 || current_function_calls_eh_return
5001 || current_function_uses_const_pool
))
5003 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5008 if (current_function_calls_eh_return
&& maybe_eh_return
)
5013 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5014 if (test
== INVALID_REGNUM
)
5021 if (cfun
->machine
->force_align_arg_pointer
5022 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5025 return (regs_ever_live
[regno
]
5026 && !call_used_regs
[regno
]
5027 && !fixed_regs
[regno
]
5028 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5031 /* Return number of registers to be saved on the stack. */
5034 ix86_nsaved_regs (void)
5039 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5040 if (ix86_save_reg (regno
, true))
5045 /* Return the offset between two registers, one to be eliminated, and the other
5046 its replacement, at the start of a routine. */
5049 ix86_initial_elimination_offset (int from
, int to
)
5051 struct ix86_frame frame
;
5052 ix86_compute_frame_layout (&frame
);
5054 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5055 return frame
.hard_frame_pointer_offset
;
5056 else if (from
== FRAME_POINTER_REGNUM
5057 && to
== HARD_FRAME_POINTER_REGNUM
)
5058 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5061 gcc_assert (to
== STACK_POINTER_REGNUM
);
5063 if (from
== ARG_POINTER_REGNUM
)
5064 return frame
.stack_pointer_offset
;
5066 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5067 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5071 /* Fill structure ix86_frame about frame of currently computed function. */
5074 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5076 HOST_WIDE_INT total_size
;
5077 unsigned int stack_alignment_needed
;
5078 HOST_WIDE_INT offset
;
5079 unsigned int preferred_alignment
;
5080 HOST_WIDE_INT size
= get_frame_size ();
5082 frame
->nregs
= ix86_nsaved_regs ();
5085 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5086 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5088 /* During reload iteration the amount of registers saved can change.
5089 Recompute the value as needed. Do not recompute when amount of registers
5090 didn't change as reload does multiple calls to the function and does not
5091 expect the decision to change within single iteration. */
5093 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5095 int count
= frame
->nregs
;
5097 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5098 /* The fast prologue uses move instead of push to save registers. This
5099 is significantly longer, but also executes faster as modern hardware
5100 can execute the moves in parallel, but can't do that for push/pop.
5102 Be careful about choosing what prologue to emit: When function takes
5103 many instructions to execute we may use slow version as well as in
5104 case function is known to be outside hot spot (this is known with
5105 feedback only). Weight the size of function by number of registers
5106 to save as it is cheap to use one or two push instructions but very
5107 slow to use many of them. */
5109 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5110 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5111 || (flag_branch_probabilities
5112 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5113 cfun
->machine
->use_fast_prologue_epilogue
= false;
5115 cfun
->machine
->use_fast_prologue_epilogue
5116 = !expensive_function_p (count
);
5118 if (TARGET_PROLOGUE_USING_MOVE
5119 && cfun
->machine
->use_fast_prologue_epilogue
)
5120 frame
->save_regs_using_mov
= true;
5122 frame
->save_regs_using_mov
= false;
5125 /* Skip return address and saved base pointer. */
5126 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5128 frame
->hard_frame_pointer_offset
= offset
;
5130 /* Do some sanity checking of stack_alignment_needed and
5131 preferred_alignment, since i386 port is the only using those features
5132 that may break easily. */
5134 gcc_assert (!size
|| stack_alignment_needed
);
5135 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5136 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5137 gcc_assert (stack_alignment_needed
5138 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5140 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5141 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5143 /* Register save area */
5144 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5147 if (ix86_save_varrargs_registers
)
5149 offset
+= X86_64_VARARGS_SIZE
;
5150 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5153 frame
->va_arg_size
= 0;
5155 /* Align start of frame for local function. */
5156 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5157 & -stack_alignment_needed
) - offset
;
5159 offset
+= frame
->padding1
;
5161 /* Frame pointer points here. */
5162 frame
->frame_pointer_offset
= offset
;
5166 /* Add outgoing arguments area. Can be skipped if we eliminated
5167 all the function calls as dead code.
5168 Skipping is however impossible when function calls alloca. Alloca
5169 expander assumes that last current_function_outgoing_args_size
5170 of stack frame are unused. */
5171 if (ACCUMULATE_OUTGOING_ARGS
5172 && (!current_function_is_leaf
|| current_function_calls_alloca
5173 || ix86_current_function_calls_tls_descriptor
))
5175 offset
+= current_function_outgoing_args_size
;
5176 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5179 frame
->outgoing_arguments_size
= 0;
5181 /* Align stack boundary. Only needed if we're calling another function
5183 if (!current_function_is_leaf
|| current_function_calls_alloca
5184 || ix86_current_function_calls_tls_descriptor
)
5185 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5186 & -preferred_alignment
) - offset
;
5188 frame
->padding2
= 0;
5190 offset
+= frame
->padding2
;
5192 /* We've reached end of stack frame. */
5193 frame
->stack_pointer_offset
= offset
;
5195 /* Size prologue needs to allocate. */
5196 frame
->to_allocate
=
5197 (size
+ frame
->padding1
+ frame
->padding2
5198 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5200 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5201 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5202 frame
->save_regs_using_mov
= false;
5204 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5205 && current_function_is_leaf
5206 && !ix86_current_function_calls_tls_descriptor
)
5208 frame
->red_zone_size
= frame
->to_allocate
;
5209 if (frame
->save_regs_using_mov
)
5210 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5211 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5212 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5215 frame
->red_zone_size
= 0;
5216 frame
->to_allocate
-= frame
->red_zone_size
;
5217 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5219 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5220 fprintf (stderr
, "size: %i\n", size
);
5221 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5222 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5223 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5224 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5225 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5226 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5227 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5228 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5229 frame
->hard_frame_pointer_offset
);
5230 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5234 /* Emit code to save registers in the prologue. */
5237 ix86_emit_save_regs (void)
5242 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5243 if (ix86_save_reg (regno
, true))
5245 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5246 RTX_FRAME_RELATED_P (insn
) = 1;
5250 /* Emit code to save registers using MOV insns. First register
5251 is restored from POINTER + OFFSET. */
5253 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5258 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5259 if (ix86_save_reg (regno
, true))
5261 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5263 gen_rtx_REG (Pmode
, regno
));
5264 RTX_FRAME_RELATED_P (insn
) = 1;
5265 offset
+= UNITS_PER_WORD
;
5269 /* Expand prologue or epilogue stack adjustment.
5270 The pattern exist to put a dependency on all ebp-based memory accesses.
5271 STYLE should be negative if instructions should be marked as frame related,
5272 zero if %r11 register is live and cannot be freely used and positive
5276 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5281 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5282 else if (x86_64_immediate_operand (offset
, DImode
))
5283 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5287 /* r11 is used by indirect sibcall return as well, set before the
5288 epilogue and used after the epilogue. ATM indirect sibcall
5289 shouldn't be used together with huge frame sizes in one
5290 function because of the frame_size check in sibcall.c. */
5292 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5293 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5295 RTX_FRAME_RELATED_P (insn
) = 1;
5296 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5300 RTX_FRAME_RELATED_P (insn
) = 1;
5303 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5306 ix86_internal_arg_pointer (void)
5308 bool has_force_align_arg_pointer
=
5309 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5310 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5311 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5312 && DECL_NAME (current_function_decl
)
5313 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5314 && DECL_FILE_SCOPE_P (current_function_decl
))
5315 || ix86_force_align_arg_pointer
5316 || has_force_align_arg_pointer
)
5318 /* Nested functions can't realign the stack due to a register
5320 if (DECL_CONTEXT (current_function_decl
)
5321 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5323 if (ix86_force_align_arg_pointer
)
5324 warning (0, "-mstackrealign ignored for nested functions");
5325 if (has_force_align_arg_pointer
)
5326 error ("%s not supported for nested functions",
5327 ix86_force_align_arg_pointer_string
);
5328 return virtual_incoming_args_rtx
;
5330 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5331 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5334 return virtual_incoming_args_rtx
;
5337 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5338 This is called from dwarf2out.c to emit call frame instructions
5339 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5341 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5343 rtx unspec
= SET_SRC (pattern
);
5344 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5348 case UNSPEC_REG_SAVE
:
5349 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5350 SET_DEST (pattern
));
5352 case UNSPEC_DEF_CFA
:
5353 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5354 INTVAL (XVECEXP (unspec
, 0, 0)));
5361 /* Expand the prologue into a bunch of separate insns. */
5364 ix86_expand_prologue (void)
5368 struct ix86_frame frame
;
5369 HOST_WIDE_INT allocate
;
5371 ix86_compute_frame_layout (&frame
);
5373 if (cfun
->machine
->force_align_arg_pointer
)
5377 /* Grab the argument pointer. */
5378 x
= plus_constant (stack_pointer_rtx
, 4);
5379 y
= cfun
->machine
->force_align_arg_pointer
;
5380 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5381 RTX_FRAME_RELATED_P (insn
) = 1;
5383 /* The unwind info consists of two parts: install the fafp as the cfa,
5384 and record the fafp as the "save register" of the stack pointer.
5385 The later is there in order that the unwinder can see where it
5386 should restore the stack pointer across the and insn. */
5387 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5388 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5389 RTX_FRAME_RELATED_P (x
) = 1;
5390 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5392 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5393 RTX_FRAME_RELATED_P (y
) = 1;
5394 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5395 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5396 REG_NOTES (insn
) = x
;
5398 /* Align the stack. */
5399 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5402 /* And here we cheat like madmen with the unwind info. We force the
5403 cfa register back to sp+4, which is exactly what it was at the
5404 start of the function. Re-pushing the return address results in
5405 the return at the same spot relative to the cfa, and thus is
5406 correct wrt the unwind info. */
5407 x
= cfun
->machine
->force_align_arg_pointer
;
5408 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5409 insn
= emit_insn (gen_push (x
));
5410 RTX_FRAME_RELATED_P (insn
) = 1;
5413 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5414 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5415 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5416 REG_NOTES (insn
) = x
;
5419 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5420 slower on all targets. Also sdb doesn't like it. */
5422 if (frame_pointer_needed
)
5424 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5425 RTX_FRAME_RELATED_P (insn
) = 1;
5427 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5428 RTX_FRAME_RELATED_P (insn
) = 1;
5431 allocate
= frame
.to_allocate
;
5433 if (!frame
.save_regs_using_mov
)
5434 ix86_emit_save_regs ();
5436 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5438 /* When using red zone we may start register saving before allocating
5439 the stack frame saving one cycle of the prologue. */
5440 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5441 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5442 : stack_pointer_rtx
,
5443 -frame
.nregs
* UNITS_PER_WORD
);
5447 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5448 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5449 GEN_INT (-allocate
), -1);
5452 /* Only valid for Win32. */
5453 rtx eax
= gen_rtx_REG (SImode
, 0);
5454 bool eax_live
= ix86_eax_live_at_start_p ();
5457 gcc_assert (!TARGET_64BIT
);
5461 emit_insn (gen_push (eax
));
5465 emit_move_insn (eax
, GEN_INT (allocate
));
5467 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5468 RTX_FRAME_RELATED_P (insn
) = 1;
5469 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5470 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5471 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5472 t
, REG_NOTES (insn
));
5476 if (frame_pointer_needed
)
5477 t
= plus_constant (hard_frame_pointer_rtx
,
5480 - frame
.nregs
* UNITS_PER_WORD
);
5482 t
= plus_constant (stack_pointer_rtx
, allocate
);
5483 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5487 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5489 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5490 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5492 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5493 -frame
.nregs
* UNITS_PER_WORD
);
5496 pic_reg_used
= false;
5497 if (pic_offset_table_rtx
5498 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5499 || current_function_profile
))
5501 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5503 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5504 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5506 pic_reg_used
= true;
5512 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5514 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5516 /* Even with accurate pre-reload life analysis, we can wind up
5517 deleting all references to the pic register after reload.
5518 Consider if cross-jumping unifies two sides of a branch
5519 controlled by a comparison vs the only read from a global.
5520 In which case, allow the set_got to be deleted, though we're
5521 too late to do anything about the ebx save in the prologue. */
5522 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5525 /* Prevent function calls from be scheduled before the call to mcount.
5526 In the pic_reg_used case, make sure that the got load isn't deleted. */
5527 if (current_function_profile
)
5528 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5531 /* Emit code to restore saved registers using MOV insns. First register
5532 is restored from POINTER + OFFSET. */
5534 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5535 int maybe_eh_return
)
5538 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5540 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5541 if (ix86_save_reg (regno
, maybe_eh_return
))
5543 /* Ensure that adjust_address won't be forced to produce pointer
5544 out of range allowed by x86-64 instruction set. */
5545 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5549 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5550 emit_move_insn (r11
, GEN_INT (offset
));
5551 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5552 base_address
= gen_rtx_MEM (Pmode
, r11
);
5555 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5556 adjust_address (base_address
, Pmode
, offset
));
5557 offset
+= UNITS_PER_WORD
;
5561 /* Restore function stack, frame, and registers. */
5564 ix86_expand_epilogue (int style
)
5567 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5568 struct ix86_frame frame
;
5569 HOST_WIDE_INT offset
;
5571 ix86_compute_frame_layout (&frame
);
5573 /* Calculate start of saved registers relative to ebp. Special care
5574 must be taken for the normal return case of a function using
5575 eh_return: the eax and edx registers are marked as saved, but not
5576 restored along this path. */
5577 offset
= frame
.nregs
;
5578 if (current_function_calls_eh_return
&& style
!= 2)
5580 offset
*= -UNITS_PER_WORD
;
5582 /* If we're only restoring one register and sp is not valid then
5583 using a move instruction to restore the register since it's
5584 less work than reloading sp and popping the register.
5586 The default code result in stack adjustment using add/lea instruction,
5587 while this code results in LEAVE instruction (or discrete equivalent),
5588 so it is profitable in some other cases as well. Especially when there
5589 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5590 and there is exactly one register to pop. This heuristic may need some
5591 tuning in future. */
5592 if ((!sp_valid
&& frame
.nregs
<= 1)
5593 || (TARGET_EPILOGUE_USING_MOVE
5594 && cfun
->machine
->use_fast_prologue_epilogue
5595 && (frame
.nregs
> 1 || frame
.to_allocate
))
5596 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5597 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5598 && cfun
->machine
->use_fast_prologue_epilogue
5599 && frame
.nregs
== 1)
5600 || current_function_calls_eh_return
)
5602 /* Restore registers. We can use ebp or esp to address the memory
5603 locations. If both are available, default to ebp, since offsets
5604 are known to be small. Only exception is esp pointing directly to the
5605 end of block of saved registers, where we may simplify addressing
5608 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5609 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5610 frame
.to_allocate
, style
== 2);
5612 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5613 offset
, style
== 2);
5615 /* eh_return epilogues need %ecx added to the stack pointer. */
5618 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5620 if (frame_pointer_needed
)
5622 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5623 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5624 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5626 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5627 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5629 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5634 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5635 tmp
= plus_constant (tmp
, (frame
.to_allocate
5636 + frame
.nregs
* UNITS_PER_WORD
));
5637 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5640 else if (!frame_pointer_needed
)
5641 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5642 GEN_INT (frame
.to_allocate
5643 + frame
.nregs
* UNITS_PER_WORD
),
5645 /* If not an i386, mov & pop is faster than "leave". */
5646 else if (TARGET_USE_LEAVE
|| optimize_size
5647 || !cfun
->machine
->use_fast_prologue_epilogue
)
5648 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5651 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5652 hard_frame_pointer_rtx
,
5655 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5657 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5662 /* First step is to deallocate the stack frame so that we can
5663 pop the registers. */
5666 gcc_assert (frame_pointer_needed
);
5667 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5668 hard_frame_pointer_rtx
,
5669 GEN_INT (offset
), style
);
5671 else if (frame
.to_allocate
)
5672 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5673 GEN_INT (frame
.to_allocate
), style
);
5675 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5676 if (ix86_save_reg (regno
, false))
5679 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5681 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5683 if (frame_pointer_needed
)
5685 /* Leave results in shorter dependency chains on CPUs that are
5686 able to grok it fast. */
5687 if (TARGET_USE_LEAVE
)
5688 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5689 else if (TARGET_64BIT
)
5690 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5692 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5696 if (cfun
->machine
->force_align_arg_pointer
)
5698 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5699 cfun
->machine
->force_align_arg_pointer
,
5703 /* Sibcall epilogues don't want a return instruction. */
5707 if (current_function_pops_args
&& current_function_args_size
)
5709 rtx popc
= GEN_INT (current_function_pops_args
);
5711 /* i386 can only pop 64K bytes. If asked to pop more, pop
5712 return address, do explicit add, and jump indirectly to the
5715 if (current_function_pops_args
>= 65536)
5717 rtx ecx
= gen_rtx_REG (SImode
, 2);
5719 /* There is no "pascal" calling convention in 64bit ABI. */
5720 gcc_assert (!TARGET_64BIT
);
5722 emit_insn (gen_popsi1 (ecx
));
5723 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5724 emit_jump_insn (gen_return_indirect_internal (ecx
));
5727 emit_jump_insn (gen_return_pop_internal (popc
));
5730 emit_jump_insn (gen_return_internal ());
5733 /* Reset from the function's potential modifications. */
5736 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5737 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5739 if (pic_offset_table_rtx
)
5740 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5742 /* Mach-O doesn't support labels at the end of objects, so if
5743 it looks like we might want one, insert a NOP. */
5745 rtx insn
= get_last_insn ();
5748 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
5749 insn
= PREV_INSN (insn
);
5753 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
5754 fputs ("\tnop\n", file
);
5760 /* Extract the parts of an RTL expression that is a valid memory address
5761 for an instruction. Return 0 if the structure of the address is
5762 grossly off. Return -1 if the address contains ASHIFT, so it is not
5763 strictly valid, but still used for computing length of lea instruction. */
5766 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5768 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5769 rtx base_reg
, index_reg
;
5770 HOST_WIDE_INT scale
= 1;
5771 rtx scale_rtx
= NULL_RTX
;
5773 enum ix86_address_seg seg
= SEG_DEFAULT
;
5775 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5777 else if (GET_CODE (addr
) == PLUS
)
5787 addends
[n
++] = XEXP (op
, 1);
5790 while (GET_CODE (op
) == PLUS
);
5795 for (i
= n
; i
>= 0; --i
)
5798 switch (GET_CODE (op
))
5803 index
= XEXP (op
, 0);
5804 scale_rtx
= XEXP (op
, 1);
5808 if (XINT (op
, 1) == UNSPEC_TP
5809 && TARGET_TLS_DIRECT_SEG_REFS
5810 && seg
== SEG_DEFAULT
)
5811 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5840 else if (GET_CODE (addr
) == MULT
)
5842 index
= XEXP (addr
, 0); /* index*scale */
5843 scale_rtx
= XEXP (addr
, 1);
5845 else if (GET_CODE (addr
) == ASHIFT
)
5849 /* We're called for lea too, which implements ashift on occasion. */
5850 index
= XEXP (addr
, 0);
5851 tmp
= XEXP (addr
, 1);
5852 if (GET_CODE (tmp
) != CONST_INT
)
5854 scale
= INTVAL (tmp
);
5855 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5861 disp
= addr
; /* displacement */
5863 /* Extract the integral value of scale. */
5866 if (GET_CODE (scale_rtx
) != CONST_INT
)
5868 scale
= INTVAL (scale_rtx
);
5871 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
5872 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
5874 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5875 if (base_reg
&& index_reg
&& scale
== 1
5876 && (index_reg
== arg_pointer_rtx
5877 || index_reg
== frame_pointer_rtx
5878 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
5881 tmp
= base
, base
= index
, index
= tmp
;
5882 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
5885 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5886 if ((base_reg
== hard_frame_pointer_rtx
5887 || base_reg
== frame_pointer_rtx
5888 || base_reg
== arg_pointer_rtx
) && !disp
)
5891 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5892 Avoid this by transforming to [%esi+0]. */
5893 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5894 && base_reg
&& !index_reg
&& !disp
5896 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
5899 /* Special case: encode reg+reg instead of reg*2. */
5900 if (!base
&& index
&& scale
&& scale
== 2)
5901 base
= index
, base_reg
= index_reg
, scale
= 1;
5903 /* Special case: scaling cannot be encoded without base or displacement. */
5904 if (!base
&& !disp
&& index
&& scale
!= 1)
5916 /* Return cost of the memory address x.
5917 For i386, it is better to use a complex address than let gcc copy
5918 the address into a reg and make a new pseudo. But not if the address
5919 requires to two regs - that would mean more pseudos with longer
5922 ix86_address_cost (rtx x
)
5924 struct ix86_address parts
;
5926 int ok
= ix86_decompose_address (x
, &parts
);
5930 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
5931 parts
.base
= SUBREG_REG (parts
.base
);
5932 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
5933 parts
.index
= SUBREG_REG (parts
.index
);
5935 /* More complex memory references are better. */
5936 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5938 if (parts
.seg
!= SEG_DEFAULT
)
5941 /* Attempt to minimize number of registers in the address. */
5943 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5945 && (!REG_P (parts
.index
)
5946 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5950 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5952 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5953 && parts
.base
!= parts
.index
)
5956 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5957 since it's predecode logic can't detect the length of instructions
5958 and it degenerates to vector decoded. Increase cost of such
5959 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5960 to split such addresses or even refuse such addresses at all.
5962 Following addressing modes are affected:
5967 The first and last case may be avoidable by explicitly coding the zero in
5968 memory address, but I don't have AMD-K6 machine handy to check this
5972 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5973 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5974 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5980 /* If X is a machine specific address (i.e. a symbol or label being
5981 referenced as a displacement from the GOT implemented using an
5982 UNSPEC), then return the base term. Otherwise return X. */
5985 ix86_find_base_term (rtx x
)
5991 if (GET_CODE (x
) != CONST
)
5994 if (GET_CODE (term
) == PLUS
5995 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5996 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5997 term
= XEXP (term
, 0);
5998 if (GET_CODE (term
) != UNSPEC
5999 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6002 term
= XVECEXP (term
, 0, 0);
6004 if (GET_CODE (term
) != SYMBOL_REF
6005 && GET_CODE (term
) != LABEL_REF
)
6011 term
= ix86_delegitimize_address (x
);
6013 if (GET_CODE (term
) != SYMBOL_REF
6014 && GET_CODE (term
) != LABEL_REF
)
6020 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6021 this is used for to form addresses to local data when -fPIC is in
6025 darwin_local_data_pic (rtx disp
)
6027 if (GET_CODE (disp
) == MINUS
)
6029 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6030 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6031 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6033 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6034 if (! strcmp (sym_name
, "<pic base>"))
6042 /* Determine if a given RTX is a valid constant. We already know this
6043 satisfies CONSTANT_P. */
6046 legitimate_constant_p (rtx x
)
6048 switch (GET_CODE (x
))
6053 if (GET_CODE (x
) == PLUS
)
6055 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6060 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6063 /* Only some unspecs are valid as "constants". */
6064 if (GET_CODE (x
) == UNSPEC
)
6065 switch (XINT (x
, 1))
6068 return TARGET_64BIT
;
6071 x
= XVECEXP (x
, 0, 0);
6072 return (GET_CODE (x
) == SYMBOL_REF
6073 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6075 x
= XVECEXP (x
, 0, 0);
6076 return (GET_CODE (x
) == SYMBOL_REF
6077 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6082 /* We must have drilled down to a symbol. */
6083 if (GET_CODE (x
) == LABEL_REF
)
6085 if (GET_CODE (x
) != SYMBOL_REF
)
6090 /* TLS symbols are never valid. */
6091 if (SYMBOL_REF_TLS_MODEL (x
))
6096 if (GET_MODE (x
) == TImode
6097 && x
!= CONST0_RTX (TImode
)
6103 if (x
== CONST0_RTX (GET_MODE (x
)))
6111 /* Otherwise we handle everything else in the move patterns. */
6115 /* Determine if it's legal to put X into the constant pool. This
6116 is not possible for the address of thread-local symbols, which
6117 is checked above. */
6120 ix86_cannot_force_const_mem (rtx x
)
6122 /* We can always put integral constants and vectors in memory. */
6123 switch (GET_CODE (x
))
6133 return !legitimate_constant_p (x
);
6136 /* Determine if a given RTX is a valid constant address. */
6139 constant_address_p (rtx x
)
6141 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6144 /* Nonzero if the constant value X is a legitimate general operand
6145 when generating PIC code. It is given that flag_pic is on and
6146 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6149 legitimate_pic_operand_p (rtx x
)
6153 switch (GET_CODE (x
))
6156 inner
= XEXP (x
, 0);
6157 if (GET_CODE (inner
) == PLUS
6158 && GET_CODE (XEXP (inner
, 1)) == CONST_INT
)
6159 inner
= XEXP (inner
, 0);
6161 /* Only some unspecs are valid as "constants". */
6162 if (GET_CODE (inner
) == UNSPEC
)
6163 switch (XINT (inner
, 1))
6166 return TARGET_64BIT
;
6168 x
= XVECEXP (inner
, 0, 0);
6169 return (GET_CODE (x
) == SYMBOL_REF
6170 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6178 return legitimate_pic_address_disp_p (x
);
6185 /* Determine if a given CONST RTX is a valid memory displacement
6189 legitimate_pic_address_disp_p (rtx disp
)
6193 /* In 64bit mode we can allow direct addresses of symbols and labels
6194 when they are not dynamic symbols. */
6197 rtx op0
= disp
, op1
;
6199 switch (GET_CODE (disp
))
6205 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6207 op0
= XEXP (XEXP (disp
, 0), 0);
6208 op1
= XEXP (XEXP (disp
, 0), 1);
6209 if (GET_CODE (op1
) != CONST_INT
6210 || INTVAL (op1
) >= 16*1024*1024
6211 || INTVAL (op1
) < -16*1024*1024)
6213 if (GET_CODE (op0
) == LABEL_REF
)
6215 if (GET_CODE (op0
) != SYMBOL_REF
)
6220 /* TLS references should always be enclosed in UNSPEC. */
6221 if (SYMBOL_REF_TLS_MODEL (op0
))
6223 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6231 if (GET_CODE (disp
) != CONST
)
6233 disp
= XEXP (disp
, 0);
6237 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6238 of GOT tables. We should not need these anyway. */
6239 if (GET_CODE (disp
) != UNSPEC
6240 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6241 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6244 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6245 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6251 if (GET_CODE (disp
) == PLUS
)
6253 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6255 disp
= XEXP (disp
, 0);
6259 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6262 if (GET_CODE (disp
) != UNSPEC
)
6265 switch (XINT (disp
, 1))
6270 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6272 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6273 While ABI specify also 32bit relocation but we don't produce it in
6274 small PIC model at all. */
6275 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6276 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6278 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6280 case UNSPEC_GOTTPOFF
:
6281 case UNSPEC_GOTNTPOFF
:
6282 case UNSPEC_INDNTPOFF
:
6285 disp
= XVECEXP (disp
, 0, 0);
6286 return (GET_CODE (disp
) == SYMBOL_REF
6287 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6289 disp
= XVECEXP (disp
, 0, 0);
6290 return (GET_CODE (disp
) == SYMBOL_REF
6291 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6293 disp
= XVECEXP (disp
, 0, 0);
6294 return (GET_CODE (disp
) == SYMBOL_REF
6295 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6301 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6302 memory address for an instruction. The MODE argument is the machine mode
6303 for the MEM expression that wants to use this address.
6305 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6306 convert common non-canonical forms to canonical form so that they will
6310 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6312 struct ix86_address parts
;
6313 rtx base
, index
, disp
;
6314 HOST_WIDE_INT scale
;
6315 const char *reason
= NULL
;
6316 rtx reason_rtx
= NULL_RTX
;
6318 if (TARGET_DEBUG_ADDR
)
6321 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6322 GET_MODE_NAME (mode
), strict
);
6326 if (ix86_decompose_address (addr
, &parts
) <= 0)
6328 reason
= "decomposition failed";
6333 index
= parts
.index
;
6335 scale
= parts
.scale
;
6337 /* Validate base register.
6339 Don't allow SUBREG's that span more than a word here. It can lead to spill
6340 failures when the base is one word out of a two word structure, which is
6341 represented internally as a DImode int. */
6350 else if (GET_CODE (base
) == SUBREG
6351 && REG_P (SUBREG_REG (base
))
6352 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6354 reg
= SUBREG_REG (base
);
6357 reason
= "base is not a register";
6361 if (GET_MODE (base
) != Pmode
)
6363 reason
= "base is not in Pmode";
6367 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6368 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6370 reason
= "base is not valid";
6375 /* Validate index register.
6377 Don't allow SUBREG's that span more than a word here -- same as above. */
6386 else if (GET_CODE (index
) == SUBREG
6387 && REG_P (SUBREG_REG (index
))
6388 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6390 reg
= SUBREG_REG (index
);
6393 reason
= "index is not a register";
6397 if (GET_MODE (index
) != Pmode
)
6399 reason
= "index is not in Pmode";
6403 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6404 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6406 reason
= "index is not valid";
6411 /* Validate scale factor. */
6414 reason_rtx
= GEN_INT (scale
);
6417 reason
= "scale without index";
6421 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6423 reason
= "scale is not a valid multiplier";
6428 /* Validate displacement. */
6433 if (GET_CODE (disp
) == CONST
6434 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6435 switch (XINT (XEXP (disp
, 0), 1))
6437 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6438 used. While ABI specify also 32bit relocations, we don't produce
6439 them at all and use IP relative instead. */
6442 gcc_assert (flag_pic
);
6444 goto is_legitimate_pic
;
6445 reason
= "64bit address unspec";
6448 case UNSPEC_GOTPCREL
:
6449 gcc_assert (flag_pic
);
6450 goto is_legitimate_pic
;
6452 case UNSPEC_GOTTPOFF
:
6453 case UNSPEC_GOTNTPOFF
:
6454 case UNSPEC_INDNTPOFF
:
6460 reason
= "invalid address unspec";
6464 else if (SYMBOLIC_CONST (disp
)
6468 && MACHOPIC_INDIRECT
6469 && !machopic_operand_p (disp
)
6475 if (TARGET_64BIT
&& (index
|| base
))
6477 /* foo@dtpoff(%rX) is ok. */
6478 if (GET_CODE (disp
) != CONST
6479 || GET_CODE (XEXP (disp
, 0)) != PLUS
6480 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6481 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6482 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6483 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6485 reason
= "non-constant pic memory reference";
6489 else if (! legitimate_pic_address_disp_p (disp
))
6491 reason
= "displacement is an invalid pic construct";
6495 /* This code used to verify that a symbolic pic displacement
6496 includes the pic_offset_table_rtx register.
6498 While this is good idea, unfortunately these constructs may
6499 be created by "adds using lea" optimization for incorrect
6508 This code is nonsensical, but results in addressing
6509 GOT table with pic_offset_table_rtx base. We can't
6510 just refuse it easily, since it gets matched by
6511 "addsi3" pattern, that later gets split to lea in the
6512 case output register differs from input. While this
6513 can be handled by separate addsi pattern for this case
6514 that never results in lea, this seems to be easier and
6515 correct fix for crash to disable this test. */
6517 else if (GET_CODE (disp
) != LABEL_REF
6518 && GET_CODE (disp
) != CONST_INT
6519 && (GET_CODE (disp
) != CONST
6520 || !legitimate_constant_p (disp
))
6521 && (GET_CODE (disp
) != SYMBOL_REF
6522 || !legitimate_constant_p (disp
)))
6524 reason
= "displacement is not constant";
6527 else if (TARGET_64BIT
6528 && !x86_64_immediate_operand (disp
, VOIDmode
))
6530 reason
= "displacement is out of range";
6535 /* Everything looks valid. */
6536 if (TARGET_DEBUG_ADDR
)
6537 fprintf (stderr
, "Success.\n");
6541 if (TARGET_DEBUG_ADDR
)
6543 fprintf (stderr
, "Error: %s\n", reason
);
6544 debug_rtx (reason_rtx
);
6549 /* Return a unique alias set for the GOT. */
6551 static HOST_WIDE_INT
6552 ix86_GOT_alias_set (void)
6554 static HOST_WIDE_INT set
= -1;
6556 set
= new_alias_set ();
6560 /* Return a legitimate reference for ORIG (an address) using the
6561 register REG. If REG is 0, a new pseudo is generated.
6563 There are two types of references that must be handled:
6565 1. Global data references must load the address from the GOT, via
6566 the PIC reg. An insn is emitted to do this load, and the reg is
6569 2. Static data references, constant pool addresses, and code labels
6570 compute the address as an offset from the GOT, whose base is in
6571 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6572 differentiate them from global data objects. The returned
6573 address is the PIC reg + an unspec constant.
6575 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6576 reg also appears in the address. */
6579 legitimize_pic_address (rtx orig
, rtx reg
)
6586 if (TARGET_MACHO
&& !TARGET_64BIT
)
6589 reg
= gen_reg_rtx (Pmode
);
6590 /* Use the generic Mach-O PIC machinery. */
6591 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6595 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6597 else if (TARGET_64BIT
6598 && ix86_cmodel
!= CM_SMALL_PIC
6599 && local_symbolic_operand (addr
, Pmode
))
6602 /* This symbol may be referenced via a displacement from the PIC
6603 base address (@GOTOFF). */
6605 if (reload_in_progress
)
6606 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6607 if (GET_CODE (addr
) == CONST
)
6608 addr
= XEXP (addr
, 0);
6609 if (GET_CODE (addr
) == PLUS
)
6611 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6612 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6615 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6616 new = gen_rtx_CONST (Pmode
, new);
6618 tmpreg
= gen_reg_rtx (Pmode
);
6621 emit_move_insn (tmpreg
, new);
6625 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6626 tmpreg
, 1, OPTAB_DIRECT
);
6629 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6631 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6633 /* This symbol may be referenced via a displacement from the PIC
6634 base address (@GOTOFF). */
6636 if (reload_in_progress
)
6637 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6638 if (GET_CODE (addr
) == CONST
)
6639 addr
= XEXP (addr
, 0);
6640 if (GET_CODE (addr
) == PLUS
)
6642 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6643 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6646 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6647 new = gen_rtx_CONST (Pmode
, new);
6648 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6652 emit_move_insn (reg
, new);
6656 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6660 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6661 new = gen_rtx_CONST (Pmode
, new);
6662 new = gen_const_mem (Pmode
, new);
6663 set_mem_alias_set (new, ix86_GOT_alias_set ());
6666 reg
= gen_reg_rtx (Pmode
);
6667 /* Use directly gen_movsi, otherwise the address is loaded
6668 into register for CSE. We don't want to CSE this addresses,
6669 instead we CSE addresses from the GOT table, so skip this. */
6670 emit_insn (gen_movsi (reg
, new));
6675 /* This symbol must be referenced via a load from the
6676 Global Offset Table (@GOT). */
6678 if (reload_in_progress
)
6679 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6680 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6681 new = gen_rtx_CONST (Pmode
, new);
6682 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6683 new = gen_const_mem (Pmode
, new);
6684 set_mem_alias_set (new, ix86_GOT_alias_set ());
6687 reg
= gen_reg_rtx (Pmode
);
6688 emit_move_insn (reg
, new);
6694 if (GET_CODE (addr
) == CONST_INT
6695 && !x86_64_immediate_operand (addr
, VOIDmode
))
6699 emit_move_insn (reg
, addr
);
6703 new = force_reg (Pmode
, addr
);
6705 else if (GET_CODE (addr
) == CONST
)
6707 addr
= XEXP (addr
, 0);
6709 /* We must match stuff we generate before. Assume the only
6710 unspecs that can get here are ours. Not that we could do
6711 anything with them anyway.... */
6712 if (GET_CODE (addr
) == UNSPEC
6713 || (GET_CODE (addr
) == PLUS
6714 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6716 gcc_assert (GET_CODE (addr
) == PLUS
);
6718 if (GET_CODE (addr
) == PLUS
)
6720 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6722 /* Check first to see if this is a constant offset from a @GOTOFF
6723 symbol reference. */
6724 if (local_symbolic_operand (op0
, Pmode
)
6725 && GET_CODE (op1
) == CONST_INT
)
6729 if (reload_in_progress
)
6730 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6731 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6733 new = gen_rtx_PLUS (Pmode
, new, op1
);
6734 new = gen_rtx_CONST (Pmode
, new);
6735 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6739 emit_move_insn (reg
, new);
6745 if (INTVAL (op1
) < -16*1024*1024
6746 || INTVAL (op1
) >= 16*1024*1024)
6748 if (!x86_64_immediate_operand (op1
, Pmode
))
6749 op1
= force_reg (Pmode
, op1
);
6750 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6756 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6757 new = legitimize_pic_address (XEXP (addr
, 1),
6758 base
== reg
? NULL_RTX
: reg
);
6760 if (GET_CODE (new) == CONST_INT
)
6761 new = plus_constant (base
, INTVAL (new));
6764 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6766 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6767 new = XEXP (new, 1);
6769 new = gen_rtx_PLUS (Pmode
, base
, new);
6777 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6780 get_thread_pointer (int to_reg
)
6784 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6788 reg
= gen_reg_rtx (Pmode
);
6789 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6790 insn
= emit_insn (insn
);
6795 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6796 false if we expect this to be used for a memory address and true if
6797 we expect to load the address into a register. */
6800 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6802 rtx dest
, base
, off
, pic
, tp
;
6807 case TLS_MODEL_GLOBAL_DYNAMIC
:
6808 dest
= gen_reg_rtx (Pmode
);
6809 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6811 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6813 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6816 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6817 insns
= get_insns ();
6820 emit_libcall_block (insns
, dest
, rax
, x
);
6822 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6823 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
6825 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6827 if (TARGET_GNU2_TLS
)
6829 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
6831 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6835 case TLS_MODEL_LOCAL_DYNAMIC
:
6836 base
= gen_reg_rtx (Pmode
);
6837 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6839 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6841 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6844 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6845 insns
= get_insns ();
6848 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6849 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6850 emit_libcall_block (insns
, base
, rax
, note
);
6852 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6853 emit_insn (gen_tls_local_dynamic_base_64 (base
));
6855 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6857 if (TARGET_GNU2_TLS
)
6859 rtx x
= ix86_tls_module_base ();
6861 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
6862 gen_rtx_MINUS (Pmode
, x
, tp
));
6865 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6866 off
= gen_rtx_CONST (Pmode
, off
);
6868 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
6870 if (TARGET_GNU2_TLS
)
6872 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
6874 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6879 case TLS_MODEL_INITIAL_EXEC
:
6883 type
= UNSPEC_GOTNTPOFF
;
6887 if (reload_in_progress
)
6888 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6889 pic
= pic_offset_table_rtx
;
6890 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6892 else if (!TARGET_ANY_GNU_TLS
)
6894 pic
= gen_reg_rtx (Pmode
);
6895 emit_insn (gen_set_got (pic
));
6896 type
= UNSPEC_GOTTPOFF
;
6901 type
= UNSPEC_INDNTPOFF
;
6904 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6905 off
= gen_rtx_CONST (Pmode
, off
);
6907 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6908 off
= gen_const_mem (Pmode
, off
);
6909 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6911 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6913 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6914 off
= force_reg (Pmode
, off
);
6915 return gen_rtx_PLUS (Pmode
, base
, off
);
6919 base
= get_thread_pointer (true);
6920 dest
= gen_reg_rtx (Pmode
);
6921 emit_insn (gen_subsi3 (dest
, base
, off
));
6925 case TLS_MODEL_LOCAL_EXEC
:
6926 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6927 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6928 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6929 off
= gen_rtx_CONST (Pmode
, off
);
6931 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6933 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6934 return gen_rtx_PLUS (Pmode
, base
, off
);
6938 base
= get_thread_pointer (true);
6939 dest
= gen_reg_rtx (Pmode
);
6940 emit_insn (gen_subsi3 (dest
, base
, off
));
6951 /* Try machine-dependent ways of modifying an illegitimate address
6952 to be legitimate. If we find one, return the new, valid address.
6953 This macro is used in only one place: `memory_address' in explow.c.
6955 OLDX is the address as it was before break_out_memory_refs was called.
6956 In some cases it is useful to look at this to decide what needs to be done.
6958 MODE and WIN are passed so that this macro can use
6959 GO_IF_LEGITIMATE_ADDRESS.
6961 It is always safe for this macro to do nothing. It exists to recognize
6962 opportunities to optimize the output.
6964 For the 80386, we handle X+REG by loading X into a register R and
6965 using R+REG. R will go in a general reg and indexing will be used.
6966 However, if REG is a broken-out memory address or multiplication,
6967 nothing needs to be done because REG can certainly go in a general reg.
6969 When -fpic is used, special handling is needed for symbolic references.
6970 See comments by legitimize_pic_address in i386.c for details. */
6973 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6978 if (TARGET_DEBUG_ADDR
)
6980 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6981 GET_MODE_NAME (mode
));
6985 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
6987 return legitimize_tls_address (x
, log
, false);
6988 if (GET_CODE (x
) == CONST
6989 && GET_CODE (XEXP (x
, 0)) == PLUS
6990 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
6991 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
6993 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
6994 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
6997 if (flag_pic
&& SYMBOLIC_CONST (x
))
6998 return legitimize_pic_address (x
, 0);
7000 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7001 if (GET_CODE (x
) == ASHIFT
7002 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7003 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7006 log
= INTVAL (XEXP (x
, 1));
7007 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7008 GEN_INT (1 << log
));
7011 if (GET_CODE (x
) == PLUS
)
7013 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7015 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7016 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
7017 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7020 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7021 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7022 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7023 GEN_INT (1 << log
));
7026 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7027 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
7028 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7031 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7032 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7033 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7034 GEN_INT (1 << log
));
7037 /* Put multiply first if it isn't already. */
7038 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7040 rtx tmp
= XEXP (x
, 0);
7041 XEXP (x
, 0) = XEXP (x
, 1);
7046 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7047 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7048 created by virtual register instantiation, register elimination, and
7049 similar optimizations. */
7050 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7053 x
= gen_rtx_PLUS (Pmode
,
7054 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7055 XEXP (XEXP (x
, 1), 0)),
7056 XEXP (XEXP (x
, 1), 1));
7060 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7061 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7062 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7063 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7064 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7065 && CONSTANT_P (XEXP (x
, 1)))
7068 rtx other
= NULL_RTX
;
7070 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7072 constant
= XEXP (x
, 1);
7073 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7075 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
7077 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7078 other
= XEXP (x
, 1);
7086 x
= gen_rtx_PLUS (Pmode
,
7087 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7088 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7089 plus_constant (other
, INTVAL (constant
)));
7093 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7096 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7099 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7102 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7105 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7109 && GET_CODE (XEXP (x
, 1)) == REG
7110 && GET_CODE (XEXP (x
, 0)) == REG
)
7113 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7116 x
= legitimize_pic_address (x
, 0);
7119 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7122 if (GET_CODE (XEXP (x
, 0)) == REG
)
7124 rtx temp
= gen_reg_rtx (Pmode
);
7125 rtx val
= force_operand (XEXP (x
, 1), temp
);
7127 emit_move_insn (temp
, val
);
7133 else if (GET_CODE (XEXP (x
, 1)) == REG
)
7135 rtx temp
= gen_reg_rtx (Pmode
);
7136 rtx val
= force_operand (XEXP (x
, 0), temp
);
7138 emit_move_insn (temp
, val
);
7148 /* Print an integer constant expression in assembler syntax. Addition
7149 and subtraction are the only arithmetic that may appear in these
7150 expressions. FILE is the stdio stream to write to, X is the rtx, and
7151 CODE is the operand print code from the output string. */
7154 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7158 switch (GET_CODE (x
))
7161 gcc_assert (flag_pic
);
7166 output_addr_const (file
, x
);
7167 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7168 fputs ("@PLT", file
);
7175 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7176 assemble_name (asm_out_file
, buf
);
7180 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7184 /* This used to output parentheses around the expression,
7185 but that does not work on the 386 (either ATT or BSD assembler). */
7186 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7190 if (GET_MODE (x
) == VOIDmode
)
7192 /* We can use %d if the number is <32 bits and positive. */
7193 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7194 fprintf (file
, "0x%lx%08lx",
7195 (unsigned long) CONST_DOUBLE_HIGH (x
),
7196 (unsigned long) CONST_DOUBLE_LOW (x
));
7198 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7201 /* We can't handle floating point constants;
7202 PRINT_OPERAND must handle them. */
7203 output_operand_lossage ("floating constant misused");
7207 /* Some assemblers need integer constants to appear first. */
7208 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
7210 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7212 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7216 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
7217 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7219 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7225 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7226 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7228 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7230 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7234 gcc_assert (XVECLEN (x
, 0) == 1);
7235 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7236 switch (XINT (x
, 1))
7239 fputs ("@GOT", file
);
7242 fputs ("@GOTOFF", file
);
7244 case UNSPEC_GOTPCREL
:
7245 fputs ("@GOTPCREL(%rip)", file
);
7247 case UNSPEC_GOTTPOFF
:
7248 /* FIXME: This might be @TPOFF in Sun ld too. */
7249 fputs ("@GOTTPOFF", file
);
7252 fputs ("@TPOFF", file
);
7256 fputs ("@TPOFF", file
);
7258 fputs ("@NTPOFF", file
);
7261 fputs ("@DTPOFF", file
);
7263 case UNSPEC_GOTNTPOFF
:
7265 fputs ("@GOTTPOFF(%rip)", file
);
7267 fputs ("@GOTNTPOFF", file
);
7269 case UNSPEC_INDNTPOFF
:
7270 fputs ("@INDNTPOFF", file
);
7273 output_operand_lossage ("invalid UNSPEC as operand");
7279 output_operand_lossage ("invalid expression as operand");
7283 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7284 We need to emit DTP-relative relocations. */
7287 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7289 fputs (ASM_LONG
, file
);
7290 output_addr_const (file
, x
);
7291 fputs ("@DTPOFF", file
);
7297 fputs (", 0", file
);
7304 /* In the name of slightly smaller debug output, and to cater to
7305 general assembler lossage, recognize PIC+GOTOFF and turn it back
7306 into a direct symbol reference.
7308 On Darwin, this is necessary to avoid a crash, because Darwin
7309 has a different PIC label for each routine but the DWARF debugging
7310 information is not associated with any particular routine, so it's
7311 necessary to remove references to the PIC label from RTL stored by
7312 the DWARF output code. */
7315 ix86_delegitimize_address (rtx orig_x
)
7318 /* reg_addend is NULL or a multiple of some register. */
7319 rtx reg_addend
= NULL_RTX
;
7320 /* const_addend is NULL or a const_int. */
7321 rtx const_addend
= NULL_RTX
;
7322 /* This is the result, or NULL. */
7323 rtx result
= NULL_RTX
;
7325 if (GET_CODE (x
) == MEM
)
7330 if (GET_CODE (x
) != CONST
7331 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7332 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7333 || GET_CODE (orig_x
) != MEM
)
7335 return XVECEXP (XEXP (x
, 0), 0, 0);
7338 if (GET_CODE (x
) != PLUS
7339 || GET_CODE (XEXP (x
, 1)) != CONST
)
7342 if (GET_CODE (XEXP (x
, 0)) == REG
7343 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7344 /* %ebx + GOT/GOTOFF */
7346 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7348 /* %ebx + %reg * scale + GOT/GOTOFF */
7349 reg_addend
= XEXP (x
, 0);
7350 if (GET_CODE (XEXP (reg_addend
, 0)) == REG
7351 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7352 reg_addend
= XEXP (reg_addend
, 1);
7353 else if (GET_CODE (XEXP (reg_addend
, 1)) == REG
7354 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7355 reg_addend
= XEXP (reg_addend
, 0);
7358 if (GET_CODE (reg_addend
) != REG
7359 && GET_CODE (reg_addend
) != MULT
7360 && GET_CODE (reg_addend
) != ASHIFT
)
7366 x
= XEXP (XEXP (x
, 1), 0);
7367 if (GET_CODE (x
) == PLUS
7368 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7370 const_addend
= XEXP (x
, 1);
7374 if (GET_CODE (x
) == UNSPEC
7375 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7376 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7377 result
= XVECEXP (x
, 0, 0);
7379 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7380 && GET_CODE (orig_x
) != MEM
)
7381 result
= XEXP (x
, 0);
7387 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7389 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7394 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7399 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7401 enum rtx_code second_code
, bypass_code
;
7402 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7403 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7404 code
= ix86_fp_compare_code_to_integer (code
);
7408 code
= reverse_condition (code
);
7419 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7423 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7424 Those same assemblers have the same but opposite lossage on cmov. */
7425 gcc_assert (mode
== CCmode
);
7426 suffix
= fp
? "nbe" : "a";
7446 gcc_assert (mode
== CCmode
);
7468 gcc_assert (mode
== CCmode
);
7469 suffix
= fp
? "nb" : "ae";
7472 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7476 gcc_assert (mode
== CCmode
);
7480 suffix
= fp
? "u" : "p";
7483 suffix
= fp
? "nu" : "np";
7488 fputs (suffix
, file
);
7491 /* Print the name of register X to FILE based on its machine mode and number.
7492 If CODE is 'w', pretend the mode is HImode.
7493 If CODE is 'b', pretend the mode is QImode.
7494 If CODE is 'k', pretend the mode is SImode.
7495 If CODE is 'q', pretend the mode is DImode.
7496 If CODE is 'h', pretend the reg is the 'high' byte register.
7497 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7500 print_reg (rtx x
, int code
, FILE *file
)
7502 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7503 && REGNO (x
) != FRAME_POINTER_REGNUM
7504 && REGNO (x
) != FLAGS_REG
7505 && REGNO (x
) != FPSR_REG
7506 && REGNO (x
) != FPCR_REG
);
7508 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7511 if (code
== 'w' || MMX_REG_P (x
))
7513 else if (code
== 'b')
7515 else if (code
== 'k')
7517 else if (code
== 'q')
7519 else if (code
== 'y')
7521 else if (code
== 'h')
7524 code
= GET_MODE_SIZE (GET_MODE (x
));
7526 /* Irritatingly, AMD extended registers use different naming convention
7527 from the normal registers. */
7528 if (REX_INT_REG_P (x
))
7530 gcc_assert (TARGET_64BIT
);
7534 error ("extended registers have no high halves");
7537 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7540 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7543 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7546 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7549 error ("unsupported operand size for extended register");
7557 if (STACK_TOP_P (x
))
7559 fputs ("st(0)", file
);
7566 if (! ANY_FP_REG_P (x
))
7567 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7572 fputs (hi_reg_name
[REGNO (x
)], file
);
7575 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7577 fputs (qi_reg_name
[REGNO (x
)], file
);
7580 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7582 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7589 /* Locate some local-dynamic symbol still in use by this function
7590 so that we can print its name in some tls_local_dynamic_base
7594 get_some_local_dynamic_name (void)
7598 if (cfun
->machine
->some_ld_name
)
7599 return cfun
->machine
->some_ld_name
;
7601 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7603 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7604 return cfun
->machine
->some_ld_name
;
7610 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7614 if (GET_CODE (x
) == SYMBOL_REF
7615 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7617 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7625 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7626 C -- print opcode suffix for set/cmov insn.
7627 c -- like C, but print reversed condition
7628 F,f -- likewise, but for floating-point.
7629 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7631 R -- print the prefix for register names.
7632 z -- print the opcode suffix for the size of the current operand.
7633 * -- print a star (in certain assembler syntax)
7634 A -- print an absolute memory reference.
7635 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7636 s -- print a shift double count, followed by the assemblers argument
7638 b -- print the QImode name of the register for the indicated operand.
7639 %b0 would print %al if operands[0] is reg 0.
7640 w -- likewise, print the HImode name of the register.
7641 k -- likewise, print the SImode name of the register.
7642 q -- likewise, print the DImode name of the register.
7643 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7644 y -- print "st(0)" instead of "st" as a register.
7645 D -- print condition for SSE cmp instruction.
7646 P -- if PIC, print an @PLT suffix.
7647 X -- don't print any sort of PIC '@' suffix for a symbol.
7648 & -- print some in-use local-dynamic symbol name.
7649 H -- print a memory address offset by 8; used for sse high-parts
7653 print_operand (FILE *file
, rtx x
, int code
)
7660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7665 assemble_name (file
, get_some_local_dynamic_name ());
7669 switch (ASSEMBLER_DIALECT
)
7676 /* Intel syntax. For absolute addresses, registers should not
7677 be surrounded by braces. */
7678 if (GET_CODE (x
) != REG
)
7681 PRINT_OPERAND (file
, x
, 0);
7691 PRINT_OPERAND (file
, x
, 0);
7696 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7701 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7706 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7711 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7716 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7721 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7726 /* 387 opcodes don't get size suffixes if the operands are
7728 if (STACK_REG_P (x
))
7731 /* Likewise if using Intel opcodes. */
7732 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7735 /* This is the size of op from size of operand. */
7736 switch (GET_MODE_SIZE (GET_MODE (x
)))
7739 #ifdef HAVE_GAS_FILDS_FISTS
7745 if (GET_MODE (x
) == SFmode
)
7760 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7762 #ifdef GAS_MNEMONICS
7788 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7790 PRINT_OPERAND (file
, x
, 0);
7796 /* Little bit of braindamage here. The SSE compare instructions
7797 does use completely different names for the comparisons that the
7798 fp conditional moves. */
7799 switch (GET_CODE (x
))
7814 fputs ("unord", file
);
7818 fputs ("neq", file
);
7822 fputs ("nlt", file
);
7826 fputs ("nle", file
);
7829 fputs ("ord", file
);
7836 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7837 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7839 switch (GET_MODE (x
))
7841 case HImode
: putc ('w', file
); break;
7843 case SFmode
: putc ('l', file
); break;
7845 case DFmode
: putc ('q', file
); break;
7846 default: gcc_unreachable ();
7853 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7856 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7857 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7860 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7863 /* Like above, but reverse condition */
7865 /* Check to see if argument to %c is really a constant
7866 and not a condition code which needs to be reversed. */
7867 if (!COMPARISON_P (x
))
7869 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7872 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7875 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7876 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7879 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7883 /* It doesn't actually matter what mode we use here, as we're
7884 only going to use this for printing. */
7885 x
= adjust_address_nv (x
, DImode
, 8);
7892 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7895 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7898 int pred_val
= INTVAL (XEXP (x
, 0));
7900 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7901 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7903 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7904 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7906 /* Emit hints only in the case default branch prediction
7907 heuristics would fail. */
7908 if (taken
!= cputaken
)
7910 /* We use 3e (DS) prefix for taken branches and
7911 2e (CS) prefix for not taken branches. */
7913 fputs ("ds ; ", file
);
7915 fputs ("cs ; ", file
);
7922 output_operand_lossage ("invalid operand code '%c'", code
);
7926 if (GET_CODE (x
) == REG
)
7927 print_reg (x
, code
, file
);
7929 else if (GET_CODE (x
) == MEM
)
7931 /* No `byte ptr' prefix for call instructions. */
7932 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7935 switch (GET_MODE_SIZE (GET_MODE (x
)))
7937 case 1: size
= "BYTE"; break;
7938 case 2: size
= "WORD"; break;
7939 case 4: size
= "DWORD"; break;
7940 case 8: size
= "QWORD"; break;
7941 case 12: size
= "XWORD"; break;
7942 case 16: size
= "XMMWORD"; break;
7947 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7950 else if (code
== 'w')
7952 else if (code
== 'k')
7956 fputs (" PTR ", file
);
7960 /* Avoid (%rip) for call operands. */
7961 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7962 && GET_CODE (x
) != CONST_INT
)
7963 output_addr_const (file
, x
);
7964 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7965 output_operand_lossage ("invalid constraints for operand");
7970 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7975 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7976 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7978 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7980 fprintf (file
, "0x%08lx", l
);
7983 /* These float cases don't actually occur as immediate operands. */
7984 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7988 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7989 fprintf (file
, "%s", dstr
);
7992 else if (GET_CODE (x
) == CONST_DOUBLE
7993 && GET_MODE (x
) == XFmode
)
7997 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7998 fprintf (file
, "%s", dstr
);
8003 /* We have patterns that allow zero sets of memory, for instance.
8004 In 64-bit mode, we should probably support all 8-byte vectors,
8005 since we can in fact encode that into an immediate. */
8006 if (GET_CODE (x
) == CONST_VECTOR
)
8008 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8014 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8016 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8019 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8020 || GET_CODE (x
) == LABEL_REF
)
8022 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8025 fputs ("OFFSET FLAT:", file
);
8028 if (GET_CODE (x
) == CONST_INT
)
8029 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8031 output_pic_addr_const (file
, x
, code
);
8033 output_addr_const (file
, x
);
8037 /* Print a memory operand whose address is ADDR. */
8040 print_operand_address (FILE *file
, rtx addr
)
8042 struct ix86_address parts
;
8043 rtx base
, index
, disp
;
8045 int ok
= ix86_decompose_address (addr
, &parts
);
8050 index
= parts
.index
;
8052 scale
= parts
.scale
;
8060 if (USER_LABEL_PREFIX
[0] == 0)
8062 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8068 if (!base
&& !index
)
8070 /* Displacement only requires special attention. */
8072 if (GET_CODE (disp
) == CONST_INT
)
8074 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8076 if (USER_LABEL_PREFIX
[0] == 0)
8078 fputs ("ds:", file
);
8080 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8083 output_pic_addr_const (file
, disp
, 0);
8085 output_addr_const (file
, disp
);
8087 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8090 if (GET_CODE (disp
) == CONST
8091 && GET_CODE (XEXP (disp
, 0)) == PLUS
8092 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8093 disp
= XEXP (XEXP (disp
, 0), 0);
8094 if (GET_CODE (disp
) == LABEL_REF
8095 || (GET_CODE (disp
) == SYMBOL_REF
8096 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8097 fputs ("(%rip)", file
);
8102 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8107 output_pic_addr_const (file
, disp
, 0);
8108 else if (GET_CODE (disp
) == LABEL_REF
)
8109 output_asm_label (disp
);
8111 output_addr_const (file
, disp
);
8116 print_reg (base
, 0, file
);
8120 print_reg (index
, 0, file
);
8122 fprintf (file
, ",%d", scale
);
8128 rtx offset
= NULL_RTX
;
8132 /* Pull out the offset of a symbol; print any symbol itself. */
8133 if (GET_CODE (disp
) == CONST
8134 && GET_CODE (XEXP (disp
, 0)) == PLUS
8135 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8137 offset
= XEXP (XEXP (disp
, 0), 1);
8138 disp
= gen_rtx_CONST (VOIDmode
,
8139 XEXP (XEXP (disp
, 0), 0));
8143 output_pic_addr_const (file
, disp
, 0);
8144 else if (GET_CODE (disp
) == LABEL_REF
)
8145 output_asm_label (disp
);
8146 else if (GET_CODE (disp
) == CONST_INT
)
8149 output_addr_const (file
, disp
);
8155 print_reg (base
, 0, file
);
8158 if (INTVAL (offset
) >= 0)
8160 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8164 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8171 print_reg (index
, 0, file
);
8173 fprintf (file
, "*%d", scale
);
8181 output_addr_const_extra (FILE *file
, rtx x
)
8185 if (GET_CODE (x
) != UNSPEC
)
8188 op
= XVECEXP (x
, 0, 0);
8189 switch (XINT (x
, 1))
8191 case UNSPEC_GOTTPOFF
:
8192 output_addr_const (file
, op
);
8193 /* FIXME: This might be @TPOFF in Sun ld. */
8194 fputs ("@GOTTPOFF", file
);
8197 output_addr_const (file
, op
);
8198 fputs ("@TPOFF", file
);
8201 output_addr_const (file
, op
);
8203 fputs ("@TPOFF", file
);
8205 fputs ("@NTPOFF", file
);
8208 output_addr_const (file
, op
);
8209 fputs ("@DTPOFF", file
);
8211 case UNSPEC_GOTNTPOFF
:
8212 output_addr_const (file
, op
);
8214 fputs ("@GOTTPOFF(%rip)", file
);
8216 fputs ("@GOTNTPOFF", file
);
8218 case UNSPEC_INDNTPOFF
:
8219 output_addr_const (file
, op
);
8220 fputs ("@INDNTPOFF", file
);
8230 /* Split one or more DImode RTL references into pairs of SImode
8231 references. The RTL can be REG, offsettable MEM, integer constant, or
8232 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8233 split and "num" is its length. lo_half and hi_half are output arrays
8234 that parallel "operands". */
8237 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8241 rtx op
= operands
[num
];
8243 /* simplify_subreg refuse to split volatile memory addresses,
8244 but we still have to handle it. */
8245 if (GET_CODE (op
) == MEM
)
8247 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8248 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8252 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8253 GET_MODE (op
) == VOIDmode
8254 ? DImode
: GET_MODE (op
), 0);
8255 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8256 GET_MODE (op
) == VOIDmode
8257 ? DImode
: GET_MODE (op
), 4);
8261 /* Split one or more TImode RTL references into pairs of DImode
8262 references. The RTL can be REG, offsettable MEM, integer constant, or
8263 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8264 split and "num" is its length. lo_half and hi_half are output arrays
8265 that parallel "operands". */
8268 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8272 rtx op
= operands
[num
];
8274 /* simplify_subreg refuse to split volatile memory addresses, but we
8275 still have to handle it. */
8276 if (GET_CODE (op
) == MEM
)
8278 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8279 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8283 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8284 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8289 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8290 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8291 is the expression of the binary operation. The output may either be
8292 emitted here, or returned to the caller, like all output_* functions.
8294 There is no guarantee that the operands are the same mode, as they
8295 might be within FLOAT or FLOAT_EXTEND expressions. */
8297 #ifndef SYSV386_COMPAT
8298 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8299 wants to fix the assemblers because that causes incompatibility
8300 with gcc. No-one wants to fix gcc because that causes
8301 incompatibility with assemblers... You can use the option of
8302 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8303 #define SYSV386_COMPAT 1
8307 output_387_binary_op (rtx insn
, rtx
*operands
)
8309 static char buf
[30];
8312 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8314 #ifdef ENABLE_CHECKING
8315 /* Even if we do not want to check the inputs, this documents input
8316 constraints. Which helps in understanding the following code. */
8317 if (STACK_REG_P (operands
[0])
8318 && ((REG_P (operands
[1])
8319 && REGNO (operands
[0]) == REGNO (operands
[1])
8320 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8321 || (REG_P (operands
[2])
8322 && REGNO (operands
[0]) == REGNO (operands
[2])
8323 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8324 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8327 gcc_assert (is_sse
);
8330 switch (GET_CODE (operands
[3]))
8333 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8334 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8342 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8343 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8351 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8352 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8360 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8361 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8375 if (GET_MODE (operands
[0]) == SFmode
)
8376 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8378 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8383 switch (GET_CODE (operands
[3]))
8387 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8389 rtx temp
= operands
[2];
8390 operands
[2] = operands
[1];
8394 /* know operands[0] == operands[1]. */
8396 if (GET_CODE (operands
[2]) == MEM
)
8402 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8404 if (STACK_TOP_P (operands
[0]))
8405 /* How is it that we are storing to a dead operand[2]?
8406 Well, presumably operands[1] is dead too. We can't
8407 store the result to st(0) as st(0) gets popped on this
8408 instruction. Instead store to operands[2] (which I
8409 think has to be st(1)). st(1) will be popped later.
8410 gcc <= 2.8.1 didn't have this check and generated
8411 assembly code that the Unixware assembler rejected. */
8412 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8414 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8418 if (STACK_TOP_P (operands
[0]))
8419 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8421 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8426 if (GET_CODE (operands
[1]) == MEM
)
8432 if (GET_CODE (operands
[2]) == MEM
)
8438 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8441 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8442 derived assemblers, confusingly reverse the direction of
8443 the operation for fsub{r} and fdiv{r} when the
8444 destination register is not st(0). The Intel assembler
8445 doesn't have this brain damage. Read !SYSV386_COMPAT to
8446 figure out what the hardware really does. */
8447 if (STACK_TOP_P (operands
[0]))
8448 p
= "{p\t%0, %2|rp\t%2, %0}";
8450 p
= "{rp\t%2, %0|p\t%0, %2}";
8452 if (STACK_TOP_P (operands
[0]))
8453 /* As above for fmul/fadd, we can't store to st(0). */
8454 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8456 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8461 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8464 if (STACK_TOP_P (operands
[0]))
8465 p
= "{rp\t%0, %1|p\t%1, %0}";
8467 p
= "{p\t%1, %0|rp\t%0, %1}";
8469 if (STACK_TOP_P (operands
[0]))
8470 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8472 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8477 if (STACK_TOP_P (operands
[0]))
8479 if (STACK_TOP_P (operands
[1]))
8480 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8482 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8485 else if (STACK_TOP_P (operands
[1]))
8488 p
= "{\t%1, %0|r\t%0, %1}";
8490 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8496 p
= "{r\t%2, %0|\t%0, %2}";
8498 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8511 /* Return needed mode for entity in optimize_mode_switching pass. */
8514 ix86_mode_needed (int entity
, rtx insn
)
8516 enum attr_i387_cw mode
;
8518 /* The mode UNINITIALIZED is used to store control word after a
8519 function call or ASM pattern. The mode ANY specify that function
8520 has no requirements on the control word and make no changes in the
8521 bits we are interested in. */
8524 || (NONJUMP_INSN_P (insn
)
8525 && (asm_noperands (PATTERN (insn
)) >= 0
8526 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8527 return I387_CW_UNINITIALIZED
;
8529 if (recog_memoized (insn
) < 0)
8532 mode
= get_attr_i387_cw (insn
);
8537 if (mode
== I387_CW_TRUNC
)
8542 if (mode
== I387_CW_FLOOR
)
8547 if (mode
== I387_CW_CEIL
)
8552 if (mode
== I387_CW_MASK_PM
)
8563 /* Output code to initialize control word copies used by trunc?f?i and
8564 rounding patterns. CURRENT_MODE is set to current control word,
8565 while NEW_MODE is set to new control word. */
8568 emit_i387_cw_initialization (int mode
)
8570 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8575 rtx reg
= gen_reg_rtx (HImode
);
8577 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8578 emit_move_insn (reg
, stored_mode
);
8580 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8585 /* round toward zero (truncate) */
8586 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8587 slot
= SLOT_CW_TRUNC
;
8591 /* round down toward -oo */
8592 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8593 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8594 slot
= SLOT_CW_FLOOR
;
8598 /* round up toward +oo */
8599 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8600 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8601 slot
= SLOT_CW_CEIL
;
8604 case I387_CW_MASK_PM
:
8605 /* mask precision exception for nearbyint() */
8606 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8607 slot
= SLOT_CW_MASK_PM
;
8619 /* round toward zero (truncate) */
8620 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8621 slot
= SLOT_CW_TRUNC
;
8625 /* round down toward -oo */
8626 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8627 slot
= SLOT_CW_FLOOR
;
8631 /* round up toward +oo */
8632 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8633 slot
= SLOT_CW_CEIL
;
8636 case I387_CW_MASK_PM
:
8637 /* mask precision exception for nearbyint() */
8638 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8639 slot
= SLOT_CW_MASK_PM
;
8647 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8649 new_mode
= assign_386_stack_local (HImode
, slot
);
8650 emit_move_insn (new_mode
, reg
);
8653 /* Output code for INSN to convert a float to a signed int. OPERANDS
8654 are the insn operands. The output may be [HSD]Imode and the input
8655 operand may be [SDX]Fmode. */
8658 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8660 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8661 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8662 int round_mode
= get_attr_i387_cw (insn
);
8664 /* Jump through a hoop or two for DImode, since the hardware has no
8665 non-popping instruction. We used to do this a different way, but
8666 that was somewhat fragile and broke with post-reload splitters. */
8667 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8668 output_asm_insn ("fld\t%y1", operands
);
8670 gcc_assert (STACK_TOP_P (operands
[1]));
8671 gcc_assert (GET_CODE (operands
[0]) == MEM
);
8674 output_asm_insn ("fisttp%z0\t%0", operands
);
8677 if (round_mode
!= I387_CW_ANY
)
8678 output_asm_insn ("fldcw\t%3", operands
);
8679 if (stack_top_dies
|| dimode_p
)
8680 output_asm_insn ("fistp%z0\t%0", operands
);
8682 output_asm_insn ("fist%z0\t%0", operands
);
8683 if (round_mode
!= I387_CW_ANY
)
8684 output_asm_insn ("fldcw\t%2", operands
);
8690 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8691 have the values zero or one, indicates the ffreep insn's operand
8692 from the OPERANDS array. */
8695 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8697 if (TARGET_USE_FFREEP
)
8698 #if HAVE_AS_IX86_FFREEP
8699 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8702 static char retval
[] = ".word\t0xc_df";
8703 int regno
= REGNO (operands
[opno
]);
8705 gcc_assert (FP_REGNO_P (regno
));
8707 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
8712 return opno
? "fstp\t%y1" : "fstp\t%y0";
8716 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8717 should be used. UNORDERED_P is true when fucom should be used. */
8720 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8723 rtx cmp_op0
, cmp_op1
;
8724 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8728 cmp_op0
= operands
[0];
8729 cmp_op1
= operands
[1];
8733 cmp_op0
= operands
[1];
8734 cmp_op1
= operands
[2];
8739 if (GET_MODE (operands
[0]) == SFmode
)
8741 return "ucomiss\t{%1, %0|%0, %1}";
8743 return "comiss\t{%1, %0|%0, %1}";
8746 return "ucomisd\t{%1, %0|%0, %1}";
8748 return "comisd\t{%1, %0|%0, %1}";
8751 gcc_assert (STACK_TOP_P (cmp_op0
));
8753 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8755 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8759 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8760 return output_387_ffreep (operands
, 1);
8763 return "ftst\n\tfnstsw\t%0";
8766 if (STACK_REG_P (cmp_op1
)
8768 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8769 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8771 /* If both the top of the 387 stack dies, and the other operand
8772 is also a stack register that dies, then this must be a
8773 `fcompp' float compare */
8777 /* There is no double popping fcomi variant. Fortunately,
8778 eflags is immune from the fstp's cc clobbering. */
8780 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8782 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8783 return output_387_ffreep (operands
, 0);
8788 return "fucompp\n\tfnstsw\t%0";
8790 return "fcompp\n\tfnstsw\t%0";
8795 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8797 static const char * const alt
[16] =
8799 "fcom%z2\t%y2\n\tfnstsw\t%0",
8800 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8801 "fucom%z2\t%y2\n\tfnstsw\t%0",
8802 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8804 "ficom%z2\t%y2\n\tfnstsw\t%0",
8805 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8809 "fcomi\t{%y1, %0|%0, %y1}",
8810 "fcomip\t{%y1, %0|%0, %y1}",
8811 "fucomi\t{%y1, %0|%0, %y1}",
8812 "fucomip\t{%y1, %0|%0, %y1}",
8823 mask
= eflags_p
<< 3;
8824 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
8825 mask
|= unordered_p
<< 1;
8826 mask
|= stack_top_dies
;
8828 gcc_assert (mask
< 16);
8837 ix86_output_addr_vec_elt (FILE *file
, int value
)
8839 const char *directive
= ASM_LONG
;
8843 directive
= ASM_QUAD
;
8845 gcc_assert (!TARGET_64BIT
);
8848 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8852 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8855 fprintf (file
, "%s%s%d-%s%d\n",
8856 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8857 else if (HAVE_AS_GOTOFF_IN_DATA
)
8858 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8860 else if (TARGET_MACHO
)
8862 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8863 machopic_output_function_base_name (file
);
8864 fprintf(file
, "\n");
8868 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8869 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8872 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8876 ix86_expand_clear (rtx dest
)
8880 /* We play register width games, which are only valid after reload. */
8881 gcc_assert (reload_completed
);
8883 /* Avoid HImode and its attendant prefix byte. */
8884 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8885 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8887 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8889 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8890 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8892 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8893 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8899 /* X is an unchanging MEM. If it is a constant pool reference, return
8900 the constant pool rtx, else NULL. */
8903 maybe_get_pool_constant (rtx x
)
8905 x
= ix86_delegitimize_address (XEXP (x
, 0));
8907 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8908 return get_pool_constant (x
);
8914 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8916 int strict
= (reload_in_progress
|| reload_completed
);
8918 enum tls_model model
;
8923 if (GET_CODE (op1
) == SYMBOL_REF
)
8925 model
= SYMBOL_REF_TLS_MODEL (op1
);
8928 op1
= legitimize_tls_address (op1
, model
, true);
8929 op1
= force_operand (op1
, op0
);
8934 else if (GET_CODE (op1
) == CONST
8935 && GET_CODE (XEXP (op1
, 0)) == PLUS
8936 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
8938 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
8941 rtx addend
= XEXP (XEXP (op1
, 0), 1);
8942 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
8943 op1
= force_operand (op1
, NULL
);
8944 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
8945 op0
, 1, OPTAB_DIRECT
);
8951 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8953 if (TARGET_MACHO
&& !TARGET_64BIT
)
8958 rtx temp
= ((reload_in_progress
8959 || ((op0
&& GET_CODE (op0
) == REG
)
8961 ? op0
: gen_reg_rtx (Pmode
));
8962 op1
= machopic_indirect_data_reference (op1
, temp
);
8963 op1
= machopic_legitimize_pic_address (op1
, mode
,
8964 temp
== op1
? 0 : temp
);
8966 else if (MACHOPIC_INDIRECT
)
8967 op1
= machopic_indirect_data_reference (op1
, 0);
8974 if (GET_CODE (op0
) == MEM
)
8975 op1
= force_reg (Pmode
, op1
);
8977 op1
= legitimize_address (op1
, op1
, Pmode
);
8982 if (GET_CODE (op0
) == MEM
8983 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8984 || !push_operand (op0
, mode
))
8985 && GET_CODE (op1
) == MEM
)
8986 op1
= force_reg (mode
, op1
);
8988 if (push_operand (op0
, mode
)
8989 && ! general_no_elim_operand (op1
, mode
))
8990 op1
= copy_to_mode_reg (mode
, op1
);
8992 /* Force large constants in 64bit compilation into register
8993 to get them CSEed. */
8994 if (TARGET_64BIT
&& mode
== DImode
8995 && immediate_operand (op1
, mode
)
8996 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
8997 && !register_operand (op0
, mode
)
8998 && optimize
&& !reload_completed
&& !reload_in_progress
)
8999 op1
= copy_to_mode_reg (mode
, op1
);
9001 if (FLOAT_MODE_P (mode
))
9003 /* If we are loading a floating point constant to a register,
9004 force the value to memory now, since we'll get better code
9005 out the back end. */
9009 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9011 op1
= validize_mem (force_const_mem (mode
, op1
));
9012 if (!register_operand (op0
, mode
))
9014 rtx temp
= gen_reg_rtx (mode
);
9015 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9016 emit_move_insn (op0
, temp
);
9023 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9027 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9029 rtx op0
= operands
[0], op1
= operands
[1];
9031 /* Force constants other than zero into memory. We do not know how
9032 the instructions used to build constants modify the upper 64 bits
9033 of the register, once we have that information we may be able
9034 to handle some of them more efficiently. */
9035 if ((reload_in_progress
| reload_completed
) == 0
9036 && register_operand (op0
, mode
)
9038 && standard_sse_constant_p (op1
) <= 0)
9039 op1
= validize_mem (force_const_mem (mode
, op1
));
9041 /* Make operand1 a register if it isn't already. */
9043 && !register_operand (op0
, mode
)
9044 && !register_operand (op1
, mode
))
9046 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9050 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9053 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9054 straight to ix86_expand_vector_move. */
9057 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9066 /* If we're optimizing for size, movups is the smallest. */
9069 op0
= gen_lowpart (V4SFmode
, op0
);
9070 op1
= gen_lowpart (V4SFmode
, op1
);
9071 emit_insn (gen_sse_movups (op0
, op1
));
9075 /* ??? If we have typed data, then it would appear that using
9076 movdqu is the only way to get unaligned data loaded with
9078 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9080 op0
= gen_lowpart (V16QImode
, op0
);
9081 op1
= gen_lowpart (V16QImode
, op1
);
9082 emit_insn (gen_sse2_movdqu (op0
, op1
));
9086 if (TARGET_SSE2
&& mode
== V2DFmode
)
9090 /* When SSE registers are split into halves, we can avoid
9091 writing to the top half twice. */
9092 if (TARGET_SSE_SPLIT_REGS
)
9094 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9099 /* ??? Not sure about the best option for the Intel chips.
9100 The following would seem to satisfy; the register is
9101 entirely cleared, breaking the dependency chain. We
9102 then store to the upper half, with a dependency depth
9103 of one. A rumor has it that Intel recommends two movsd
9104 followed by an unpacklpd, but this is unconfirmed. And
9105 given that the dependency depth of the unpacklpd would
9106 still be one, I'm not sure why this would be better. */
9107 zero
= CONST0_RTX (V2DFmode
);
9110 m
= adjust_address (op1
, DFmode
, 0);
9111 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9112 m
= adjust_address (op1
, DFmode
, 8);
9113 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9117 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9118 emit_move_insn (op0
, CONST0_RTX (mode
));
9120 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9122 if (mode
!= V4SFmode
)
9123 op0
= gen_lowpart (V4SFmode
, op0
);
9124 m
= adjust_address (op1
, V2SFmode
, 0);
9125 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9126 m
= adjust_address (op1
, V2SFmode
, 8);
9127 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9130 else if (MEM_P (op0
))
9132 /* If we're optimizing for size, movups is the smallest. */
9135 op0
= gen_lowpart (V4SFmode
, op0
);
9136 op1
= gen_lowpart (V4SFmode
, op1
);
9137 emit_insn (gen_sse_movups (op0
, op1
));
9141 /* ??? Similar to above, only less clear because of quote
9142 typeless stores unquote. */
9143 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9144 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9146 op0
= gen_lowpart (V16QImode
, op0
);
9147 op1
= gen_lowpart (V16QImode
, op1
);
9148 emit_insn (gen_sse2_movdqu (op0
, op1
));
9152 if (TARGET_SSE2
&& mode
== V2DFmode
)
9154 m
= adjust_address (op0
, DFmode
, 0);
9155 emit_insn (gen_sse2_storelpd (m
, op1
));
9156 m
= adjust_address (op0
, DFmode
, 8);
9157 emit_insn (gen_sse2_storehpd (m
, op1
));
9161 if (mode
!= V4SFmode
)
9162 op1
= gen_lowpart (V4SFmode
, op1
);
9163 m
= adjust_address (op0
, V2SFmode
, 0);
9164 emit_insn (gen_sse_storelps (m
, op1
));
9165 m
= adjust_address (op0
, V2SFmode
, 8);
9166 emit_insn (gen_sse_storehps (m
, op1
));
9173 /* Expand a push in MODE. This is some mode for which we do not support
9174 proper push instructions, at least from the registers that we expect
9175 the value to live in. */
9178 ix86_expand_push (enum machine_mode mode
, rtx x
)
9182 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9183 GEN_INT (-GET_MODE_SIZE (mode
)),
9184 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9185 if (tmp
!= stack_pointer_rtx
)
9186 emit_move_insn (stack_pointer_rtx
, tmp
);
9188 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9189 emit_move_insn (tmp
, x
);
9192 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9193 destination to use for the operation. If different from the true
9194 destination in operands[0], a copy operation will be required. */
9197 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9200 int matching_memory
;
9201 rtx src1
, src2
, dst
;
9207 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9208 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9209 && (rtx_equal_p (dst
, src2
)
9210 || immediate_operand (src1
, mode
)))
9217 /* If the destination is memory, and we do not have matching source
9218 operands, do things in registers. */
9219 matching_memory
= 0;
9220 if (GET_CODE (dst
) == MEM
)
9222 if (rtx_equal_p (dst
, src1
))
9223 matching_memory
= 1;
9224 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9225 && rtx_equal_p (dst
, src2
))
9226 matching_memory
= 2;
9228 dst
= gen_reg_rtx (mode
);
9231 /* Both source operands cannot be in memory. */
9232 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
9234 if (matching_memory
!= 2)
9235 src2
= force_reg (mode
, src2
);
9237 src1
= force_reg (mode
, src1
);
9240 /* If the operation is not commutable, source 1 cannot be a constant
9241 or non-matching memory. */
9242 if ((CONSTANT_P (src1
)
9243 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
9244 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9245 src1
= force_reg (mode
, src1
);
9247 src1
= operands
[1] = src1
;
9248 src2
= operands
[2] = src2
;
9252 /* Similarly, but assume that the destination has already been
9256 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9257 enum machine_mode mode
, rtx operands
[])
9259 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9260 gcc_assert (dst
== operands
[0]);
9263 /* Attempt to expand a binary operator. Make the expansion closer to the
9264 actual machine, then just general_operand, which will allow 3 separate
9265 memory references (one output, two input) in a single insn. */
9268 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9271 rtx src1
, src2
, dst
, op
, clob
;
9273 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9277 /* Emit the instruction. */
9279 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9280 if (reload_in_progress
)
9282 /* Reload doesn't know about the flags register, and doesn't know that
9283 it doesn't want to clobber it. We can only do this with PLUS. */
9284 gcc_assert (code
== PLUS
);
9289 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9290 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9293 /* Fix up the destination if needed. */
9294 if (dst
!= operands
[0])
9295 emit_move_insn (operands
[0], dst
);
9298 /* Return TRUE or FALSE depending on whether the binary operator meets the
9299 appropriate constraints. */
9302 ix86_binary_operator_ok (enum rtx_code code
,
9303 enum machine_mode mode ATTRIBUTE_UNUSED
,
9306 /* Both source operands cannot be in memory. */
9307 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
9309 /* If the operation is not commutable, source 1 cannot be a constant. */
9310 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9312 /* If the destination is memory, we must have a matching source operand. */
9313 if (GET_CODE (operands
[0]) == MEM
9314 && ! (rtx_equal_p (operands
[0], operands
[1])
9315 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9316 && rtx_equal_p (operands
[0], operands
[2]))))
9318 /* If the operation is not commutable and the source 1 is memory, we must
9319 have a matching destination. */
9320 if (GET_CODE (operands
[1]) == MEM
9321 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
9322 && ! rtx_equal_p (operands
[0], operands
[1]))
9327 /* Attempt to expand a unary operator. Make the expansion closer to the
9328 actual machine, then just general_operand, which will allow 2 separate
9329 memory references (one output, one input) in a single insn. */
9332 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9335 int matching_memory
;
9336 rtx src
, dst
, op
, clob
;
9341 /* If the destination is memory, and we do not have matching source
9342 operands, do things in registers. */
9343 matching_memory
= 0;
9346 if (rtx_equal_p (dst
, src
))
9347 matching_memory
= 1;
9349 dst
= gen_reg_rtx (mode
);
9352 /* When source operand is memory, destination must match. */
9353 if (MEM_P (src
) && !matching_memory
)
9354 src
= force_reg (mode
, src
);
9356 /* Emit the instruction. */
9358 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9359 if (reload_in_progress
|| code
== NOT
)
9361 /* Reload doesn't know about the flags register, and doesn't know that
9362 it doesn't want to clobber it. */
9363 gcc_assert (code
== NOT
);
9368 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9369 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9372 /* Fix up the destination if needed. */
9373 if (dst
!= operands
[0])
9374 emit_move_insn (operands
[0], dst
);
9377 /* Return TRUE or FALSE depending on whether the unary operator meets the
9378 appropriate constraints. */
9381 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9382 enum machine_mode mode ATTRIBUTE_UNUSED
,
9383 rtx operands
[2] ATTRIBUTE_UNUSED
)
9385 /* If one of operands is memory, source and destination must match. */
9386 if ((GET_CODE (operands
[0]) == MEM
9387 || GET_CODE (operands
[1]) == MEM
)
9388 && ! rtx_equal_p (operands
[0], operands
[1]))
9393 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9394 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9395 true, then replicate the mask for all elements of the vector register.
9396 If INVERT is true, then create a mask excluding the sign bit. */
9399 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9401 enum machine_mode vec_mode
;
9402 HOST_WIDE_INT hi
, lo
;
9407 /* Find the sign bit, sign extended to 2*HWI. */
9409 lo
= 0x80000000, hi
= lo
< 0;
9410 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9411 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9413 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9418 /* Force this value into the low part of a fp vector constant. */
9419 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9420 mask
= gen_lowpart (mode
, mask
);
9425 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9427 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9428 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9429 vec_mode
= V4SFmode
;
9434 v
= gen_rtvec (2, mask
, mask
);
9436 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9437 vec_mode
= V2DFmode
;
9440 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9443 /* Generate code for floating point ABS or NEG. */
9446 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9449 rtx mask
, set
, use
, clob
, dst
, src
;
9450 bool matching_memory
;
9451 bool use_sse
= false;
9452 bool vector_mode
= VECTOR_MODE_P (mode
);
9453 enum machine_mode elt_mode
= mode
;
9457 elt_mode
= GET_MODE_INNER (mode
);
9460 else if (TARGET_SSE_MATH
)
9461 use_sse
= SSE_FLOAT_MODE_P (mode
);
9463 /* NEG and ABS performed with SSE use bitwise mask operations.
9464 Create the appropriate mask now. */
9466 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9473 /* If the destination is memory, and we don't have matching source
9474 operands or we're using the x87, do things in registers. */
9475 matching_memory
= false;
9478 if (use_sse
&& rtx_equal_p (dst
, src
))
9479 matching_memory
= true;
9481 dst
= gen_reg_rtx (mode
);
9483 if (MEM_P (src
) && !matching_memory
)
9484 src
= force_reg (mode
, src
);
9488 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9489 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9494 set
= gen_rtx_fmt_e (code
, mode
, src
);
9495 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9498 use
= gen_rtx_USE (VOIDmode
, mask
);
9499 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9500 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9501 gen_rtvec (3, set
, use
, clob
)));
9507 if (dst
!= operands
[0])
9508 emit_move_insn (operands
[0], dst
);
9511 /* Expand a copysign operation. Special case operand 0 being a constant. */
9514 ix86_expand_copysign (rtx operands
[])
9516 enum machine_mode mode
, vmode
;
9517 rtx dest
, op0
, op1
, mask
, nmask
;
9523 mode
= GET_MODE (dest
);
9524 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9526 if (GET_CODE (op0
) == CONST_DOUBLE
)
9530 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9531 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9533 if (op0
== CONST0_RTX (mode
))
9534 op0
= CONST0_RTX (vmode
);
9538 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9539 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9541 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9542 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9545 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9548 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9550 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9554 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9555 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9558 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9560 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9564 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9565 be a constant, and so has already been expanded into a vector constant. */
9568 ix86_split_copysign_const (rtx operands
[])
9570 enum machine_mode mode
, vmode
;
9571 rtx dest
, op0
, op1
, mask
, x
;
9578 mode
= GET_MODE (dest
);
9579 vmode
= GET_MODE (mask
);
9581 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9582 x
= gen_rtx_AND (vmode
, dest
, mask
);
9583 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9585 if (op0
!= CONST0_RTX (vmode
))
9587 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9588 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9592 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9593 so we have to do two masks. */
9596 ix86_split_copysign_var (rtx operands
[])
9598 enum machine_mode mode
, vmode
;
9599 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9602 scratch
= operands
[1];
9605 nmask
= operands
[4];
9608 mode
= GET_MODE (dest
);
9609 vmode
= GET_MODE (mask
);
9611 if (rtx_equal_p (op0
, op1
))
9613 /* Shouldn't happen often (it's useless, obviously), but when it does
9614 we'd generate incorrect code if we continue below. */
9615 emit_move_insn (dest
, op0
);
9619 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9621 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9623 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9624 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9627 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9628 x
= gen_rtx_NOT (vmode
, dest
);
9629 x
= gen_rtx_AND (vmode
, x
, op0
);
9630 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9634 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9636 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9638 else /* alternative 2,4 */
9640 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9641 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9642 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9644 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9646 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9648 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9649 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9651 else /* alternative 3,4 */
9653 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9655 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9656 x
= gen_rtx_AND (vmode
, dest
, op0
);
9658 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9661 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9662 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9665 /* Return TRUE or FALSE depending on whether the first SET in INSN
9666 has source and destination with matching CC modes, and that the
9667 CC mode is at least as constrained as REQ_MODE. */
9670 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9673 enum machine_mode set_mode
;
9675 set
= PATTERN (insn
);
9676 if (GET_CODE (set
) == PARALLEL
)
9677 set
= XVECEXP (set
, 0, 0);
9678 gcc_assert (GET_CODE (set
) == SET
);
9679 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9681 set_mode
= GET_MODE (SET_DEST (set
));
9685 if (req_mode
!= CCNOmode
9686 && (req_mode
!= CCmode
9687 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9691 if (req_mode
== CCGCmode
)
9695 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9699 if (req_mode
== CCZmode
)
9709 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9712 /* Generate insn patterns to do an integer compare of OPERANDS. */
9715 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9717 enum machine_mode cmpmode
;
9720 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9721 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9723 /* This is very simple, but making the interface the same as in the
9724 FP case makes the rest of the code easier. */
9725 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9726 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9728 /* Return the test that should be put into the flags user, i.e.
9729 the bcc, scc, or cmov instruction. */
9730 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9733 /* Figure out whether to use ordered or unordered fp comparisons.
9734 Return the appropriate mode to use. */
9737 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9739 /* ??? In order to make all comparisons reversible, we do all comparisons
9740 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9741 all forms trapping and nontrapping comparisons, we can make inequality
9742 comparisons trapping again, since it results in better code when using
9743 FCOM based compares. */
9744 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9748 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9750 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9751 return ix86_fp_compare_mode (code
);
9754 /* Only zero flag is needed. */
9756 case NE
: /* ZF!=0 */
9758 /* Codes needing carry flag. */
9759 case GEU
: /* CF=0 */
9760 case GTU
: /* CF=0 & ZF=0 */
9761 case LTU
: /* CF=1 */
9762 case LEU
: /* CF=1 | ZF=1 */
9764 /* Codes possibly doable only with sign flag when
9765 comparing against zero. */
9766 case GE
: /* SF=OF or SF=0 */
9767 case LT
: /* SF<>OF or SF=1 */
9768 if (op1
== const0_rtx
)
9771 /* For other cases Carry flag is not required. */
9773 /* Codes doable only with sign flag when comparing
9774 against zero, but we miss jump instruction for it
9775 so we need to use relational tests against overflow
9776 that thus needs to be zero. */
9777 case GT
: /* ZF=0 & SF=OF */
9778 case LE
: /* ZF=1 | SF<>OF */
9779 if (op1
== const0_rtx
)
9783 /* strcmp pattern do (use flags) and combine may ask us for proper
9792 /* Return the fixed registers used for condition codes. */
9795 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
9802 /* If two condition code modes are compatible, return a condition code
9803 mode which is compatible with both. Otherwise, return
9806 static enum machine_mode
9807 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
9812 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
9815 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
9816 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
9844 /* These are only compatible with themselves, which we already
9850 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9853 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
9855 enum rtx_code swapped_code
= swap_condition (code
);
9856 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
9857 || (ix86_fp_comparison_cost (swapped_code
)
9858 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
9861 /* Swap, force into registers, or otherwise massage the two operands
9862 to a fp comparison. The operands are updated in place; the new
9863 comparison code is returned. */
9865 static enum rtx_code
9866 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
9868 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
9869 rtx op0
= *pop0
, op1
= *pop1
;
9870 enum machine_mode op_mode
= GET_MODE (op0
);
9871 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
9873 /* All of the unordered compare instructions only work on registers.
9874 The same is true of the fcomi compare instructions. The XFmode
9875 compare instructions require registers except when comparing
9876 against zero or when converting operand 1 from fixed point to
9880 && (fpcmp_mode
== CCFPUmode
9881 || (op_mode
== XFmode
9882 && ! (standard_80387_constant_p (op0
) == 1
9883 || standard_80387_constant_p (op1
) == 1)
9884 && GET_CODE (op1
) != FLOAT
)
9885 || ix86_use_fcomi_compare (code
)))
9887 op0
= force_reg (op_mode
, op0
);
9888 op1
= force_reg (op_mode
, op1
);
9892 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9893 things around if they appear profitable, otherwise force op0
9896 if (standard_80387_constant_p (op0
) == 0
9897 || (GET_CODE (op0
) == MEM
9898 && ! (standard_80387_constant_p (op1
) == 0
9899 || GET_CODE (op1
) == MEM
)))
9902 tmp
= op0
, op0
= op1
, op1
= tmp
;
9903 code
= swap_condition (code
);
9906 if (GET_CODE (op0
) != REG
)
9907 op0
= force_reg (op_mode
, op0
);
9909 if (CONSTANT_P (op1
))
9911 int tmp
= standard_80387_constant_p (op1
);
9913 op1
= validize_mem (force_const_mem (op_mode
, op1
));
9917 op1
= force_reg (op_mode
, op1
);
9920 op1
= force_reg (op_mode
, op1
);
9924 /* Try to rearrange the comparison to make it cheaper. */
9925 if (ix86_fp_comparison_cost (code
)
9926 > ix86_fp_comparison_cost (swap_condition (code
))
9927 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
9930 tmp
= op0
, op0
= op1
, op1
= tmp
;
9931 code
= swap_condition (code
);
9932 if (GET_CODE (op0
) != REG
)
9933 op0
= force_reg (op_mode
, op0
);
9941 /* Convert comparison codes we use to represent FP comparison to integer
9942 code that will result in proper branch. Return UNKNOWN if no such code
9946 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9975 /* Split comparison code CODE into comparisons we can do using branch
9976 instructions. BYPASS_CODE is comparison code for branch that will
9977 branch around FIRST_CODE and SECOND_CODE. If some of branches
9978 is not required, set value to UNKNOWN.
9979 We never require more than two branches. */
9982 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9983 enum rtx_code
*first_code
,
9984 enum rtx_code
*second_code
)
9987 *bypass_code
= UNKNOWN
;
9988 *second_code
= UNKNOWN
;
9990 /* The fcomi comparison sets flags as follows:
10000 case GT
: /* GTU - CF=0 & ZF=0 */
10001 case GE
: /* GEU - CF=0 */
10002 case ORDERED
: /* PF=0 */
10003 case UNORDERED
: /* PF=1 */
10004 case UNEQ
: /* EQ - ZF=1 */
10005 case UNLT
: /* LTU - CF=1 */
10006 case UNLE
: /* LEU - CF=1 | ZF=1 */
10007 case LTGT
: /* EQ - ZF=0 */
10009 case LT
: /* LTU - CF=1 - fails on unordered */
10010 *first_code
= UNLT
;
10011 *bypass_code
= UNORDERED
;
10013 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10014 *first_code
= UNLE
;
10015 *bypass_code
= UNORDERED
;
10017 case EQ
: /* EQ - ZF=1 - fails on unordered */
10018 *first_code
= UNEQ
;
10019 *bypass_code
= UNORDERED
;
10021 case NE
: /* NE - ZF=0 - fails on unordered */
10022 *first_code
= LTGT
;
10023 *second_code
= UNORDERED
;
10025 case UNGE
: /* GEU - CF=0 - fails on unordered */
10027 *second_code
= UNORDERED
;
10029 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10031 *second_code
= UNORDERED
;
10034 gcc_unreachable ();
10036 if (!TARGET_IEEE_FP
)
10038 *second_code
= UNKNOWN
;
10039 *bypass_code
= UNKNOWN
;
10043 /* Return cost of comparison done fcom + arithmetics operations on AX.
10044 All following functions do use number of instructions as a cost metrics.
10045 In future this should be tweaked to compute bytes for optimize_size and
10046 take into account performance of various instructions on various CPUs. */
10048 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10050 if (!TARGET_IEEE_FP
)
10052 /* The cost of code output by ix86_expand_fp_compare. */
10076 gcc_unreachable ();
10080 /* Return cost of comparison done using fcomi operation.
10081 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10083 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10085 enum rtx_code bypass_code
, first_code
, second_code
;
10086 /* Return arbitrarily high cost when instruction is not supported - this
10087 prevents gcc from using it. */
10090 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10091 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10094 /* Return cost of comparison done using sahf operation.
10095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10097 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10099 enum rtx_code bypass_code
, first_code
, second_code
;
10100 /* Return arbitrarily high cost when instruction is not preferred - this
10101 avoids gcc from using it. */
10102 if (!TARGET_USE_SAHF
&& !optimize_size
)
10104 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10105 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10108 /* Compute cost of the comparison done using any method.
10109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10111 ix86_fp_comparison_cost (enum rtx_code code
)
10113 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10116 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10117 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10119 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10120 if (min
> sahf_cost
)
10122 if (min
> fcomi_cost
)
10127 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10130 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10131 rtx
*second_test
, rtx
*bypass_test
)
10133 enum machine_mode fpcmp_mode
, intcmp_mode
;
10135 int cost
= ix86_fp_comparison_cost (code
);
10136 enum rtx_code bypass_code
, first_code
, second_code
;
10138 fpcmp_mode
= ix86_fp_compare_mode (code
);
10139 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10142 *second_test
= NULL_RTX
;
10144 *bypass_test
= NULL_RTX
;
10146 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10148 /* Do fcomi/sahf based test when profitable. */
10149 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10150 && (second_code
== UNKNOWN
|| second_test
)
10151 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10155 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10156 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10162 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10163 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10165 scratch
= gen_reg_rtx (HImode
);
10166 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10167 emit_insn (gen_x86_sahf_1 (scratch
));
10170 /* The FP codes work out to act like unsigned. */
10171 intcmp_mode
= fpcmp_mode
;
10173 if (bypass_code
!= UNKNOWN
)
10174 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10175 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10177 if (second_code
!= UNKNOWN
)
10178 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10179 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10184 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10185 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10186 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10188 scratch
= gen_reg_rtx (HImode
);
10189 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10191 /* In the unordered case, we have to check C2 for NaN's, which
10192 doesn't happen to work out to anything nice combination-wise.
10193 So do some bit twiddling on the value we've got in AH to come
10194 up with an appropriate set of condition codes. */
10196 intcmp_mode
= CCNOmode
;
10201 if (code
== GT
|| !TARGET_IEEE_FP
)
10203 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10208 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10209 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10210 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10211 intcmp_mode
= CCmode
;
10217 if (code
== LT
&& TARGET_IEEE_FP
)
10219 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10220 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10221 intcmp_mode
= CCmode
;
10226 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10232 if (code
== GE
|| !TARGET_IEEE_FP
)
10234 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10239 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10240 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10247 if (code
== LE
&& TARGET_IEEE_FP
)
10249 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10250 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10251 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10252 intcmp_mode
= CCmode
;
10257 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10263 if (code
== EQ
&& TARGET_IEEE_FP
)
10265 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10266 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10267 intcmp_mode
= CCmode
;
10272 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10279 if (code
== NE
&& TARGET_IEEE_FP
)
10281 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10282 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10288 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10294 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10298 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10303 gcc_unreachable ();
10307 /* Return the test that should be put into the flags user, i.e.
10308 the bcc, scc, or cmov instruction. */
10309 return gen_rtx_fmt_ee (code
, VOIDmode
,
10310 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10315 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10318 op0
= ix86_compare_op0
;
10319 op1
= ix86_compare_op1
;
10322 *second_test
= NULL_RTX
;
10324 *bypass_test
= NULL_RTX
;
10326 if (ix86_compare_emitted
)
10328 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10329 ix86_compare_emitted
= NULL_RTX
;
10331 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10332 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10333 second_test
, bypass_test
);
10335 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10340 /* Return true if the CODE will result in nontrivial jump sequence. */
10342 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10344 enum rtx_code bypass_code
, first_code
, second_code
;
10347 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10348 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10352 ix86_expand_branch (enum rtx_code code
, rtx label
)
10356 /* If we have emitted a compare insn, go straight to simple.
10357 ix86_expand_compare won't emit anything if ix86_compare_emitted
10359 if (ix86_compare_emitted
)
10362 switch (GET_MODE (ix86_compare_op0
))
10368 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10369 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10370 gen_rtx_LABEL_REF (VOIDmode
, label
),
10372 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10381 enum rtx_code bypass_code
, first_code
, second_code
;
10383 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10384 &ix86_compare_op1
);
10386 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10388 /* Check whether we will use the natural sequence with one jump. If
10389 so, we can expand jump early. Otherwise delay expansion by
10390 creating compound insn to not confuse optimizers. */
10391 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10394 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10395 gen_rtx_LABEL_REF (VOIDmode
, label
),
10396 pc_rtx
, NULL_RTX
, NULL_RTX
);
10400 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10401 ix86_compare_op0
, ix86_compare_op1
);
10402 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10403 gen_rtx_LABEL_REF (VOIDmode
, label
),
10405 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10407 use_fcomi
= ix86_use_fcomi_compare (code
);
10408 vec
= rtvec_alloc (3 + !use_fcomi
);
10409 RTVEC_ELT (vec
, 0) = tmp
;
10411 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10413 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10416 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10418 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10427 /* Expand DImode branch into multiple compare+branch. */
10429 rtx lo
[2], hi
[2], label2
;
10430 enum rtx_code code1
, code2
, code3
;
10431 enum machine_mode submode
;
10433 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10435 tmp
= ix86_compare_op0
;
10436 ix86_compare_op0
= ix86_compare_op1
;
10437 ix86_compare_op1
= tmp
;
10438 code
= swap_condition (code
);
10440 if (GET_MODE (ix86_compare_op0
) == DImode
)
10442 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10443 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10448 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10449 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10453 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10454 avoid two branches. This costs one extra insn, so disable when
10455 optimizing for size. */
10457 if ((code
== EQ
|| code
== NE
)
10459 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10464 if (hi
[1] != const0_rtx
)
10465 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10466 NULL_RTX
, 0, OPTAB_WIDEN
);
10469 if (lo
[1] != const0_rtx
)
10470 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10471 NULL_RTX
, 0, OPTAB_WIDEN
);
10473 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10474 NULL_RTX
, 0, OPTAB_WIDEN
);
10476 ix86_compare_op0
= tmp
;
10477 ix86_compare_op1
= const0_rtx
;
10478 ix86_expand_branch (code
, label
);
10482 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10483 op1 is a constant and the low word is zero, then we can just
10484 examine the high word. */
10486 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
10489 case LT
: case LTU
: case GE
: case GEU
:
10490 ix86_compare_op0
= hi
[0];
10491 ix86_compare_op1
= hi
[1];
10492 ix86_expand_branch (code
, label
);
10498 /* Otherwise, we need two or three jumps. */
10500 label2
= gen_label_rtx ();
10503 code2
= swap_condition (code
);
10504 code3
= unsigned_condition (code
);
10508 case LT
: case GT
: case LTU
: case GTU
:
10511 case LE
: code1
= LT
; code2
= GT
; break;
10512 case GE
: code1
= GT
; code2
= LT
; break;
10513 case LEU
: code1
= LTU
; code2
= GTU
; break;
10514 case GEU
: code1
= GTU
; code2
= LTU
; break;
10516 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10517 case NE
: code2
= UNKNOWN
; break;
10520 gcc_unreachable ();
10525 * if (hi(a) < hi(b)) goto true;
10526 * if (hi(a) > hi(b)) goto false;
10527 * if (lo(a) < lo(b)) goto true;
10531 ix86_compare_op0
= hi
[0];
10532 ix86_compare_op1
= hi
[1];
10534 if (code1
!= UNKNOWN
)
10535 ix86_expand_branch (code1
, label
);
10536 if (code2
!= UNKNOWN
)
10537 ix86_expand_branch (code2
, label2
);
10539 ix86_compare_op0
= lo
[0];
10540 ix86_compare_op1
= lo
[1];
10541 ix86_expand_branch (code3
, label
);
10543 if (code2
!= UNKNOWN
)
10544 emit_label (label2
);
10549 gcc_unreachable ();
10553 /* Split branch based on floating point condition. */
10555 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10556 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10558 rtx second
, bypass
;
10559 rtx label
= NULL_RTX
;
10561 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10564 if (target2
!= pc_rtx
)
10567 code
= reverse_condition_maybe_unordered (code
);
10572 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10573 tmp
, &second
, &bypass
);
10575 /* Remove pushed operand from stack. */
10577 ix86_free_from_memory (GET_MODE (pushed
));
10579 if (split_branch_probability
>= 0)
10581 /* Distribute the probabilities across the jumps.
10582 Assume the BYPASS and SECOND to be always test
10584 probability
= split_branch_probability
;
10586 /* Value of 1 is low enough to make no need for probability
10587 to be updated. Later we may run some experiments and see
10588 if unordered values are more frequent in practice. */
10590 bypass_probability
= 1;
10592 second_probability
= 1;
10594 if (bypass
!= NULL_RTX
)
10596 label
= gen_label_rtx ();
10597 i
= emit_jump_insn (gen_rtx_SET
10599 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10601 gen_rtx_LABEL_REF (VOIDmode
,
10604 if (bypass_probability
>= 0)
10606 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10607 GEN_INT (bypass_probability
),
10610 i
= emit_jump_insn (gen_rtx_SET
10612 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10613 condition
, target1
, target2
)));
10614 if (probability
>= 0)
10616 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10617 GEN_INT (probability
),
10619 if (second
!= NULL_RTX
)
10621 i
= emit_jump_insn (gen_rtx_SET
10623 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10625 if (second_probability
>= 0)
10627 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10628 GEN_INT (second_probability
),
10631 if (label
!= NULL_RTX
)
10632 emit_label (label
);
10636 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10638 rtx ret
, tmp
, tmpreg
, equiv
;
10639 rtx second_test
, bypass_test
;
10641 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10642 return 0; /* FAIL */
10644 gcc_assert (GET_MODE (dest
) == QImode
);
10646 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10647 PUT_MODE (ret
, QImode
);
10652 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10653 if (bypass_test
|| second_test
)
10655 rtx test
= second_test
;
10657 rtx tmp2
= gen_reg_rtx (QImode
);
10660 gcc_assert (!second_test
);
10661 test
= bypass_test
;
10663 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10665 PUT_MODE (test
, QImode
);
10666 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10669 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10671 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10674 /* Attach a REG_EQUAL note describing the comparison result. */
10675 if (ix86_compare_op0
&& ix86_compare_op1
)
10677 equiv
= simplify_gen_relational (code
, QImode
,
10678 GET_MODE (ix86_compare_op0
),
10679 ix86_compare_op0
, ix86_compare_op1
);
10680 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10683 return 1; /* DONE */
10686 /* Expand comparison setting or clearing carry flag. Return true when
10687 successful and set pop for the operation. */
10689 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10691 enum machine_mode mode
=
10692 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10694 /* Do not handle DImode compares that go through special path. Also we can't
10695 deal with FP compares yet. This is possible to add. */
10696 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10698 if (FLOAT_MODE_P (mode
))
10700 rtx second_test
= NULL
, bypass_test
= NULL
;
10701 rtx compare_op
, compare_seq
;
10703 /* Shortcut: following common codes never translate into carry flag compares. */
10704 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10705 || code
== ORDERED
|| code
== UNORDERED
)
10708 /* These comparisons require zero flag; swap operands so they won't. */
10709 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10710 && !TARGET_IEEE_FP
)
10715 code
= swap_condition (code
);
10718 /* Try to expand the comparison and verify that we end up with carry flag
10719 based comparison. This is fails to be true only when we decide to expand
10720 comparison using arithmetic that is not too common scenario. */
10722 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10723 &second_test
, &bypass_test
);
10724 compare_seq
= get_insns ();
10727 if (second_test
|| bypass_test
)
10729 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10730 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10731 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10733 code
= GET_CODE (compare_op
);
10734 if (code
!= LTU
&& code
!= GEU
)
10736 emit_insn (compare_seq
);
10740 if (!INTEGRAL_MODE_P (mode
))
10748 /* Convert a==0 into (unsigned)a<1. */
10751 if (op1
!= const0_rtx
)
10754 code
= (code
== EQ
? LTU
: GEU
);
10757 /* Convert a>b into b<a or a>=b-1. */
10760 if (GET_CODE (op1
) == CONST_INT
)
10762 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10763 /* Bail out on overflow. We still can swap operands but that
10764 would force loading of the constant into register. */
10765 if (op1
== const0_rtx
10766 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
10768 code
= (code
== GTU
? GEU
: LTU
);
10775 code
= (code
== GTU
? LTU
: GEU
);
10779 /* Convert a>=0 into (unsigned)a<0x80000000. */
10782 if (mode
== DImode
|| op1
!= const0_rtx
)
10784 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10785 code
= (code
== LT
? GEU
: LTU
);
10789 if (mode
== DImode
|| op1
!= constm1_rtx
)
10791 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10792 code
= (code
== LE
? GEU
: LTU
);
10798 /* Swapping operands may cause constant to appear as first operand. */
10799 if (!nonimmediate_operand (op0
, VOIDmode
))
10801 if (no_new_pseudos
)
10803 op0
= force_reg (mode
, op0
);
10805 ix86_compare_op0
= op0
;
10806 ix86_compare_op1
= op1
;
10807 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
10808 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
10813 ix86_expand_int_movcc (rtx operands
[])
10815 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
10816 rtx compare_seq
, compare_op
;
10817 rtx second_test
, bypass_test
;
10818 enum machine_mode mode
= GET_MODE (operands
[0]);
10819 bool sign_bit_compare_p
= false;;
10822 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10823 compare_seq
= get_insns ();
10826 compare_code
= GET_CODE (compare_op
);
10828 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
10829 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
10830 sign_bit_compare_p
= true;
10832 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10833 HImode insns, we'd be swallowed in word prefix ops. */
10835 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
10836 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
10837 && GET_CODE (operands
[2]) == CONST_INT
10838 && GET_CODE (operands
[3]) == CONST_INT
)
10840 rtx out
= operands
[0];
10841 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
10842 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
10843 HOST_WIDE_INT diff
;
10846 /* Sign bit compares are better done using shifts than we do by using
10848 if (sign_bit_compare_p
10849 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10850 ix86_compare_op1
, &compare_op
))
10852 /* Detect overlap between destination and compare sources. */
10855 if (!sign_bit_compare_p
)
10857 bool fpcmp
= false;
10859 compare_code
= GET_CODE (compare_op
);
10861 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10862 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10865 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
10868 /* To simplify rest of code, restrict to the GEU case. */
10869 if (compare_code
== LTU
)
10871 HOST_WIDE_INT tmp
= ct
;
10874 compare_code
= reverse_condition (compare_code
);
10875 code
= reverse_condition (code
);
10880 PUT_CODE (compare_op
,
10881 reverse_condition_maybe_unordered
10882 (GET_CODE (compare_op
)));
10884 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10888 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
10889 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
10890 tmp
= gen_reg_rtx (mode
);
10892 if (mode
== DImode
)
10893 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
10895 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
10899 if (code
== GT
|| code
== GE
)
10900 code
= reverse_condition (code
);
10903 HOST_WIDE_INT tmp
= ct
;
10908 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
10909 ix86_compare_op1
, VOIDmode
, 0, -1);
10922 tmp
= expand_simple_binop (mode
, PLUS
,
10924 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10935 tmp
= expand_simple_binop (mode
, IOR
,
10937 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10939 else if (diff
== -1 && ct
)
10949 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10951 tmp
= expand_simple_binop (mode
, PLUS
,
10952 copy_rtx (tmp
), GEN_INT (cf
),
10953 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10961 * andl cf - ct, dest
10971 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10974 tmp
= expand_simple_binop (mode
, AND
,
10976 gen_int_mode (cf
- ct
, mode
),
10977 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10979 tmp
= expand_simple_binop (mode
, PLUS
,
10980 copy_rtx (tmp
), GEN_INT (ct
),
10981 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10984 if (!rtx_equal_p (tmp
, out
))
10985 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
10987 return 1; /* DONE */
10993 tmp
= ct
, ct
= cf
, cf
= tmp
;
10995 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
10997 /* We may be reversing unordered compare to normal compare, that
10998 is not valid in general (we may convert non-trapping condition
10999 to trapping one), however on i386 we currently emit all
11000 comparisons unordered. */
11001 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11002 code
= reverse_condition_maybe_unordered (code
);
11006 compare_code
= reverse_condition (compare_code
);
11007 code
= reverse_condition (code
);
11011 compare_code
= UNKNOWN
;
11012 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11013 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
11015 if (ix86_compare_op1
== const0_rtx
11016 && (code
== LT
|| code
== GE
))
11017 compare_code
= code
;
11018 else if (ix86_compare_op1
== constm1_rtx
)
11022 else if (code
== GT
)
11027 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11028 if (compare_code
!= UNKNOWN
11029 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11030 && (cf
== -1 || ct
== -1))
11032 /* If lea code below could be used, only optimize
11033 if it results in a 2 insn sequence. */
11035 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11036 || diff
== 3 || diff
== 5 || diff
== 9)
11037 || (compare_code
== LT
&& ct
== -1)
11038 || (compare_code
== GE
&& cf
== -1))
11041 * notl op1 (if necessary)
11049 code
= reverse_condition (code
);
11052 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11053 ix86_compare_op1
, VOIDmode
, 0, -1);
11055 out
= expand_simple_binop (mode
, IOR
,
11057 out
, 1, OPTAB_DIRECT
);
11058 if (out
!= operands
[0])
11059 emit_move_insn (operands
[0], out
);
11061 return 1; /* DONE */
11066 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11067 || diff
== 3 || diff
== 5 || diff
== 9)
11068 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11070 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11076 * lea cf(dest*(ct-cf)),dest
11080 * This also catches the degenerate setcc-only case.
11086 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11087 ix86_compare_op1
, VOIDmode
, 0, 1);
11090 /* On x86_64 the lea instruction operates on Pmode, so we need
11091 to get arithmetics done in proper mode to match. */
11093 tmp
= copy_rtx (out
);
11097 out1
= copy_rtx (out
);
11098 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11102 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11108 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11111 if (!rtx_equal_p (tmp
, out
))
11114 out
= force_operand (tmp
, copy_rtx (out
));
11116 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11118 if (!rtx_equal_p (out
, operands
[0]))
11119 emit_move_insn (operands
[0], copy_rtx (out
));
11121 return 1; /* DONE */
11125 * General case: Jumpful:
11126 * xorl dest,dest cmpl op1, op2
11127 * cmpl op1, op2 movl ct, dest
11128 * setcc dest jcc 1f
11129 * decl dest movl cf, dest
11130 * andl (cf-ct),dest 1:
11133 * Size 20. Size 14.
11135 * This is reasonably steep, but branch mispredict costs are
11136 * high on modern cpus, so consider failing only if optimizing
11140 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11141 && BRANCH_COST
>= 2)
11147 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11148 /* We may be reversing unordered compare to normal compare,
11149 that is not valid in general (we may convert non-trapping
11150 condition to trapping one), however on i386 we currently
11151 emit all comparisons unordered. */
11152 code
= reverse_condition_maybe_unordered (code
);
11155 code
= reverse_condition (code
);
11156 if (compare_code
!= UNKNOWN
)
11157 compare_code
= reverse_condition (compare_code
);
11161 if (compare_code
!= UNKNOWN
)
11163 /* notl op1 (if needed)
11168 For x < 0 (resp. x <= -1) there will be no notl,
11169 so if possible swap the constants to get rid of the
11171 True/false will be -1/0 while code below (store flag
11172 followed by decrement) is 0/-1, so the constants need
11173 to be exchanged once more. */
11175 if (compare_code
== GE
|| !cf
)
11177 code
= reverse_condition (code
);
11182 HOST_WIDE_INT tmp
= cf
;
11187 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11188 ix86_compare_op1
, VOIDmode
, 0, -1);
11192 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11193 ix86_compare_op1
, VOIDmode
, 0, 1);
11195 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11196 copy_rtx (out
), 1, OPTAB_DIRECT
);
11199 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11200 gen_int_mode (cf
- ct
, mode
),
11201 copy_rtx (out
), 1, OPTAB_DIRECT
);
11203 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11204 copy_rtx (out
), 1, OPTAB_DIRECT
);
11205 if (!rtx_equal_p (out
, operands
[0]))
11206 emit_move_insn (operands
[0], copy_rtx (out
));
11208 return 1; /* DONE */
11212 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11214 /* Try a few things more with specific constants and a variable. */
11217 rtx var
, orig_out
, out
, tmp
;
11219 if (BRANCH_COST
<= 2)
11220 return 0; /* FAIL */
11222 /* If one of the two operands is an interesting constant, load a
11223 constant with the above and mask it in with a logical operation. */
11225 if (GET_CODE (operands
[2]) == CONST_INT
)
11228 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11229 operands
[3] = constm1_rtx
, op
= and_optab
;
11230 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11231 operands
[3] = const0_rtx
, op
= ior_optab
;
11233 return 0; /* FAIL */
11235 else if (GET_CODE (operands
[3]) == CONST_INT
)
11238 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11239 operands
[2] = constm1_rtx
, op
= and_optab
;
11240 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11241 operands
[2] = const0_rtx
, op
= ior_optab
;
11243 return 0; /* FAIL */
11246 return 0; /* FAIL */
11248 orig_out
= operands
[0];
11249 tmp
= gen_reg_rtx (mode
);
11252 /* Recurse to get the constant loaded. */
11253 if (ix86_expand_int_movcc (operands
) == 0)
11254 return 0; /* FAIL */
11256 /* Mask in the interesting variable. */
11257 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11259 if (!rtx_equal_p (out
, orig_out
))
11260 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11262 return 1; /* DONE */
11266 * For comparison with above,
11276 if (! nonimmediate_operand (operands
[2], mode
))
11277 operands
[2] = force_reg (mode
, operands
[2]);
11278 if (! nonimmediate_operand (operands
[3], mode
))
11279 operands
[3] = force_reg (mode
, operands
[3]);
11281 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11283 rtx tmp
= gen_reg_rtx (mode
);
11284 emit_move_insn (tmp
, operands
[3]);
11287 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11289 rtx tmp
= gen_reg_rtx (mode
);
11290 emit_move_insn (tmp
, operands
[2]);
11294 if (! register_operand (operands
[2], VOIDmode
)
11296 || ! register_operand (operands
[3], VOIDmode
)))
11297 operands
[2] = force_reg (mode
, operands
[2]);
11300 && ! register_operand (operands
[3], VOIDmode
))
11301 operands
[3] = force_reg (mode
, operands
[3]);
11303 emit_insn (compare_seq
);
11304 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11305 gen_rtx_IF_THEN_ELSE (mode
,
11306 compare_op
, operands
[2],
11309 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11310 gen_rtx_IF_THEN_ELSE (mode
,
11312 copy_rtx (operands
[3]),
11313 copy_rtx (operands
[0]))));
11315 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11316 gen_rtx_IF_THEN_ELSE (mode
,
11318 copy_rtx (operands
[2]),
11319 copy_rtx (operands
[0]))));
11321 return 1; /* DONE */
11324 /* Swap, force into registers, or otherwise massage the two operands
11325 to an sse comparison with a mask result. Thus we differ a bit from
11326 ix86_prepare_fp_compare_args which expects to produce a flags result.
11328 The DEST operand exists to help determine whether to commute commutative
11329 operators. The POP0/POP1 operands are updated in place. The new
11330 comparison code is returned, or UNKNOWN if not implementable. */
11332 static enum rtx_code
11333 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11334 rtx
*pop0
, rtx
*pop1
)
11342 /* We have no LTGT as an operator. We could implement it with
11343 NE & ORDERED, but this requires an extra temporary. It's
11344 not clear that it's worth it. */
11351 /* These are supported directly. */
11358 /* For commutative operators, try to canonicalize the destination
11359 operand to be first in the comparison - this helps reload to
11360 avoid extra moves. */
11361 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11369 /* These are not supported directly. Swap the comparison operands
11370 to transform into something that is supported. */
11374 code
= swap_condition (code
);
11378 gcc_unreachable ();
11384 /* Detect conditional moves that exactly match min/max operational
11385 semantics. Note that this is IEEE safe, as long as we don't
11386 interchange the operands.
11388 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11389 and TRUE if the operation is successful and instructions are emitted. */
11392 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11393 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11395 enum machine_mode mode
;
11401 else if (code
== UNGE
)
11404 if_true
= if_false
;
11410 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11412 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11417 mode
= GET_MODE (dest
);
11419 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11420 but MODE may be a vector mode and thus not appropriate. */
11421 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
11423 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11426 if_true
= force_reg (mode
, if_true
);
11427 v
= gen_rtvec (2, if_true
, if_false
);
11428 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
11432 code
= is_min
? SMIN
: SMAX
;
11433 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11436 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11440 /* Expand an sse vector comparison. Return the register with the result. */
11443 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11444 rtx op_true
, rtx op_false
)
11446 enum machine_mode mode
= GET_MODE (dest
);
11449 cmp_op0
= force_reg (mode
, cmp_op0
);
11450 if (!nonimmediate_operand (cmp_op1
, mode
))
11451 cmp_op1
= force_reg (mode
, cmp_op1
);
11454 || reg_overlap_mentioned_p (dest
, op_true
)
11455 || reg_overlap_mentioned_p (dest
, op_false
))
11456 dest
= gen_reg_rtx (mode
);
11458 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11459 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11464 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11465 operations. This is used for both scalar and vector conditional moves. */
11468 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11470 enum machine_mode mode
= GET_MODE (dest
);
11473 if (op_false
== CONST0_RTX (mode
))
11475 op_true
= force_reg (mode
, op_true
);
11476 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11477 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11479 else if (op_true
== CONST0_RTX (mode
))
11481 op_false
= force_reg (mode
, op_false
);
11482 x
= gen_rtx_NOT (mode
, cmp
);
11483 x
= gen_rtx_AND (mode
, x
, op_false
);
11484 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11488 op_true
= force_reg (mode
, op_true
);
11489 op_false
= force_reg (mode
, op_false
);
11491 t2
= gen_reg_rtx (mode
);
11493 t3
= gen_reg_rtx (mode
);
11497 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11498 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11500 x
= gen_rtx_NOT (mode
, cmp
);
11501 x
= gen_rtx_AND (mode
, x
, op_false
);
11502 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11504 x
= gen_rtx_IOR (mode
, t3
, t2
);
11505 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11509 /* Expand a floating-point conditional move. Return true if successful. */
11512 ix86_expand_fp_movcc (rtx operands
[])
11514 enum machine_mode mode
= GET_MODE (operands
[0]);
11515 enum rtx_code code
= GET_CODE (operands
[1]);
11516 rtx tmp
, compare_op
, second_test
, bypass_test
;
11518 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11520 enum machine_mode cmode
;
11522 /* Since we've no cmove for sse registers, don't force bad register
11523 allocation just to gain access to it. Deny movcc when the
11524 comparison mode doesn't match the move mode. */
11525 cmode
= GET_MODE (ix86_compare_op0
);
11526 if (cmode
== VOIDmode
)
11527 cmode
= GET_MODE (ix86_compare_op1
);
11531 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11533 &ix86_compare_op1
);
11534 if (code
== UNKNOWN
)
11537 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11538 ix86_compare_op1
, operands
[2],
11542 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11543 ix86_compare_op1
, operands
[2], operands
[3]);
11544 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11548 /* The floating point conditional move instructions don't directly
11549 support conditions resulting from a signed integer comparison. */
11551 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11553 /* The floating point conditional move instructions don't directly
11554 support signed integer comparisons. */
11556 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
11558 gcc_assert (!second_test
&& !bypass_test
);
11559 tmp
= gen_reg_rtx (QImode
);
11560 ix86_expand_setcc (code
, tmp
);
11562 ix86_compare_op0
= tmp
;
11563 ix86_compare_op1
= const0_rtx
;
11564 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11566 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11568 tmp
= gen_reg_rtx (mode
);
11569 emit_move_insn (tmp
, operands
[3]);
11572 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11574 tmp
= gen_reg_rtx (mode
);
11575 emit_move_insn (tmp
, operands
[2]);
11579 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11580 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11581 operands
[2], operands
[3])));
11583 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11584 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11585 operands
[3], operands
[0])));
11587 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11588 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11589 operands
[2], operands
[0])));
11594 /* Expand a floating-point vector conditional move; a vcond operation
11595 rather than a movcc operation. */
11598 ix86_expand_fp_vcond (rtx operands
[])
11600 enum rtx_code code
= GET_CODE (operands
[3]);
11603 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11604 &operands
[4], &operands
[5]);
11605 if (code
== UNKNOWN
)
11608 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
11609 operands
[5], operands
[1], operands
[2]))
11612 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
11613 operands
[1], operands
[2]);
11614 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
11618 /* Expand a signed integral vector conditional move. */
11621 ix86_expand_int_vcond (rtx operands
[])
11623 enum machine_mode mode
= GET_MODE (operands
[0]);
11624 enum rtx_code code
= GET_CODE (operands
[3]);
11625 bool negate
= false;
11628 cop0
= operands
[4];
11629 cop1
= operands
[5];
11631 /* Canonicalize the comparison to EQ, GT, GTU. */
11642 code
= reverse_condition (code
);
11648 code
= reverse_condition (code
);
11654 code
= swap_condition (code
);
11655 x
= cop0
, cop0
= cop1
, cop1
= x
;
11659 gcc_unreachable ();
11662 /* Unsigned parallel compare is not supported by the hardware. Play some
11663 tricks to turn this into a signed comparison against 0. */
11666 cop0
= force_reg (mode
, cop0
);
11674 /* Perform a parallel modulo subtraction. */
11675 t1
= gen_reg_rtx (mode
);
11676 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
11678 /* Extract the original sign bit of op0. */
11679 mask
= GEN_INT (-0x80000000);
11680 mask
= gen_rtx_CONST_VECTOR (mode
,
11681 gen_rtvec (4, mask
, mask
, mask
, mask
));
11682 mask
= force_reg (mode
, mask
);
11683 t2
= gen_reg_rtx (mode
);
11684 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
11686 /* XOR it back into the result of the subtraction. This results
11687 in the sign bit set iff we saw unsigned underflow. */
11688 x
= gen_reg_rtx (mode
);
11689 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
11697 /* Perform a parallel unsigned saturating subtraction. */
11698 x
= gen_reg_rtx (mode
);
11699 emit_insn (gen_rtx_SET (VOIDmode
, x
,
11700 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
11707 gcc_unreachable ();
11711 cop1
= CONST0_RTX (mode
);
11714 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
11715 operands
[1+negate
], operands
[2-negate
]);
11717 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
11718 operands
[2-negate
]);
11722 /* Expand conditional increment or decrement using adb/sbb instructions.
11723 The default case using setcc followed by the conditional move can be
11724 done by generic code. */
11726 ix86_expand_int_addcc (rtx operands
[])
11728 enum rtx_code code
= GET_CODE (operands
[1]);
11730 rtx val
= const0_rtx
;
11731 bool fpcmp
= false;
11732 enum machine_mode mode
= GET_MODE (operands
[0]);
11734 if (operands
[3] != const1_rtx
11735 && operands
[3] != constm1_rtx
)
11737 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11738 ix86_compare_op1
, &compare_op
))
11740 code
= GET_CODE (compare_op
);
11742 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11743 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11746 code
= ix86_fp_compare_code_to_integer (code
);
11753 PUT_CODE (compare_op
,
11754 reverse_condition_maybe_unordered
11755 (GET_CODE (compare_op
)));
11757 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11759 PUT_MODE (compare_op
, mode
);
11761 /* Construct either adc or sbb insn. */
11762 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
11764 switch (GET_MODE (operands
[0]))
11767 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11770 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11773 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11776 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11779 gcc_unreachable ();
11784 switch (GET_MODE (operands
[0]))
11787 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11790 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11793 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11796 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11799 gcc_unreachable ();
11802 return 1; /* DONE */
11806 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11807 works for floating pointer parameters and nonoffsetable memories.
11808 For pushes, it returns just stack offsets; the values will be saved
11809 in the right order. Maximally three parts are generated. */
11812 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
11817 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
11819 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
11821 gcc_assert (GET_CODE (operand
) != REG
|| !MMX_REGNO_P (REGNO (operand
)));
11822 gcc_assert (size
>= 2 && size
<= 3);
11824 /* Optimize constant pool reference to immediates. This is used by fp
11825 moves, that force all constants to memory to allow combining. */
11826 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
11828 rtx tmp
= maybe_get_pool_constant (operand
);
11833 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
11835 /* The only non-offsetable memories we handle are pushes. */
11836 int ok
= push_operand (operand
, VOIDmode
);
11840 operand
= copy_rtx (operand
);
11841 PUT_MODE (operand
, Pmode
);
11842 parts
[0] = parts
[1] = parts
[2] = operand
;
11846 if (GET_CODE (operand
) == CONST_VECTOR
)
11848 enum machine_mode imode
= int_mode_for_mode (mode
);
11849 /* Caution: if we looked through a constant pool memory above,
11850 the operand may actually have a different mode now. That's
11851 ok, since we want to pun this all the way back to an integer. */
11852 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
11853 gcc_assert (operand
!= NULL
);
11859 if (mode
== DImode
)
11860 split_di (&operand
, 1, &parts
[0], &parts
[1]);
11863 if (REG_P (operand
))
11865 gcc_assert (reload_completed
);
11866 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
11867 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
11869 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
11871 else if (offsettable_memref_p (operand
))
11873 operand
= adjust_address (operand
, SImode
, 0);
11874 parts
[0] = operand
;
11875 parts
[1] = adjust_address (operand
, SImode
, 4);
11877 parts
[2] = adjust_address (operand
, SImode
, 8);
11879 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11884 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11888 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
11889 parts
[2] = gen_int_mode (l
[2], SImode
);
11892 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
11895 gcc_unreachable ();
11897 parts
[1] = gen_int_mode (l
[1], SImode
);
11898 parts
[0] = gen_int_mode (l
[0], SImode
);
11901 gcc_unreachable ();
11906 if (mode
== TImode
)
11907 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
11908 if (mode
== XFmode
|| mode
== TFmode
)
11910 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
11911 if (REG_P (operand
))
11913 gcc_assert (reload_completed
);
11914 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
11915 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
11917 else if (offsettable_memref_p (operand
))
11919 operand
= adjust_address (operand
, DImode
, 0);
11920 parts
[0] = operand
;
11921 parts
[1] = adjust_address (operand
, upper_mode
, 8);
11923 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11928 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11929 real_to_target (l
, &r
, mode
);
11931 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11932 if (HOST_BITS_PER_WIDE_INT
>= 64)
11935 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
11936 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
11939 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
11941 if (upper_mode
== SImode
)
11942 parts
[1] = gen_int_mode (l
[2], SImode
);
11943 else if (HOST_BITS_PER_WIDE_INT
>= 64)
11946 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
11947 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
11950 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
11953 gcc_unreachable ();
11960 /* Emit insns to perform a move or push of DI, DF, and XF values.
11961 Return false when normal moves are needed; true when all required
11962 insns have been emitted. Operands 2-4 contain the input values
11963 int the correct order; operands 5-7 contain the output values. */
11966 ix86_split_long_move (rtx operands
[])
11971 int collisions
= 0;
11972 enum machine_mode mode
= GET_MODE (operands
[0]);
11974 /* The DFmode expanders may ask us to move double.
11975 For 64bit target this is single move. By hiding the fact
11976 here we simplify i386.md splitters. */
11977 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
11979 /* Optimize constant pool reference to immediates. This is used by
11980 fp moves, that force all constants to memory to allow combining. */
11982 if (GET_CODE (operands
[1]) == MEM
11983 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
11984 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
11985 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
11986 if (push_operand (operands
[0], VOIDmode
))
11988 operands
[0] = copy_rtx (operands
[0]);
11989 PUT_MODE (operands
[0], Pmode
);
11992 operands
[0] = gen_lowpart (DImode
, operands
[0]);
11993 operands
[1] = gen_lowpart (DImode
, operands
[1]);
11994 emit_move_insn (operands
[0], operands
[1]);
11998 /* The only non-offsettable memory we handle is push. */
11999 if (push_operand (operands
[0], VOIDmode
))
12002 gcc_assert (GET_CODE (operands
[0]) != MEM
12003 || offsettable_memref_p (operands
[0]));
12005 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12006 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12008 /* When emitting push, take care for source operands on the stack. */
12009 if (push
&& GET_CODE (operands
[1]) == MEM
12010 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12013 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12014 XEXP (part
[1][2], 0));
12015 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12016 XEXP (part
[1][1], 0));
12019 /* We need to do copy in the right order in case an address register
12020 of the source overlaps the destination. */
12021 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
12023 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12025 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12028 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12031 /* Collision in the middle part can be handled by reordering. */
12032 if (collisions
== 1 && nparts
== 3
12033 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12036 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12037 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12040 /* If there are more collisions, we can't handle it by reordering.
12041 Do an lea to the last part and use only one colliding move. */
12042 else if (collisions
> 1)
12048 base
= part
[0][nparts
- 1];
12050 /* Handle the case when the last part isn't valid for lea.
12051 Happens in 64-bit mode storing the 12-byte XFmode. */
12052 if (GET_MODE (base
) != Pmode
)
12053 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12055 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12056 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12057 part
[1][1] = replace_equiv_address (part
[1][1],
12058 plus_constant (base
, UNITS_PER_WORD
));
12060 part
[1][2] = replace_equiv_address (part
[1][2],
12061 plus_constant (base
, 8));
12071 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12072 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12073 emit_move_insn (part
[0][2], part
[1][2]);
12078 /* In 64bit mode we don't have 32bit push available. In case this is
12079 register, it is OK - we will just use larger counterpart. We also
12080 retype memory - these comes from attempt to avoid REX prefix on
12081 moving of second half of TFmode value. */
12082 if (GET_MODE (part
[1][1]) == SImode
)
12084 switch (GET_CODE (part
[1][1]))
12087 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12091 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12095 gcc_unreachable ();
12098 if (GET_MODE (part
[1][0]) == SImode
)
12099 part
[1][0] = part
[1][1];
12102 emit_move_insn (part
[0][1], part
[1][1]);
12103 emit_move_insn (part
[0][0], part
[1][0]);
12107 /* Choose correct order to not overwrite the source before it is copied. */
12108 if ((REG_P (part
[0][0])
12109 && REG_P (part
[1][1])
12110 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12112 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12114 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12118 operands
[2] = part
[0][2];
12119 operands
[3] = part
[0][1];
12120 operands
[4] = part
[0][0];
12121 operands
[5] = part
[1][2];
12122 operands
[6] = part
[1][1];
12123 operands
[7] = part
[1][0];
12127 operands
[2] = part
[0][1];
12128 operands
[3] = part
[0][0];
12129 operands
[5] = part
[1][1];
12130 operands
[6] = part
[1][0];
12137 operands
[2] = part
[0][0];
12138 operands
[3] = part
[0][1];
12139 operands
[4] = part
[0][2];
12140 operands
[5] = part
[1][0];
12141 operands
[6] = part
[1][1];
12142 operands
[7] = part
[1][2];
12146 operands
[2] = part
[0][0];
12147 operands
[3] = part
[0][1];
12148 operands
[5] = part
[1][0];
12149 operands
[6] = part
[1][1];
12153 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12156 if (GET_CODE (operands
[5]) == CONST_INT
12157 && operands
[5] != const0_rtx
12158 && REG_P (operands
[2]))
12160 if (GET_CODE (operands
[6]) == CONST_INT
12161 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12162 operands
[6] = operands
[2];
12165 && GET_CODE (operands
[7]) == CONST_INT
12166 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12167 operands
[7] = operands
[2];
12171 && GET_CODE (operands
[6]) == CONST_INT
12172 && operands
[6] != const0_rtx
12173 && REG_P (operands
[3])
12174 && GET_CODE (operands
[7]) == CONST_INT
12175 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12176 operands
[7] = operands
[3];
12179 emit_move_insn (operands
[2], operands
[5]);
12180 emit_move_insn (operands
[3], operands
[6]);
12182 emit_move_insn (operands
[4], operands
[7]);
12187 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12188 left shift by a constant, either using a single shift or
12189 a sequence of add instructions. */
12192 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12196 emit_insn ((mode
== DImode
12198 : gen_adddi3
) (operand
, operand
, operand
));
12200 else if (!optimize_size
12201 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12204 for (i
=0; i
<count
; i
++)
12206 emit_insn ((mode
== DImode
12208 : gen_adddi3
) (operand
, operand
, operand
));
12212 emit_insn ((mode
== DImode
12214 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12218 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12220 rtx low
[2], high
[2];
12222 const int single_width
= mode
== DImode
? 32 : 64;
12224 if (GET_CODE (operands
[2]) == CONST_INT
)
12226 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12227 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12229 if (count
>= single_width
)
12231 emit_move_insn (high
[0], low
[1]);
12232 emit_move_insn (low
[0], const0_rtx
);
12234 if (count
> single_width
)
12235 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12239 if (!rtx_equal_p (operands
[0], operands
[1]))
12240 emit_move_insn (operands
[0], operands
[1]);
12241 emit_insn ((mode
== DImode
12243 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12244 ix86_expand_ashl_const (low
[0], count
, mode
);
12249 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12251 if (operands
[1] == const1_rtx
)
12253 /* Assuming we've chosen a QImode capable registers, then 1 << N
12254 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12255 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12257 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12259 ix86_expand_clear (low
[0]);
12260 ix86_expand_clear (high
[0]);
12261 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12263 d
= gen_lowpart (QImode
, low
[0]);
12264 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12265 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12266 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12268 d
= gen_lowpart (QImode
, high
[0]);
12269 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12270 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12271 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12274 /* Otherwise, we can get the same results by manually performing
12275 a bit extract operation on bit 5/6, and then performing the two
12276 shifts. The two methods of getting 0/1 into low/high are exactly
12277 the same size. Avoiding the shift in the bit extract case helps
12278 pentium4 a bit; no one else seems to care much either way. */
12283 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12284 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12286 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12287 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12289 emit_insn ((mode
== DImode
12291 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12292 emit_insn ((mode
== DImode
12294 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12295 emit_move_insn (low
[0], high
[0]);
12296 emit_insn ((mode
== DImode
12298 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12301 emit_insn ((mode
== DImode
12303 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12304 emit_insn ((mode
== DImode
12306 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12310 if (operands
[1] == constm1_rtx
)
12312 /* For -1 << N, we can avoid the shld instruction, because we
12313 know that we're shifting 0...31/63 ones into a -1. */
12314 emit_move_insn (low
[0], constm1_rtx
);
12316 emit_move_insn (high
[0], low
[0]);
12318 emit_move_insn (high
[0], constm1_rtx
);
12322 if (!rtx_equal_p (operands
[0], operands
[1]))
12323 emit_move_insn (operands
[0], operands
[1]);
12325 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12326 emit_insn ((mode
== DImode
12328 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12331 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12333 if (TARGET_CMOVE
&& scratch
)
12335 ix86_expand_clear (scratch
);
12336 emit_insn ((mode
== DImode
12337 ? gen_x86_shift_adj_1
12338 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12341 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12345 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12347 rtx low
[2], high
[2];
12349 const int single_width
= mode
== DImode
? 32 : 64;
12351 if (GET_CODE (operands
[2]) == CONST_INT
)
12353 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12354 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12356 if (count
== single_width
* 2 - 1)
12358 emit_move_insn (high
[0], high
[1]);
12359 emit_insn ((mode
== DImode
12361 : gen_ashrdi3
) (high
[0], high
[0],
12362 GEN_INT (single_width
- 1)));
12363 emit_move_insn (low
[0], high
[0]);
12366 else if (count
>= single_width
)
12368 emit_move_insn (low
[0], high
[1]);
12369 emit_move_insn (high
[0], low
[0]);
12370 emit_insn ((mode
== DImode
12372 : gen_ashrdi3
) (high
[0], high
[0],
12373 GEN_INT (single_width
- 1)));
12374 if (count
> single_width
)
12375 emit_insn ((mode
== DImode
12377 : gen_ashrdi3
) (low
[0], low
[0],
12378 GEN_INT (count
- single_width
)));
12382 if (!rtx_equal_p (operands
[0], operands
[1]))
12383 emit_move_insn (operands
[0], operands
[1]);
12384 emit_insn ((mode
== DImode
12386 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12387 emit_insn ((mode
== DImode
12389 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12394 if (!rtx_equal_p (operands
[0], operands
[1]))
12395 emit_move_insn (operands
[0], operands
[1]);
12397 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12399 emit_insn ((mode
== DImode
12401 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12402 emit_insn ((mode
== DImode
12404 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12406 if (TARGET_CMOVE
&& scratch
)
12408 emit_move_insn (scratch
, high
[0]);
12409 emit_insn ((mode
== DImode
12411 : gen_ashrdi3
) (scratch
, scratch
,
12412 GEN_INT (single_width
- 1)));
12413 emit_insn ((mode
== DImode
12414 ? gen_x86_shift_adj_1
12415 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12419 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12424 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12426 rtx low
[2], high
[2];
12428 const int single_width
= mode
== DImode
? 32 : 64;
12430 if (GET_CODE (operands
[2]) == CONST_INT
)
12432 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12433 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12435 if (count
>= single_width
)
12437 emit_move_insn (low
[0], high
[1]);
12438 ix86_expand_clear (high
[0]);
12440 if (count
> single_width
)
12441 emit_insn ((mode
== DImode
12443 : gen_lshrdi3
) (low
[0], low
[0],
12444 GEN_INT (count
- single_width
)));
12448 if (!rtx_equal_p (operands
[0], operands
[1]))
12449 emit_move_insn (operands
[0], operands
[1]);
12450 emit_insn ((mode
== DImode
12452 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12453 emit_insn ((mode
== DImode
12455 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12460 if (!rtx_equal_p (operands
[0], operands
[1]))
12461 emit_move_insn (operands
[0], operands
[1]);
12463 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12465 emit_insn ((mode
== DImode
12467 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12468 emit_insn ((mode
== DImode
12470 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12472 /* Heh. By reversing the arguments, we can reuse this pattern. */
12473 if (TARGET_CMOVE
&& scratch
)
12475 ix86_expand_clear (scratch
);
12476 emit_insn ((mode
== DImode
12477 ? gen_x86_shift_adj_1
12478 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12482 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
12486 /* Helper function for the string operations below. Dest VARIABLE whether
12487 it is aligned to VALUE bytes. If true, jump to the label. */
12489 ix86_expand_aligntest (rtx variable
, int value
)
12491 rtx label
= gen_label_rtx ();
12492 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
12493 if (GET_MODE (variable
) == DImode
)
12494 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
12496 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
12497 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
12502 /* Adjust COUNTER by the VALUE. */
12504 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
12506 if (GET_MODE (countreg
) == DImode
)
12507 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
12509 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
12512 /* Zero extend possibly SImode EXP to Pmode register. */
12514 ix86_zero_extend_to_Pmode (rtx exp
)
12517 if (GET_MODE (exp
) == VOIDmode
)
12518 return force_reg (Pmode
, exp
);
12519 if (GET_MODE (exp
) == Pmode
)
12520 return copy_to_mode_reg (Pmode
, exp
);
12521 r
= gen_reg_rtx (Pmode
);
12522 emit_insn (gen_zero_extendsidi2 (r
, exp
));
12526 /* Expand string move (memcpy) operation. Use i386 string operations when
12527 profitable. expand_clrmem contains similar code. */
12529 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12531 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12532 enum machine_mode counter_mode
;
12533 HOST_WIDE_INT align
= 0;
12534 unsigned HOST_WIDE_INT count
= 0;
12536 if (GET_CODE (align_exp
) == CONST_INT
)
12537 align
= INTVAL (align_exp
);
12539 /* Can't use any of this if the user has appropriated esi or edi. */
12540 if (global_regs
[4] || global_regs
[5])
12543 /* This simple hack avoids all inlining code and simplifies code below. */
12544 if (!TARGET_ALIGN_STRINGOPS
)
12547 if (GET_CODE (count_exp
) == CONST_INT
)
12549 count
= INTVAL (count_exp
);
12550 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12554 /* Figure out proper mode for counter. For 32bits it is always SImode,
12555 for 64bits use SImode when possible, otherwise DImode.
12556 Set count to number of bytes copied when known at compile time. */
12558 || GET_MODE (count_exp
) == SImode
12559 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12560 counter_mode
= SImode
;
12562 counter_mode
= DImode
;
12564 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12566 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12567 if (destreg
!= XEXP (dst
, 0))
12568 dst
= replace_equiv_address_nv (dst
, destreg
);
12569 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12570 if (srcreg
!= XEXP (src
, 0))
12571 src
= replace_equiv_address_nv (src
, srcreg
);
12573 /* When optimizing for size emit simple rep ; movsb instruction for
12574 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12575 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12576 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12577 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12578 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12579 known to be zero or not. The rep; movsb sequence causes higher
12580 register pressure though, so take that into account. */
12582 if ((!optimize
|| optimize_size
)
12587 || (count
& 3) + count
/ 4 > 6))))
12589 emit_insn (gen_cld ());
12590 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12591 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12592 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12593 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12597 /* For constant aligned (or small unaligned) copies use rep movsl
12598 followed by code copying the rest. For PentiumPro ensure 8 byte
12599 alignment to allow rep movsl acceleration. */
12601 else if (count
!= 0
12603 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12604 || optimize_size
|| count
< (unsigned int) 64))
12606 unsigned HOST_WIDE_INT offset
= 0;
12607 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12608 rtx srcmem
, dstmem
;
12610 emit_insn (gen_cld ());
12611 if (count
& ~(size
- 1))
12613 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12615 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12617 while (offset
< (count
& ~(size
- 1)))
12619 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12621 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12623 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12629 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12630 & (TARGET_64BIT
? -1 : 0x3fffffff));
12631 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12632 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12634 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12635 GEN_INT (size
== 4 ? 2 : 3));
12636 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12637 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12639 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12640 countreg
, destexp
, srcexp
));
12641 offset
= count
& ~(size
- 1);
12644 if (size
== 8 && (count
& 0x04))
12646 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12648 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12650 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12655 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12657 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12659 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12664 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12666 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12668 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12671 /* The generic code based on the glibc implementation:
12672 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12673 allowing accelerated copying there)
12674 - copy the data using rep movsl
12675 - copy the rest. */
12680 rtx srcmem
, dstmem
;
12681 int desired_alignment
= (TARGET_PENTIUMPRO
12682 && (count
== 0 || count
>= (unsigned int) 260)
12683 ? 8 : UNITS_PER_WORD
);
12684 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12685 dst
= change_address (dst
, BLKmode
, destreg
);
12686 src
= change_address (src
, BLKmode
, srcreg
);
12688 /* In case we don't know anything about the alignment, default to
12689 library version, since it is usually equally fast and result in
12692 Also emit call when we know that the count is large and call overhead
12693 will not be important. */
12694 if (!TARGET_INLINE_ALL_STRINGOPS
12695 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12698 if (TARGET_SINGLE_STRINGOP
)
12699 emit_insn (gen_cld ());
12701 countreg2
= gen_reg_rtx (Pmode
);
12702 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12704 /* We don't use loops to align destination and to copy parts smaller
12705 than 4 bytes, because gcc is able to optimize such code better (in
12706 the case the destination or the count really is aligned, gcc is often
12707 able to predict the branches) and also it is friendlier to the
12708 hardware branch prediction.
12710 Using loops is beneficial for generic case, because we can
12711 handle small counts using the loops. Many CPUs (such as Athlon)
12712 have large REP prefix setup costs.
12714 This is quite costly. Maybe we can revisit this decision later or
12715 add some customizability to this code. */
12717 if (count
== 0 && align
< desired_alignment
)
12719 label
= gen_label_rtx ();
12720 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12721 LEU
, 0, counter_mode
, 1, label
);
12725 rtx label
= ix86_expand_aligntest (destreg
, 1);
12726 srcmem
= change_address (src
, QImode
, srcreg
);
12727 dstmem
= change_address (dst
, QImode
, destreg
);
12728 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12729 ix86_adjust_counter (countreg
, 1);
12730 emit_label (label
);
12731 LABEL_NUSES (label
) = 1;
12735 rtx label
= ix86_expand_aligntest (destreg
, 2);
12736 srcmem
= change_address (src
, HImode
, srcreg
);
12737 dstmem
= change_address (dst
, HImode
, destreg
);
12738 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12739 ix86_adjust_counter (countreg
, 2);
12740 emit_label (label
);
12741 LABEL_NUSES (label
) = 1;
12743 if (align
<= 4 && desired_alignment
> 4)
12745 rtx label
= ix86_expand_aligntest (destreg
, 4);
12746 srcmem
= change_address (src
, SImode
, srcreg
);
12747 dstmem
= change_address (dst
, SImode
, destreg
);
12748 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12749 ix86_adjust_counter (countreg
, 4);
12750 emit_label (label
);
12751 LABEL_NUSES (label
) = 1;
12754 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12756 emit_label (label
);
12757 LABEL_NUSES (label
) = 1;
12760 if (!TARGET_SINGLE_STRINGOP
)
12761 emit_insn (gen_cld ());
12764 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12766 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12770 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12771 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12773 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12774 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12775 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12776 countreg2
, destexp
, srcexp
));
12780 emit_label (label
);
12781 LABEL_NUSES (label
) = 1;
12783 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12785 srcmem
= change_address (src
, SImode
, srcreg
);
12786 dstmem
= change_address (dst
, SImode
, destreg
);
12787 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12789 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
12791 rtx label
= ix86_expand_aligntest (countreg
, 4);
12792 srcmem
= change_address (src
, SImode
, srcreg
);
12793 dstmem
= change_address (dst
, SImode
, destreg
);
12794 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12795 emit_label (label
);
12796 LABEL_NUSES (label
) = 1;
12798 if (align
> 2 && count
!= 0 && (count
& 2))
12800 srcmem
= change_address (src
, HImode
, srcreg
);
12801 dstmem
= change_address (dst
, HImode
, destreg
);
12802 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12804 if (align
<= 2 || count
== 0)
12806 rtx label
= ix86_expand_aligntest (countreg
, 2);
12807 srcmem
= change_address (src
, HImode
, srcreg
);
12808 dstmem
= change_address (dst
, HImode
, destreg
);
12809 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12810 emit_label (label
);
12811 LABEL_NUSES (label
) = 1;
12813 if (align
> 1 && count
!= 0 && (count
& 1))
12815 srcmem
= change_address (src
, QImode
, srcreg
);
12816 dstmem
= change_address (dst
, QImode
, destreg
);
12817 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12819 if (align
<= 1 || count
== 0)
12821 rtx label
= ix86_expand_aligntest (countreg
, 1);
12822 srcmem
= change_address (src
, QImode
, srcreg
);
12823 dstmem
= change_address (dst
, QImode
, destreg
);
12824 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12825 emit_label (label
);
12826 LABEL_NUSES (label
) = 1;
12833 /* Expand string clear operation (bzero). Use i386 string operations when
12834 profitable. expand_movmem contains similar code. */
12836 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
12838 rtx destreg
, zeroreg
, countreg
, destexp
;
12839 enum machine_mode counter_mode
;
12840 HOST_WIDE_INT align
= 0;
12841 unsigned HOST_WIDE_INT count
= 0;
12843 if (GET_CODE (align_exp
) == CONST_INT
)
12844 align
= INTVAL (align_exp
);
12846 /* Can't use any of this if the user has appropriated esi. */
12847 if (global_regs
[4])
12850 /* This simple hack avoids all inlining code and simplifies code below. */
12851 if (!TARGET_ALIGN_STRINGOPS
)
12854 if (GET_CODE (count_exp
) == CONST_INT
)
12856 count
= INTVAL (count_exp
);
12857 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12860 /* Figure out proper mode for counter. For 32bits it is always SImode,
12861 for 64bits use SImode when possible, otherwise DImode.
12862 Set count to number of bytes copied when known at compile time. */
12864 || GET_MODE (count_exp
) == SImode
12865 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12866 counter_mode
= SImode
;
12868 counter_mode
= DImode
;
12870 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12871 if (destreg
!= XEXP (dst
, 0))
12872 dst
= replace_equiv_address_nv (dst
, destreg
);
12875 /* When optimizing for size emit simple rep ; movsb instruction for
12876 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12877 sequence is 7 bytes long, so if optimizing for size and count is
12878 small enough that some stosl, stosw and stosb instructions without
12879 rep are shorter, fall back into the next if. */
12881 if ((!optimize
|| optimize_size
)
12884 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
12886 emit_insn (gen_cld ());
12888 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12889 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
12890 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12891 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
12893 else if (count
!= 0
12895 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12896 || optimize_size
|| count
< (unsigned int) 64))
12898 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12899 unsigned HOST_WIDE_INT offset
= 0;
12901 emit_insn (gen_cld ());
12903 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
12904 if (count
& ~(size
- 1))
12906 unsigned HOST_WIDE_INT repcount
;
12907 unsigned int max_nonrep
;
12909 repcount
= count
>> (size
== 4 ? 2 : 3);
12911 repcount
&= 0x3fffffff;
12913 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12914 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12915 bytes. In both cases the latter seems to be faster for small
12917 max_nonrep
= size
== 4 ? 7 : 4;
12918 if (!optimize_size
)
12921 case PROCESSOR_PENTIUM4
:
12922 case PROCESSOR_NOCONA
:
12929 if (repcount
<= max_nonrep
)
12930 while (repcount
-- > 0)
12932 rtx mem
= adjust_automodify_address_nv (dst
,
12933 GET_MODE (zeroreg
),
12935 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
12940 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
12941 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12942 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12943 GEN_INT (size
== 4 ? 2 : 3));
12944 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12945 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
12947 offset
= count
& ~(size
- 1);
12950 if (size
== 8 && (count
& 0x04))
12952 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12954 emit_insn (gen_strset (destreg
, mem
,
12955 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
12960 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12962 emit_insn (gen_strset (destreg
, mem
,
12963 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
12968 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12970 emit_insn (gen_strset (destreg
, mem
,
12971 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
12978 /* Compute desired alignment of the string operation. */
12979 int desired_alignment
= (TARGET_PENTIUMPRO
12980 && (count
== 0 || count
>= (unsigned int) 260)
12981 ? 8 : UNITS_PER_WORD
);
12983 /* In case we don't know anything about the alignment, default to
12984 library version, since it is usually equally fast and result in
12987 Also emit call when we know that the count is large and call overhead
12988 will not be important. */
12989 if (!TARGET_INLINE_ALL_STRINGOPS
12990 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12993 if (TARGET_SINGLE_STRINGOP
)
12994 emit_insn (gen_cld ());
12996 countreg2
= gen_reg_rtx (Pmode
);
12997 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12998 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
12999 /* Get rid of MEM_OFFSET, it won't be accurate. */
13000 dst
= change_address (dst
, BLKmode
, destreg
);
13002 if (count
== 0 && align
< desired_alignment
)
13004 label
= gen_label_rtx ();
13005 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
13006 LEU
, 0, counter_mode
, 1, label
);
13010 rtx label
= ix86_expand_aligntest (destreg
, 1);
13011 emit_insn (gen_strset (destreg
, dst
,
13012 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13013 ix86_adjust_counter (countreg
, 1);
13014 emit_label (label
);
13015 LABEL_NUSES (label
) = 1;
13019 rtx label
= ix86_expand_aligntest (destreg
, 2);
13020 emit_insn (gen_strset (destreg
, dst
,
13021 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13022 ix86_adjust_counter (countreg
, 2);
13023 emit_label (label
);
13024 LABEL_NUSES (label
) = 1;
13026 if (align
<= 4 && desired_alignment
> 4)
13028 rtx label
= ix86_expand_aligntest (destreg
, 4);
13029 emit_insn (gen_strset (destreg
, dst
,
13031 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
13033 ix86_adjust_counter (countreg
, 4);
13034 emit_label (label
);
13035 LABEL_NUSES (label
) = 1;
13038 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
13040 emit_label (label
);
13041 LABEL_NUSES (label
) = 1;
13045 if (!TARGET_SINGLE_STRINGOP
)
13046 emit_insn (gen_cld ());
13049 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13051 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13055 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13056 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13058 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13059 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
13063 emit_label (label
);
13064 LABEL_NUSES (label
) = 1;
13067 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13068 emit_insn (gen_strset (destreg
, dst
,
13069 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13070 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
13072 rtx label
= ix86_expand_aligntest (countreg
, 4);
13073 emit_insn (gen_strset (destreg
, dst
,
13074 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13075 emit_label (label
);
13076 LABEL_NUSES (label
) = 1;
13078 if (align
> 2 && count
!= 0 && (count
& 2))
13079 emit_insn (gen_strset (destreg
, dst
,
13080 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13081 if (align
<= 2 || count
== 0)
13083 rtx label
= ix86_expand_aligntest (countreg
, 2);
13084 emit_insn (gen_strset (destreg
, dst
,
13085 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13086 emit_label (label
);
13087 LABEL_NUSES (label
) = 1;
13089 if (align
> 1 && count
!= 0 && (count
& 1))
13090 emit_insn (gen_strset (destreg
, dst
,
13091 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13092 if (align
<= 1 || count
== 0)
13094 rtx label
= ix86_expand_aligntest (countreg
, 1);
13095 emit_insn (gen_strset (destreg
, dst
,
13096 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13097 emit_label (label
);
13098 LABEL_NUSES (label
) = 1;
13104 /* Expand strlen. */
13106 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
13108 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
13110 /* The generic case of strlen expander is long. Avoid it's
13111 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13113 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13114 && !TARGET_INLINE_ALL_STRINGOPS
13116 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
13119 addr
= force_reg (Pmode
, XEXP (src
, 0));
13120 scratch1
= gen_reg_rtx (Pmode
);
13122 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13125 /* Well it seems that some optimizer does not combine a call like
13126 foo(strlen(bar), strlen(bar));
13127 when the move and the subtraction is done here. It does calculate
13128 the length just once when these instructions are done inside of
13129 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13130 often used and I use one fewer register for the lifetime of
13131 output_strlen_unroll() this is better. */
13133 emit_move_insn (out
, addr
);
13135 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
13137 /* strlensi_unroll_1 returns the address of the zero at the end of
13138 the string, like memchr(), so compute the length by subtracting
13139 the start address. */
13141 emit_insn (gen_subdi3 (out
, out
, addr
));
13143 emit_insn (gen_subsi3 (out
, out
, addr
));
13148 scratch2
= gen_reg_rtx (Pmode
);
13149 scratch3
= gen_reg_rtx (Pmode
);
13150 scratch4
= force_reg (Pmode
, constm1_rtx
);
13152 emit_move_insn (scratch3
, addr
);
13153 eoschar
= force_reg (QImode
, eoschar
);
13155 emit_insn (gen_cld ());
13156 src
= replace_equiv_address_nv (src
, scratch3
);
13158 /* If .md starts supporting :P, this can be done in .md. */
13159 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
13160 scratch4
), UNSPEC_SCAS
);
13161 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
13164 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
13165 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
13169 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
13170 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
13176 /* Expand the appropriate insns for doing strlen if not just doing
13179 out = result, initialized with the start address
13180 align_rtx = alignment of the address.
13181 scratch = scratch register, initialized with the startaddress when
13182 not aligned, otherwise undefined
13184 This is just the body. It needs the initializations mentioned above and
13185 some address computing at the end. These things are done in i386.md. */
13188 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
13192 rtx align_2_label
= NULL_RTX
;
13193 rtx align_3_label
= NULL_RTX
;
13194 rtx align_4_label
= gen_label_rtx ();
13195 rtx end_0_label
= gen_label_rtx ();
13197 rtx tmpreg
= gen_reg_rtx (SImode
);
13198 rtx scratch
= gen_reg_rtx (SImode
);
13202 if (GET_CODE (align_rtx
) == CONST_INT
)
13203 align
= INTVAL (align_rtx
);
13205 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13207 /* Is there a known alignment and is it less than 4? */
13210 rtx scratch1
= gen_reg_rtx (Pmode
);
13211 emit_move_insn (scratch1
, out
);
13212 /* Is there a known alignment and is it not 2? */
13215 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
13216 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
13218 /* Leave just the 3 lower bits. */
13219 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
13220 NULL_RTX
, 0, OPTAB_WIDEN
);
13222 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13223 Pmode
, 1, align_4_label
);
13224 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
13225 Pmode
, 1, align_2_label
);
13226 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
13227 Pmode
, 1, align_3_label
);
13231 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13232 check if is aligned to 4 - byte. */
13234 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
13235 NULL_RTX
, 0, OPTAB_WIDEN
);
13237 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13238 Pmode
, 1, align_4_label
);
13241 mem
= change_address (src
, QImode
, out
);
13243 /* Now compare the bytes. */
13245 /* Compare the first n unaligned byte on a byte per byte basis. */
13246 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
13247 QImode
, 1, end_0_label
);
13249 /* Increment the address. */
13251 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13253 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13255 /* Not needed with an alignment of 2 */
13258 emit_label (align_2_label
);
13260 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13264 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13266 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13268 emit_label (align_3_label
);
13271 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13275 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13277 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13280 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13281 align this loop. It gives only huge programs, but does not help to
13283 emit_label (align_4_label
);
13285 mem
= change_address (src
, SImode
, out
);
13286 emit_move_insn (scratch
, mem
);
13288 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
13290 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
13292 /* This formula yields a nonzero result iff one of the bytes is zero.
13293 This saves three branches inside loop and many cycles. */
13295 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
13296 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
13297 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
13298 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
13299 gen_int_mode (0x80808080, SImode
)));
13300 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
13305 rtx reg
= gen_reg_rtx (SImode
);
13306 rtx reg2
= gen_reg_rtx (Pmode
);
13307 emit_move_insn (reg
, tmpreg
);
13308 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
13310 /* If zero is not in the first two bytes, move two bytes forward. */
13311 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13312 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13313 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13314 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
13315 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
13318 /* Emit lea manually to avoid clobbering of flags. */
13319 emit_insn (gen_rtx_SET (SImode
, reg2
,
13320 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
13322 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13323 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13324 emit_insn (gen_rtx_SET (VOIDmode
, out
,
13325 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
13332 rtx end_2_label
= gen_label_rtx ();
13333 /* Is zero in the first two bytes? */
13335 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13336 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13337 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
13338 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
13339 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
13341 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
13342 JUMP_LABEL (tmp
) = end_2_label
;
13344 /* Not in the first two. Move two bytes forward. */
13345 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
13347 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
13349 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
13351 emit_label (end_2_label
);
13355 /* Avoid branch in fixing the byte. */
13356 tmpreg
= gen_lowpart (QImode
, tmpreg
);
13357 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
13358 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
13360 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
13362 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
13364 emit_label (end_0_label
);
13368 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
13369 rtx callarg2 ATTRIBUTE_UNUSED
,
13370 rtx pop
, int sibcall
)
13372 rtx use
= NULL
, call
;
13374 if (pop
== const0_rtx
)
13376 gcc_assert (!TARGET_64BIT
|| !pop
);
13378 if (TARGET_MACHO
&& !TARGET_64BIT
)
13381 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
13382 fnaddr
= machopic_indirect_call_target (fnaddr
);
13387 /* Static functions and indirect calls don't need the pic register. */
13388 if (! TARGET_64BIT
&& flag_pic
13389 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
13390 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
13391 use_reg (&use
, pic_offset_table_rtx
);
13394 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
13396 rtx al
= gen_rtx_REG (QImode
, 0);
13397 emit_move_insn (al
, callarg2
);
13398 use_reg (&use
, al
);
13401 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
13403 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13404 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13406 if (sibcall
&& TARGET_64BIT
13407 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
13410 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13411 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
13412 emit_move_insn (fnaddr
, addr
);
13413 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13416 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
13418 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
13421 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
13422 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
13423 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
13426 call
= emit_call_insn (call
);
13428 CALL_INSN_FUNCTION_USAGE (call
) = use
;
13432 /* Clear stack slot assignments remembered from previous functions.
13433 This is called from INIT_EXPANDERS once before RTL is emitted for each
13436 static struct machine_function
*
13437 ix86_init_machine_status (void)
13439 struct machine_function
*f
;
13441 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
13442 f
->use_fast_prologue_epilogue_nregs
= -1;
13443 f
->tls_descriptor_call_expanded_p
= 0;
13448 /* Return a MEM corresponding to a stack slot with mode MODE.
13449 Allocate a new slot if necessary.
13451 The RTL for a function can have several slots available: N is
13452 which slot to use. */
13455 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
13457 struct stack_local_entry
*s
;
13459 gcc_assert (n
< MAX_386_STACK_LOCALS
);
13461 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
13462 if (s
->mode
== mode
&& s
->n
== n
)
13465 s
= (struct stack_local_entry
*)
13466 ggc_alloc (sizeof (struct stack_local_entry
));
13469 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
13471 s
->next
= ix86_stack_locals
;
13472 ix86_stack_locals
= s
;
13476 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13478 static GTY(()) rtx ix86_tls_symbol
;
13480 ix86_tls_get_addr (void)
13483 if (!ix86_tls_symbol
)
13485 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13486 (TARGET_ANY_GNU_TLS
13488 ? "___tls_get_addr"
13489 : "__tls_get_addr");
13492 return ix86_tls_symbol
;
13495 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13497 static GTY(()) rtx ix86_tls_module_base_symbol
;
13499 ix86_tls_module_base (void)
13502 if (!ix86_tls_module_base_symbol
)
13504 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13505 "_TLS_MODULE_BASE_");
13506 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13507 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13510 return ix86_tls_module_base_symbol
;
13513 /* Calculate the length of the memory address in the instruction
13514 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13517 memory_address_length (rtx addr
)
13519 struct ix86_address parts
;
13520 rtx base
, index
, disp
;
13524 if (GET_CODE (addr
) == PRE_DEC
13525 || GET_CODE (addr
) == POST_INC
13526 || GET_CODE (addr
) == PRE_MODIFY
13527 || GET_CODE (addr
) == POST_MODIFY
)
13530 ok
= ix86_decompose_address (addr
, &parts
);
13533 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
13534 parts
.base
= SUBREG_REG (parts
.base
);
13535 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
13536 parts
.index
= SUBREG_REG (parts
.index
);
13539 index
= parts
.index
;
13544 - esp as the base always wants an index,
13545 - ebp as the base always wants a displacement. */
13547 /* Register Indirect. */
13548 if (base
&& !index
&& !disp
)
13550 /* esp (for its index) and ebp (for its displacement) need
13551 the two-byte modrm form. */
13552 if (addr
== stack_pointer_rtx
13553 || addr
== arg_pointer_rtx
13554 || addr
== frame_pointer_rtx
13555 || addr
== hard_frame_pointer_rtx
)
13559 /* Direct Addressing. */
13560 else if (disp
&& !base
&& !index
)
13565 /* Find the length of the displacement constant. */
13568 if (base
&& satisfies_constraint_K (disp
))
13573 /* ebp always wants a displacement. */
13574 else if (base
== hard_frame_pointer_rtx
)
13577 /* An index requires the two-byte modrm form.... */
13579 /* ...like esp, which always wants an index. */
13580 || base
== stack_pointer_rtx
13581 || base
== arg_pointer_rtx
13582 || base
== frame_pointer_rtx
)
13589 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13590 is set, expect that insn have 8bit immediate alternative. */
13592 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
13596 extract_insn_cached (insn
);
13597 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13598 if (CONSTANT_P (recog_data
.operand
[i
]))
13601 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
13605 switch (get_attr_mode (insn
))
13616 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13621 fatal_insn ("unknown insn mode", insn
);
13627 /* Compute default value for "length_address" attribute. */
13629 ix86_attr_length_address_default (rtx insn
)
13633 if (get_attr_type (insn
) == TYPE_LEA
)
13635 rtx set
= PATTERN (insn
);
13637 if (GET_CODE (set
) == PARALLEL
)
13638 set
= XVECEXP (set
, 0, 0);
13640 gcc_assert (GET_CODE (set
) == SET
);
13642 return memory_address_length (SET_SRC (set
));
13645 extract_insn_cached (insn
);
13646 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13647 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13649 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
13655 /* Return the maximum number of instructions a cpu can issue. */
13658 ix86_issue_rate (void)
13662 case PROCESSOR_PENTIUM
:
13666 case PROCESSOR_PENTIUMPRO
:
13667 case PROCESSOR_PENTIUM4
:
13668 case PROCESSOR_ATHLON
:
13670 case PROCESSOR_NOCONA
:
13671 case PROCESSOR_GENERIC32
:
13672 case PROCESSOR_GENERIC64
:
13680 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13681 by DEP_INSN and nothing set by DEP_INSN. */
13684 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13688 /* Simplify the test for uninteresting insns. */
13689 if (insn_type
!= TYPE_SETCC
13690 && insn_type
!= TYPE_ICMOV
13691 && insn_type
!= TYPE_FCMOV
13692 && insn_type
!= TYPE_IBR
)
13695 if ((set
= single_set (dep_insn
)) != 0)
13697 set
= SET_DEST (set
);
13700 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
13701 && XVECLEN (PATTERN (dep_insn
), 0) == 2
13702 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
13703 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
13705 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13706 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13711 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
13714 /* This test is true if the dependent insn reads the flags but
13715 not any other potentially set register. */
13716 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
13719 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
13725 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13726 address with operands set by DEP_INSN. */
13729 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13733 if (insn_type
== TYPE_LEA
13736 addr
= PATTERN (insn
);
13738 if (GET_CODE (addr
) == PARALLEL
)
13739 addr
= XVECEXP (addr
, 0, 0);
13741 gcc_assert (GET_CODE (addr
) == SET
);
13743 addr
= SET_SRC (addr
);
13748 extract_insn_cached (insn
);
13749 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13750 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13752 addr
= XEXP (recog_data
.operand
[i
], 0);
13759 return modified_in_p (addr
, dep_insn
);
13763 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
13765 enum attr_type insn_type
, dep_insn_type
;
13766 enum attr_memory memory
;
13768 int dep_insn_code_number
;
13770 /* Anti and output dependencies have zero cost on all CPUs. */
13771 if (REG_NOTE_KIND (link
) != 0)
13774 dep_insn_code_number
= recog_memoized (dep_insn
);
13776 /* If we can't recognize the insns, we can't really do anything. */
13777 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
13780 insn_type
= get_attr_type (insn
);
13781 dep_insn_type
= get_attr_type (dep_insn
);
13785 case PROCESSOR_PENTIUM
:
13786 /* Address Generation Interlock adds a cycle of latency. */
13787 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13790 /* ??? Compares pair with jump/setcc. */
13791 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
13794 /* Floating point stores require value to be ready one cycle earlier. */
13795 if (insn_type
== TYPE_FMOV
13796 && get_attr_memory (insn
) == MEMORY_STORE
13797 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13801 case PROCESSOR_PENTIUMPRO
:
13802 memory
= get_attr_memory (insn
);
13804 /* INT->FP conversion is expensive. */
13805 if (get_attr_fp_int_src (dep_insn
))
13808 /* There is one cycle extra latency between an FP op and a store. */
13809 if (insn_type
== TYPE_FMOV
13810 && (set
= single_set (dep_insn
)) != NULL_RTX
13811 && (set2
= single_set (insn
)) != NULL_RTX
13812 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
13813 && GET_CODE (SET_DEST (set2
)) == MEM
)
13816 /* Show ability of reorder buffer to hide latency of load by executing
13817 in parallel with previous instruction in case
13818 previous instruction is not needed to compute the address. */
13819 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13820 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13822 /* Claim moves to take one cycle, as core can issue one load
13823 at time and the next load can start cycle later. */
13824 if (dep_insn_type
== TYPE_IMOV
13825 || dep_insn_type
== TYPE_FMOV
)
13833 memory
= get_attr_memory (insn
);
13835 /* The esp dependency is resolved before the instruction is really
13837 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
13838 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
13841 /* INT->FP conversion is expensive. */
13842 if (get_attr_fp_int_src (dep_insn
))
13845 /* Show ability of reorder buffer to hide latency of load by executing
13846 in parallel with previous instruction in case
13847 previous instruction is not needed to compute the address. */
13848 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13849 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13851 /* Claim moves to take one cycle, as core can issue one load
13852 at time and the next load can start cycle later. */
13853 if (dep_insn_type
== TYPE_IMOV
13854 || dep_insn_type
== TYPE_FMOV
)
13863 case PROCESSOR_ATHLON
:
13865 case PROCESSOR_GENERIC32
:
13866 case PROCESSOR_GENERIC64
:
13867 memory
= get_attr_memory (insn
);
13869 /* Show ability of reorder buffer to hide latency of load by executing
13870 in parallel with previous instruction in case
13871 previous instruction is not needed to compute the address. */
13872 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13873 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13875 enum attr_unit unit
= get_attr_unit (insn
);
13878 /* Because of the difference between the length of integer and
13879 floating unit pipeline preparation stages, the memory operands
13880 for floating point are cheaper.
13882 ??? For Athlon it the difference is most probably 2. */
13883 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
13886 loadcost
= TARGET_ATHLON
? 2 : 0;
13888 if (cost
>= loadcost
)
13901 /* How many alternative schedules to try. This should be as wide as the
13902 scheduling freedom in the DFA, but no wider. Making this value too
13903 large results extra work for the scheduler. */
13906 ia32_multipass_dfa_lookahead (void)
13908 if (ix86_tune
== PROCESSOR_PENTIUM
)
13911 if (ix86_tune
== PROCESSOR_PENTIUMPRO
13912 || ix86_tune
== PROCESSOR_K6
)
13920 /* Compute the alignment given to a constant that is being placed in memory.
13921 EXP is the constant and ALIGN is the alignment that the object would
13923 The value of this function is used instead of that alignment to align
13927 ix86_constant_alignment (tree exp
, int align
)
13929 if (TREE_CODE (exp
) == REAL_CST
)
13931 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
13933 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
13936 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
13937 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
13938 return BITS_PER_WORD
;
13943 /* Compute the alignment for a static variable.
13944 TYPE is the data type, and ALIGN is the alignment that
13945 the object would ordinarily have. The value of this function is used
13946 instead of that alignment to align the object. */
13949 ix86_data_alignment (tree type
, int align
)
13951 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
13953 if (AGGREGATE_TYPE_P (type
)
13954 && TYPE_SIZE (type
)
13955 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
13956 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
13957 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
13958 && align
< max_align
)
13961 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13962 to 16byte boundary. */
13965 if (AGGREGATE_TYPE_P (type
)
13966 && TYPE_SIZE (type
)
13967 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
13968 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
13969 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
13973 if (TREE_CODE (type
) == ARRAY_TYPE
)
13975 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
13977 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
13980 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
13983 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
13985 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
13988 else if ((TREE_CODE (type
) == RECORD_TYPE
13989 || TREE_CODE (type
) == UNION_TYPE
13990 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
13991 && TYPE_FIELDS (type
))
13993 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
13995 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
13998 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
13999 || TREE_CODE (type
) == INTEGER_TYPE
)
14001 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14003 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14010 /* Compute the alignment for a local variable.
14011 TYPE is the data type, and ALIGN is the alignment that
14012 the object would ordinarily have. The value of this macro is used
14013 instead of that alignment to align the object. */
14016 ix86_local_alignment (tree type
, int align
)
14018 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14019 to 16byte boundary. */
14022 if (AGGREGATE_TYPE_P (type
)
14023 && TYPE_SIZE (type
)
14024 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14025 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
14026 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14029 if (TREE_CODE (type
) == ARRAY_TYPE
)
14031 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14033 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14036 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14038 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14040 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14043 else if ((TREE_CODE (type
) == RECORD_TYPE
14044 || TREE_CODE (type
) == UNION_TYPE
14045 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14046 && TYPE_FIELDS (type
))
14048 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14050 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14053 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14054 || TREE_CODE (type
) == INTEGER_TYPE
)
14057 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14059 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14065 /* Emit RTL insns to initialize the variable parts of a trampoline.
14066 FNADDR is an RTX for the address of the function's pure code.
14067 CXT is an RTX for the static chain value for the function. */
14069 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
14073 /* Compute offset from the end of the jmp to the target function. */
14074 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
14075 plus_constant (tramp
, 10),
14076 NULL_RTX
, 1, OPTAB_DIRECT
);
14077 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
14078 gen_int_mode (0xb9, QImode
));
14079 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
14080 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
14081 gen_int_mode (0xe9, QImode
));
14082 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
14087 /* Try to load address using shorter movl instead of movabs.
14088 We may want to support movq for kernel mode, but kernel does not use
14089 trampolines at the moment. */
14090 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
14092 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
14093 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14094 gen_int_mode (0xbb41, HImode
));
14095 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
14096 gen_lowpart (SImode
, fnaddr
));
14101 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14102 gen_int_mode (0xbb49, HImode
));
14103 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14107 /* Load static chain using movabs to r10. */
14108 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14109 gen_int_mode (0xba49, HImode
));
14110 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14113 /* Jump to the r11 */
14114 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14115 gen_int_mode (0xff49, HImode
));
14116 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
14117 gen_int_mode (0xe3, QImode
));
14119 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
14122 #ifdef ENABLE_EXECUTE_STACK
14123 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
14124 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
14128 /* Codes for all the SSE/MMX builtins. */
14131 IX86_BUILTIN_ADDPS
,
14132 IX86_BUILTIN_ADDSS
,
14133 IX86_BUILTIN_DIVPS
,
14134 IX86_BUILTIN_DIVSS
,
14135 IX86_BUILTIN_MULPS
,
14136 IX86_BUILTIN_MULSS
,
14137 IX86_BUILTIN_SUBPS
,
14138 IX86_BUILTIN_SUBSS
,
14140 IX86_BUILTIN_CMPEQPS
,
14141 IX86_BUILTIN_CMPLTPS
,
14142 IX86_BUILTIN_CMPLEPS
,
14143 IX86_BUILTIN_CMPGTPS
,
14144 IX86_BUILTIN_CMPGEPS
,
14145 IX86_BUILTIN_CMPNEQPS
,
14146 IX86_BUILTIN_CMPNLTPS
,
14147 IX86_BUILTIN_CMPNLEPS
,
14148 IX86_BUILTIN_CMPNGTPS
,
14149 IX86_BUILTIN_CMPNGEPS
,
14150 IX86_BUILTIN_CMPORDPS
,
14151 IX86_BUILTIN_CMPUNORDPS
,
14152 IX86_BUILTIN_CMPEQSS
,
14153 IX86_BUILTIN_CMPLTSS
,
14154 IX86_BUILTIN_CMPLESS
,
14155 IX86_BUILTIN_CMPNEQSS
,
14156 IX86_BUILTIN_CMPNLTSS
,
14157 IX86_BUILTIN_CMPNLESS
,
14158 IX86_BUILTIN_CMPNGTSS
,
14159 IX86_BUILTIN_CMPNGESS
,
14160 IX86_BUILTIN_CMPORDSS
,
14161 IX86_BUILTIN_CMPUNORDSS
,
14163 IX86_BUILTIN_COMIEQSS
,
14164 IX86_BUILTIN_COMILTSS
,
14165 IX86_BUILTIN_COMILESS
,
14166 IX86_BUILTIN_COMIGTSS
,
14167 IX86_BUILTIN_COMIGESS
,
14168 IX86_BUILTIN_COMINEQSS
,
14169 IX86_BUILTIN_UCOMIEQSS
,
14170 IX86_BUILTIN_UCOMILTSS
,
14171 IX86_BUILTIN_UCOMILESS
,
14172 IX86_BUILTIN_UCOMIGTSS
,
14173 IX86_BUILTIN_UCOMIGESS
,
14174 IX86_BUILTIN_UCOMINEQSS
,
14176 IX86_BUILTIN_CVTPI2PS
,
14177 IX86_BUILTIN_CVTPS2PI
,
14178 IX86_BUILTIN_CVTSI2SS
,
14179 IX86_BUILTIN_CVTSI642SS
,
14180 IX86_BUILTIN_CVTSS2SI
,
14181 IX86_BUILTIN_CVTSS2SI64
,
14182 IX86_BUILTIN_CVTTPS2PI
,
14183 IX86_BUILTIN_CVTTSS2SI
,
14184 IX86_BUILTIN_CVTTSS2SI64
,
14186 IX86_BUILTIN_MAXPS
,
14187 IX86_BUILTIN_MAXSS
,
14188 IX86_BUILTIN_MINPS
,
14189 IX86_BUILTIN_MINSS
,
14191 IX86_BUILTIN_LOADUPS
,
14192 IX86_BUILTIN_STOREUPS
,
14193 IX86_BUILTIN_MOVSS
,
14195 IX86_BUILTIN_MOVHLPS
,
14196 IX86_BUILTIN_MOVLHPS
,
14197 IX86_BUILTIN_LOADHPS
,
14198 IX86_BUILTIN_LOADLPS
,
14199 IX86_BUILTIN_STOREHPS
,
14200 IX86_BUILTIN_STORELPS
,
14202 IX86_BUILTIN_MASKMOVQ
,
14203 IX86_BUILTIN_MOVMSKPS
,
14204 IX86_BUILTIN_PMOVMSKB
,
14206 IX86_BUILTIN_MOVNTPS
,
14207 IX86_BUILTIN_MOVNTQ
,
14209 IX86_BUILTIN_LOADDQU
,
14210 IX86_BUILTIN_STOREDQU
,
14212 IX86_BUILTIN_PACKSSWB
,
14213 IX86_BUILTIN_PACKSSDW
,
14214 IX86_BUILTIN_PACKUSWB
,
14216 IX86_BUILTIN_PADDB
,
14217 IX86_BUILTIN_PADDW
,
14218 IX86_BUILTIN_PADDD
,
14219 IX86_BUILTIN_PADDQ
,
14220 IX86_BUILTIN_PADDSB
,
14221 IX86_BUILTIN_PADDSW
,
14222 IX86_BUILTIN_PADDUSB
,
14223 IX86_BUILTIN_PADDUSW
,
14224 IX86_BUILTIN_PSUBB
,
14225 IX86_BUILTIN_PSUBW
,
14226 IX86_BUILTIN_PSUBD
,
14227 IX86_BUILTIN_PSUBQ
,
14228 IX86_BUILTIN_PSUBSB
,
14229 IX86_BUILTIN_PSUBSW
,
14230 IX86_BUILTIN_PSUBUSB
,
14231 IX86_BUILTIN_PSUBUSW
,
14234 IX86_BUILTIN_PANDN
,
14238 IX86_BUILTIN_PAVGB
,
14239 IX86_BUILTIN_PAVGW
,
14241 IX86_BUILTIN_PCMPEQB
,
14242 IX86_BUILTIN_PCMPEQW
,
14243 IX86_BUILTIN_PCMPEQD
,
14244 IX86_BUILTIN_PCMPGTB
,
14245 IX86_BUILTIN_PCMPGTW
,
14246 IX86_BUILTIN_PCMPGTD
,
14248 IX86_BUILTIN_PMADDWD
,
14250 IX86_BUILTIN_PMAXSW
,
14251 IX86_BUILTIN_PMAXUB
,
14252 IX86_BUILTIN_PMINSW
,
14253 IX86_BUILTIN_PMINUB
,
14255 IX86_BUILTIN_PMULHUW
,
14256 IX86_BUILTIN_PMULHW
,
14257 IX86_BUILTIN_PMULLW
,
14259 IX86_BUILTIN_PSADBW
,
14260 IX86_BUILTIN_PSHUFW
,
14262 IX86_BUILTIN_PSLLW
,
14263 IX86_BUILTIN_PSLLD
,
14264 IX86_BUILTIN_PSLLQ
,
14265 IX86_BUILTIN_PSRAW
,
14266 IX86_BUILTIN_PSRAD
,
14267 IX86_BUILTIN_PSRLW
,
14268 IX86_BUILTIN_PSRLD
,
14269 IX86_BUILTIN_PSRLQ
,
14270 IX86_BUILTIN_PSLLWI
,
14271 IX86_BUILTIN_PSLLDI
,
14272 IX86_BUILTIN_PSLLQI
,
14273 IX86_BUILTIN_PSRAWI
,
14274 IX86_BUILTIN_PSRADI
,
14275 IX86_BUILTIN_PSRLWI
,
14276 IX86_BUILTIN_PSRLDI
,
14277 IX86_BUILTIN_PSRLQI
,
14279 IX86_BUILTIN_PUNPCKHBW
,
14280 IX86_BUILTIN_PUNPCKHWD
,
14281 IX86_BUILTIN_PUNPCKHDQ
,
14282 IX86_BUILTIN_PUNPCKLBW
,
14283 IX86_BUILTIN_PUNPCKLWD
,
14284 IX86_BUILTIN_PUNPCKLDQ
,
14286 IX86_BUILTIN_SHUFPS
,
14288 IX86_BUILTIN_RCPPS
,
14289 IX86_BUILTIN_RCPSS
,
14290 IX86_BUILTIN_RSQRTPS
,
14291 IX86_BUILTIN_RSQRTSS
,
14292 IX86_BUILTIN_SQRTPS
,
14293 IX86_BUILTIN_SQRTSS
,
14295 IX86_BUILTIN_UNPCKHPS
,
14296 IX86_BUILTIN_UNPCKLPS
,
14298 IX86_BUILTIN_ANDPS
,
14299 IX86_BUILTIN_ANDNPS
,
14301 IX86_BUILTIN_XORPS
,
14304 IX86_BUILTIN_LDMXCSR
,
14305 IX86_BUILTIN_STMXCSR
,
14306 IX86_BUILTIN_SFENCE
,
14308 /* 3DNow! Original */
14309 IX86_BUILTIN_FEMMS
,
14310 IX86_BUILTIN_PAVGUSB
,
14311 IX86_BUILTIN_PF2ID
,
14312 IX86_BUILTIN_PFACC
,
14313 IX86_BUILTIN_PFADD
,
14314 IX86_BUILTIN_PFCMPEQ
,
14315 IX86_BUILTIN_PFCMPGE
,
14316 IX86_BUILTIN_PFCMPGT
,
14317 IX86_BUILTIN_PFMAX
,
14318 IX86_BUILTIN_PFMIN
,
14319 IX86_BUILTIN_PFMUL
,
14320 IX86_BUILTIN_PFRCP
,
14321 IX86_BUILTIN_PFRCPIT1
,
14322 IX86_BUILTIN_PFRCPIT2
,
14323 IX86_BUILTIN_PFRSQIT1
,
14324 IX86_BUILTIN_PFRSQRT
,
14325 IX86_BUILTIN_PFSUB
,
14326 IX86_BUILTIN_PFSUBR
,
14327 IX86_BUILTIN_PI2FD
,
14328 IX86_BUILTIN_PMULHRW
,
14330 /* 3DNow! Athlon Extensions */
14331 IX86_BUILTIN_PF2IW
,
14332 IX86_BUILTIN_PFNACC
,
14333 IX86_BUILTIN_PFPNACC
,
14334 IX86_BUILTIN_PI2FW
,
14335 IX86_BUILTIN_PSWAPDSI
,
14336 IX86_BUILTIN_PSWAPDSF
,
14339 IX86_BUILTIN_ADDPD
,
14340 IX86_BUILTIN_ADDSD
,
14341 IX86_BUILTIN_DIVPD
,
14342 IX86_BUILTIN_DIVSD
,
14343 IX86_BUILTIN_MULPD
,
14344 IX86_BUILTIN_MULSD
,
14345 IX86_BUILTIN_SUBPD
,
14346 IX86_BUILTIN_SUBSD
,
14348 IX86_BUILTIN_CMPEQPD
,
14349 IX86_BUILTIN_CMPLTPD
,
14350 IX86_BUILTIN_CMPLEPD
,
14351 IX86_BUILTIN_CMPGTPD
,
14352 IX86_BUILTIN_CMPGEPD
,
14353 IX86_BUILTIN_CMPNEQPD
,
14354 IX86_BUILTIN_CMPNLTPD
,
14355 IX86_BUILTIN_CMPNLEPD
,
14356 IX86_BUILTIN_CMPNGTPD
,
14357 IX86_BUILTIN_CMPNGEPD
,
14358 IX86_BUILTIN_CMPORDPD
,
14359 IX86_BUILTIN_CMPUNORDPD
,
14360 IX86_BUILTIN_CMPNEPD
,
14361 IX86_BUILTIN_CMPEQSD
,
14362 IX86_BUILTIN_CMPLTSD
,
14363 IX86_BUILTIN_CMPLESD
,
14364 IX86_BUILTIN_CMPNEQSD
,
14365 IX86_BUILTIN_CMPNLTSD
,
14366 IX86_BUILTIN_CMPNLESD
,
14367 IX86_BUILTIN_CMPORDSD
,
14368 IX86_BUILTIN_CMPUNORDSD
,
14369 IX86_BUILTIN_CMPNESD
,
14371 IX86_BUILTIN_COMIEQSD
,
14372 IX86_BUILTIN_COMILTSD
,
14373 IX86_BUILTIN_COMILESD
,
14374 IX86_BUILTIN_COMIGTSD
,
14375 IX86_BUILTIN_COMIGESD
,
14376 IX86_BUILTIN_COMINEQSD
,
14377 IX86_BUILTIN_UCOMIEQSD
,
14378 IX86_BUILTIN_UCOMILTSD
,
14379 IX86_BUILTIN_UCOMILESD
,
14380 IX86_BUILTIN_UCOMIGTSD
,
14381 IX86_BUILTIN_UCOMIGESD
,
14382 IX86_BUILTIN_UCOMINEQSD
,
14384 IX86_BUILTIN_MAXPD
,
14385 IX86_BUILTIN_MAXSD
,
14386 IX86_BUILTIN_MINPD
,
14387 IX86_BUILTIN_MINSD
,
14389 IX86_BUILTIN_ANDPD
,
14390 IX86_BUILTIN_ANDNPD
,
14392 IX86_BUILTIN_XORPD
,
14394 IX86_BUILTIN_SQRTPD
,
14395 IX86_BUILTIN_SQRTSD
,
14397 IX86_BUILTIN_UNPCKHPD
,
14398 IX86_BUILTIN_UNPCKLPD
,
14400 IX86_BUILTIN_SHUFPD
,
14402 IX86_BUILTIN_LOADUPD
,
14403 IX86_BUILTIN_STOREUPD
,
14404 IX86_BUILTIN_MOVSD
,
14406 IX86_BUILTIN_LOADHPD
,
14407 IX86_BUILTIN_LOADLPD
,
14409 IX86_BUILTIN_CVTDQ2PD
,
14410 IX86_BUILTIN_CVTDQ2PS
,
14412 IX86_BUILTIN_CVTPD2DQ
,
14413 IX86_BUILTIN_CVTPD2PI
,
14414 IX86_BUILTIN_CVTPD2PS
,
14415 IX86_BUILTIN_CVTTPD2DQ
,
14416 IX86_BUILTIN_CVTTPD2PI
,
14418 IX86_BUILTIN_CVTPI2PD
,
14419 IX86_BUILTIN_CVTSI2SD
,
14420 IX86_BUILTIN_CVTSI642SD
,
14422 IX86_BUILTIN_CVTSD2SI
,
14423 IX86_BUILTIN_CVTSD2SI64
,
14424 IX86_BUILTIN_CVTSD2SS
,
14425 IX86_BUILTIN_CVTSS2SD
,
14426 IX86_BUILTIN_CVTTSD2SI
,
14427 IX86_BUILTIN_CVTTSD2SI64
,
14429 IX86_BUILTIN_CVTPS2DQ
,
14430 IX86_BUILTIN_CVTPS2PD
,
14431 IX86_BUILTIN_CVTTPS2DQ
,
14433 IX86_BUILTIN_MOVNTI
,
14434 IX86_BUILTIN_MOVNTPD
,
14435 IX86_BUILTIN_MOVNTDQ
,
14438 IX86_BUILTIN_MASKMOVDQU
,
14439 IX86_BUILTIN_MOVMSKPD
,
14440 IX86_BUILTIN_PMOVMSKB128
,
14442 IX86_BUILTIN_PACKSSWB128
,
14443 IX86_BUILTIN_PACKSSDW128
,
14444 IX86_BUILTIN_PACKUSWB128
,
14446 IX86_BUILTIN_PADDB128
,
14447 IX86_BUILTIN_PADDW128
,
14448 IX86_BUILTIN_PADDD128
,
14449 IX86_BUILTIN_PADDQ128
,
14450 IX86_BUILTIN_PADDSB128
,
14451 IX86_BUILTIN_PADDSW128
,
14452 IX86_BUILTIN_PADDUSB128
,
14453 IX86_BUILTIN_PADDUSW128
,
14454 IX86_BUILTIN_PSUBB128
,
14455 IX86_BUILTIN_PSUBW128
,
14456 IX86_BUILTIN_PSUBD128
,
14457 IX86_BUILTIN_PSUBQ128
,
14458 IX86_BUILTIN_PSUBSB128
,
14459 IX86_BUILTIN_PSUBSW128
,
14460 IX86_BUILTIN_PSUBUSB128
,
14461 IX86_BUILTIN_PSUBUSW128
,
14463 IX86_BUILTIN_PAND128
,
14464 IX86_BUILTIN_PANDN128
,
14465 IX86_BUILTIN_POR128
,
14466 IX86_BUILTIN_PXOR128
,
14468 IX86_BUILTIN_PAVGB128
,
14469 IX86_BUILTIN_PAVGW128
,
14471 IX86_BUILTIN_PCMPEQB128
,
14472 IX86_BUILTIN_PCMPEQW128
,
14473 IX86_BUILTIN_PCMPEQD128
,
14474 IX86_BUILTIN_PCMPGTB128
,
14475 IX86_BUILTIN_PCMPGTW128
,
14476 IX86_BUILTIN_PCMPGTD128
,
14478 IX86_BUILTIN_PMADDWD128
,
14480 IX86_BUILTIN_PMAXSW128
,
14481 IX86_BUILTIN_PMAXUB128
,
14482 IX86_BUILTIN_PMINSW128
,
14483 IX86_BUILTIN_PMINUB128
,
14485 IX86_BUILTIN_PMULUDQ
,
14486 IX86_BUILTIN_PMULUDQ128
,
14487 IX86_BUILTIN_PMULHUW128
,
14488 IX86_BUILTIN_PMULHW128
,
14489 IX86_BUILTIN_PMULLW128
,
14491 IX86_BUILTIN_PSADBW128
,
14492 IX86_BUILTIN_PSHUFHW
,
14493 IX86_BUILTIN_PSHUFLW
,
14494 IX86_BUILTIN_PSHUFD
,
14496 IX86_BUILTIN_PSLLW128
,
14497 IX86_BUILTIN_PSLLD128
,
14498 IX86_BUILTIN_PSLLQ128
,
14499 IX86_BUILTIN_PSRAW128
,
14500 IX86_BUILTIN_PSRAD128
,
14501 IX86_BUILTIN_PSRLW128
,
14502 IX86_BUILTIN_PSRLD128
,
14503 IX86_BUILTIN_PSRLQ128
,
14504 IX86_BUILTIN_PSLLDQI128
,
14505 IX86_BUILTIN_PSLLWI128
,
14506 IX86_BUILTIN_PSLLDI128
,
14507 IX86_BUILTIN_PSLLQI128
,
14508 IX86_BUILTIN_PSRAWI128
,
14509 IX86_BUILTIN_PSRADI128
,
14510 IX86_BUILTIN_PSRLDQI128
,
14511 IX86_BUILTIN_PSRLWI128
,
14512 IX86_BUILTIN_PSRLDI128
,
14513 IX86_BUILTIN_PSRLQI128
,
14515 IX86_BUILTIN_PUNPCKHBW128
,
14516 IX86_BUILTIN_PUNPCKHWD128
,
14517 IX86_BUILTIN_PUNPCKHDQ128
,
14518 IX86_BUILTIN_PUNPCKHQDQ128
,
14519 IX86_BUILTIN_PUNPCKLBW128
,
14520 IX86_BUILTIN_PUNPCKLWD128
,
14521 IX86_BUILTIN_PUNPCKLDQ128
,
14522 IX86_BUILTIN_PUNPCKLQDQ128
,
14524 IX86_BUILTIN_CLFLUSH
,
14525 IX86_BUILTIN_MFENCE
,
14526 IX86_BUILTIN_LFENCE
,
14528 /* Prescott New Instructions. */
14529 IX86_BUILTIN_ADDSUBPS
,
14530 IX86_BUILTIN_HADDPS
,
14531 IX86_BUILTIN_HSUBPS
,
14532 IX86_BUILTIN_MOVSHDUP
,
14533 IX86_BUILTIN_MOVSLDUP
,
14534 IX86_BUILTIN_ADDSUBPD
,
14535 IX86_BUILTIN_HADDPD
,
14536 IX86_BUILTIN_HSUBPD
,
14537 IX86_BUILTIN_LDDQU
,
14539 IX86_BUILTIN_MONITOR
,
14540 IX86_BUILTIN_MWAIT
,
14543 IX86_BUILTIN_PHADDW
,
14544 IX86_BUILTIN_PHADDD
,
14545 IX86_BUILTIN_PHADDSW
,
14546 IX86_BUILTIN_PHSUBW
,
14547 IX86_BUILTIN_PHSUBD
,
14548 IX86_BUILTIN_PHSUBSW
,
14549 IX86_BUILTIN_PMADDUBSW
,
14550 IX86_BUILTIN_PMULHRSW
,
14551 IX86_BUILTIN_PSHUFB
,
14552 IX86_BUILTIN_PSIGNB
,
14553 IX86_BUILTIN_PSIGNW
,
14554 IX86_BUILTIN_PSIGND
,
14555 IX86_BUILTIN_PALIGNR
,
14556 IX86_BUILTIN_PABSB
,
14557 IX86_BUILTIN_PABSW
,
14558 IX86_BUILTIN_PABSD
,
14560 IX86_BUILTIN_PHADDW128
,
14561 IX86_BUILTIN_PHADDD128
,
14562 IX86_BUILTIN_PHADDSW128
,
14563 IX86_BUILTIN_PHSUBW128
,
14564 IX86_BUILTIN_PHSUBD128
,
14565 IX86_BUILTIN_PHSUBSW128
,
14566 IX86_BUILTIN_PMADDUBSW128
,
14567 IX86_BUILTIN_PMULHRSW128
,
14568 IX86_BUILTIN_PSHUFB128
,
14569 IX86_BUILTIN_PSIGNB128
,
14570 IX86_BUILTIN_PSIGNW128
,
14571 IX86_BUILTIN_PSIGND128
,
14572 IX86_BUILTIN_PALIGNR128
,
14573 IX86_BUILTIN_PABSB128
,
14574 IX86_BUILTIN_PABSW128
,
14575 IX86_BUILTIN_PABSD128
,
14577 IX86_BUILTIN_VEC_INIT_V2SI
,
14578 IX86_BUILTIN_VEC_INIT_V4HI
,
14579 IX86_BUILTIN_VEC_INIT_V8QI
,
14580 IX86_BUILTIN_VEC_EXT_V2DF
,
14581 IX86_BUILTIN_VEC_EXT_V2DI
,
14582 IX86_BUILTIN_VEC_EXT_V4SF
,
14583 IX86_BUILTIN_VEC_EXT_V4SI
,
14584 IX86_BUILTIN_VEC_EXT_V8HI
,
14585 IX86_BUILTIN_VEC_EXT_V2SI
,
14586 IX86_BUILTIN_VEC_EXT_V4HI
,
14587 IX86_BUILTIN_VEC_SET_V8HI
,
14588 IX86_BUILTIN_VEC_SET_V4HI
,
/* Register a target builtin NAME with function type TYPE and code CODE,
   but only when the target flags in MASK are enabled (and, for builtins
   tagged MASK_64BIT, only on 64-bit targets).
   NOTE(review): the do { } while (0) wrapper was dropped by the
   extraction and is restored here — confirm against the original.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
			  NULL, NULL_TREE);				\
} while (0)
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* Describes one MMX/SSE builtin: which target flags must be enabled for
   it to exist, which insn pattern implements it, its user-visible name
   (0 when the name is registered elsewhere), its IX86_BUILTIN_* code,
   the rtx comparison code for compare builtins (0 otherwise), and flag
   bits such as BUILTIN_DESC_SWAP_OPERANDS.  */
struct builtin_description
{
  const unsigned int mask;		/* Required target flags (MASK_SSE etc.).  */
  const enum insn_code icode;		/* Implementing insn pattern.  */
  const char *const name;		/* Builtin name, or 0.  */
  const enum ix86_builtins code;	/* Builtin function code.  */
  const enum rtx_code comparison;	/* Comparison code for cmp builtins.  */
  const unsigned int flag;		/* BUILTIN_DESC_* flag bits.  */
};
14617 static const struct builtin_description bdesc_comi
[] =
14619 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
14620 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
14621 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
14622 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
14623 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
14624 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
14625 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
14626 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
14627 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
14628 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
14629 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
14630 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
14631 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
14632 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
14633 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
14634 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
14635 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
14636 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
14637 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
14638 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
14639 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
14640 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
14641 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
14642 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
14645 static const struct builtin_description bdesc_2arg
[] =
14648 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14649 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14650 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14651 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14652 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14653 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14654 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14655 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14657 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14658 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14659 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14660 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14661 BUILTIN_DESC_SWAP_OPERANDS
},
14662 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14663 BUILTIN_DESC_SWAP_OPERANDS
},
14664 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14665 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14666 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14667 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14668 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14669 BUILTIN_DESC_SWAP_OPERANDS
},
14670 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14671 BUILTIN_DESC_SWAP_OPERANDS
},
14672 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14673 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14674 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14675 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14676 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14677 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14678 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14679 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14680 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14681 BUILTIN_DESC_SWAP_OPERANDS
},
14682 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14683 BUILTIN_DESC_SWAP_OPERANDS
},
14684 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14686 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14687 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14688 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14689 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14691 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14692 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14693 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14694 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14696 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14697 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14698 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14699 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14700 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14703 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14704 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14705 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14706 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14707 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14708 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14709 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14710 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14712 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14713 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14714 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14715 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14716 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14717 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14718 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14719 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14721 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14722 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14723 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14725 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14726 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
14727 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
14728 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
14730 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
14731 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
14733 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
14734 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
14735 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
14736 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
14737 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
14738 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
14740 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
14741 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
14742 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
14743 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
14745 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
14746 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
14747 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
14748 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
14749 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
14750 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
14753 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
14754 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
14755 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
14757 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
14758 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
14759 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
14761 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
14762 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
14763 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
14764 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
14765 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
14766 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
14768 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
14769 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
14770 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
14771 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
14772 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
14773 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
14775 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
14776 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
14777 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
14778 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
14780 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
14781 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
14784 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
14785 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
14786 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
14787 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
14788 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
14789 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
14790 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
14791 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
14793 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
14794 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
14795 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
14796 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
14797 BUILTIN_DESC_SWAP_OPERANDS
},
14798 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
14799 BUILTIN_DESC_SWAP_OPERANDS
},
14800 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
14801 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
14802 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
14803 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
14804 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
14805 BUILTIN_DESC_SWAP_OPERANDS
},
14806 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
14807 BUILTIN_DESC_SWAP_OPERANDS
},
14808 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
14809 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
14810 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
14811 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
14812 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
14813 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
14814 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
14815 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
14816 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
14818 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
14819 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
14820 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
14821 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
14823 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
14824 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
14825 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
14826 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
14828 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
14829 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
14830 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
14833 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
14834 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
14835 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
14836 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
14837 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
14838 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
14839 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
14840 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
14842 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
14843 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
14844 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
14845 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
14846 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
14847 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
14848 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
14849 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
14851 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
14852 { MASK_SSE2
, CODE_FOR_sse2_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
14854 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
14855 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
14856 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
14857 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
14859 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
14860 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
14862 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
14863 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
14864 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
14865 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
14866 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
14867 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
14869 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
14870 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
14871 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
14872 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
14874 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
14875 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
14876 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
14877 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
14878 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
14879 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
14880 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
14881 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
14883 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
14884 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
14885 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
14887 { MASK_SSE2
, CODE_FOR_sse2_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
14888 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
14890 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
14891 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
14893 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
14894 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
14895 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
14897 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
14898 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
14899 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
14901 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
14902 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
14904 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
14906 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
14907 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
14908 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
14909 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
14912 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
14913 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
14914 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
14915 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
14916 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
14917 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
14920 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
14921 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
14922 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
14923 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
14924 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
14925 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
14926 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
14927 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
14928 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
14929 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
14930 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
14931 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
14932 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
14933 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
14934 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
14935 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
14936 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
14937 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
14938 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
14939 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
14940 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
14941 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
14942 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
14943 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
14946 static const struct builtin_description bdesc_1arg
[] =
14948 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
14949 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
14951 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
14952 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
14953 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
14955 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
14956 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
14957 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
14958 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
14959 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
14960 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
14962 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
14963 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
14965 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
14967 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
14968 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
14970 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
14971 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
14972 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
14973 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
14974 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
14976 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
14978 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
14979 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
14980 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
14981 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
14983 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
14984 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
14985 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
14988 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
14989 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
14992 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
14993 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
14994 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
14995 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
14996 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
14997 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
15001 ix86_init_builtins (void)
15004 ix86_init_mmx_sse_builtins ();
15007 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15008 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15011 ix86_init_mmx_sse_builtins (void)
15013 const struct builtin_description
* d
;
15016 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
15017 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
15018 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
15019 tree V2DI_type_node
15020 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
15021 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
15022 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
15023 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
15024 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
15025 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
15026 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
15028 tree pchar_type_node
= build_pointer_type (char_type_node
);
15029 tree pcchar_type_node
= build_pointer_type (
15030 build_type_variant (char_type_node
, 1, 0));
15031 tree pfloat_type_node
= build_pointer_type (float_type_node
);
15032 tree pcfloat_type_node
= build_pointer_type (
15033 build_type_variant (float_type_node
, 1, 0));
15034 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
15035 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
15036 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
15039 tree int_ftype_v4sf_v4sf
15040 = build_function_type_list (integer_type_node
,
15041 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15042 tree v4si_ftype_v4sf_v4sf
15043 = build_function_type_list (V4SI_type_node
,
15044 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15045 /* MMX/SSE/integer conversions. */
15046 tree int_ftype_v4sf
15047 = build_function_type_list (integer_type_node
,
15048 V4SF_type_node
, NULL_TREE
);
15049 tree int64_ftype_v4sf
15050 = build_function_type_list (long_long_integer_type_node
,
15051 V4SF_type_node
, NULL_TREE
);
15052 tree int_ftype_v8qi
15053 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
15054 tree v4sf_ftype_v4sf_int
15055 = build_function_type_list (V4SF_type_node
,
15056 V4SF_type_node
, integer_type_node
, NULL_TREE
);
15057 tree v4sf_ftype_v4sf_int64
15058 = build_function_type_list (V4SF_type_node
,
15059 V4SF_type_node
, long_long_integer_type_node
,
15061 tree v4sf_ftype_v4sf_v2si
15062 = build_function_type_list (V4SF_type_node
,
15063 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
15065 /* Miscellaneous. */
15066 tree v8qi_ftype_v4hi_v4hi
15067 = build_function_type_list (V8QI_type_node
,
15068 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15069 tree v4hi_ftype_v2si_v2si
15070 = build_function_type_list (V4HI_type_node
,
15071 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15072 tree v4sf_ftype_v4sf_v4sf_int
15073 = build_function_type_list (V4SF_type_node
,
15074 V4SF_type_node
, V4SF_type_node
,
15075 integer_type_node
, NULL_TREE
);
15076 tree v2si_ftype_v4hi_v4hi
15077 = build_function_type_list (V2SI_type_node
,
15078 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15079 tree v4hi_ftype_v4hi_int
15080 = build_function_type_list (V4HI_type_node
,
15081 V4HI_type_node
, integer_type_node
, NULL_TREE
);
15082 tree v4hi_ftype_v4hi_di
15083 = build_function_type_list (V4HI_type_node
,
15084 V4HI_type_node
, long_long_unsigned_type_node
,
15086 tree v2si_ftype_v2si_di
15087 = build_function_type_list (V2SI_type_node
,
15088 V2SI_type_node
, long_long_unsigned_type_node
,
15090 tree void_ftype_void
15091 = build_function_type (void_type_node
, void_list_node
);
15092 tree void_ftype_unsigned
15093 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
15094 tree void_ftype_unsigned_unsigned
15095 = build_function_type_list (void_type_node
, unsigned_type_node
,
15096 unsigned_type_node
, NULL_TREE
);
15097 tree void_ftype_pcvoid_unsigned_unsigned
15098 = build_function_type_list (void_type_node
, const_ptr_type_node
,
15099 unsigned_type_node
, unsigned_type_node
,
15101 tree unsigned_ftype_void
15102 = build_function_type (unsigned_type_node
, void_list_node
);
15103 tree v2si_ftype_v4sf
15104 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
15105 /* Loads/stores. */
15106 tree void_ftype_v8qi_v8qi_pchar
15107 = build_function_type_list (void_type_node
,
15108 V8QI_type_node
, V8QI_type_node
,
15109 pchar_type_node
, NULL_TREE
);
15110 tree v4sf_ftype_pcfloat
15111 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
15112 /* @@@ the type is bogus */
15113 tree v4sf_ftype_v4sf_pv2si
15114 = build_function_type_list (V4SF_type_node
,
15115 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
15116 tree void_ftype_pv2si_v4sf
15117 = build_function_type_list (void_type_node
,
15118 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
15119 tree void_ftype_pfloat_v4sf
15120 = build_function_type_list (void_type_node
,
15121 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
15122 tree void_ftype_pdi_di
15123 = build_function_type_list (void_type_node
,
15124 pdi_type_node
, long_long_unsigned_type_node
,
15126 tree void_ftype_pv2di_v2di
15127 = build_function_type_list (void_type_node
,
15128 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
15129 /* Normal vector unops. */
15130 tree v4sf_ftype_v4sf
15131 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15132 tree v16qi_ftype_v16qi
15133 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15134 tree v8hi_ftype_v8hi
15135 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15136 tree v4si_ftype_v4si
15137 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15138 tree v8qi_ftype_v8qi
15139 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15140 tree v4hi_ftype_v4hi
15141 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15143 /* Normal vector binops. */
15144 tree v4sf_ftype_v4sf_v4sf
15145 = build_function_type_list (V4SF_type_node
,
15146 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15147 tree v8qi_ftype_v8qi_v8qi
15148 = build_function_type_list (V8QI_type_node
,
15149 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15150 tree v4hi_ftype_v4hi_v4hi
15151 = build_function_type_list (V4HI_type_node
,
15152 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15153 tree v2si_ftype_v2si_v2si
15154 = build_function_type_list (V2SI_type_node
,
15155 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15156 tree di_ftype_di_di
15157 = build_function_type_list (long_long_unsigned_type_node
,
15158 long_long_unsigned_type_node
,
15159 long_long_unsigned_type_node
, NULL_TREE
);
15161 tree di_ftype_di_di_int
15162 = build_function_type_list (long_long_unsigned_type_node
,
15163 long_long_unsigned_type_node
,
15164 long_long_unsigned_type_node
,
15165 integer_type_node
, NULL_TREE
);
15167 tree v2si_ftype_v2sf
15168 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
15169 tree v2sf_ftype_v2si
15170 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
15171 tree v2si_ftype_v2si
15172 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15173 tree v2sf_ftype_v2sf
15174 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15175 tree v2sf_ftype_v2sf_v2sf
15176 = build_function_type_list (V2SF_type_node
,
15177 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15178 tree v2si_ftype_v2sf_v2sf
15179 = build_function_type_list (V2SI_type_node
,
15180 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15181 tree pint_type_node
= build_pointer_type (integer_type_node
);
15182 tree pdouble_type_node
= build_pointer_type (double_type_node
);
15183 tree pcdouble_type_node
= build_pointer_type (
15184 build_type_variant (double_type_node
, 1, 0));
15185 tree int_ftype_v2df_v2df
15186 = build_function_type_list (integer_type_node
,
15187 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15189 tree void_ftype_pcvoid
15190 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
15191 tree v4sf_ftype_v4si
15192 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
15193 tree v4si_ftype_v4sf
15194 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
15195 tree v2df_ftype_v4si
15196 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
15197 tree v4si_ftype_v2df
15198 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
15199 tree v2si_ftype_v2df
15200 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
15201 tree v4sf_ftype_v2df
15202 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15203 tree v2df_ftype_v2si
15204 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
15205 tree v2df_ftype_v4sf
15206 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15207 tree int_ftype_v2df
15208 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
15209 tree int64_ftype_v2df
15210 = build_function_type_list (long_long_integer_type_node
,
15211 V2DF_type_node
, NULL_TREE
);
15212 tree v2df_ftype_v2df_int
15213 = build_function_type_list (V2DF_type_node
,
15214 V2DF_type_node
, integer_type_node
, NULL_TREE
);
15215 tree v2df_ftype_v2df_int64
15216 = build_function_type_list (V2DF_type_node
,
15217 V2DF_type_node
, long_long_integer_type_node
,
15219 tree v4sf_ftype_v4sf_v2df
15220 = build_function_type_list (V4SF_type_node
,
15221 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15222 tree v2df_ftype_v2df_v4sf
15223 = build_function_type_list (V2DF_type_node
,
15224 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15225 tree v2df_ftype_v2df_v2df_int
15226 = build_function_type_list (V2DF_type_node
,
15227 V2DF_type_node
, V2DF_type_node
,
15230 tree v2df_ftype_v2df_pcdouble
15231 = build_function_type_list (V2DF_type_node
,
15232 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15233 tree void_ftype_pdouble_v2df
15234 = build_function_type_list (void_type_node
,
15235 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
15236 tree void_ftype_pint_int
15237 = build_function_type_list (void_type_node
,
15238 pint_type_node
, integer_type_node
, NULL_TREE
);
15239 tree void_ftype_v16qi_v16qi_pchar
15240 = build_function_type_list (void_type_node
,
15241 V16QI_type_node
, V16QI_type_node
,
15242 pchar_type_node
, NULL_TREE
);
15243 tree v2df_ftype_pcdouble
15244 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15245 tree v2df_ftype_v2df_v2df
15246 = build_function_type_list (V2DF_type_node
,
15247 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15248 tree v16qi_ftype_v16qi_v16qi
15249 = build_function_type_list (V16QI_type_node
,
15250 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15251 tree v8hi_ftype_v8hi_v8hi
15252 = build_function_type_list (V8HI_type_node
,
15253 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15254 tree v4si_ftype_v4si_v4si
15255 = build_function_type_list (V4SI_type_node
,
15256 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15257 tree v2di_ftype_v2di_v2di
15258 = build_function_type_list (V2DI_type_node
,
15259 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
15260 tree v2di_ftype_v2df_v2df
15261 = build_function_type_list (V2DI_type_node
,
15262 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15263 tree v2df_ftype_v2df
15264 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15265 tree v2di_ftype_v2di_int
15266 = build_function_type_list (V2DI_type_node
,
15267 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15268 tree v2di_ftype_v2di_v2di_int
15269 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
15270 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15271 tree v4si_ftype_v4si_int
15272 = build_function_type_list (V4SI_type_node
,
15273 V4SI_type_node
, integer_type_node
, NULL_TREE
);
15274 tree v8hi_ftype_v8hi_int
15275 = build_function_type_list (V8HI_type_node
,
15276 V8HI_type_node
, integer_type_node
, NULL_TREE
);
15277 tree v8hi_ftype_v8hi_v2di
15278 = build_function_type_list (V8HI_type_node
,
15279 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
15280 tree v4si_ftype_v4si_v2di
15281 = build_function_type_list (V4SI_type_node
,
15282 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
15283 tree v4si_ftype_v8hi_v8hi
15284 = build_function_type_list (V4SI_type_node
,
15285 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15286 tree di_ftype_v8qi_v8qi
15287 = build_function_type_list (long_long_unsigned_type_node
,
15288 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15289 tree di_ftype_v2si_v2si
15290 = build_function_type_list (long_long_unsigned_type_node
,
15291 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15292 tree v2di_ftype_v16qi_v16qi
15293 = build_function_type_list (V2DI_type_node
,
15294 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15295 tree v2di_ftype_v4si_v4si
15296 = build_function_type_list (V2DI_type_node
,
15297 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15298 tree int_ftype_v16qi
15299 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
15300 tree v16qi_ftype_pcchar
15301 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
15302 tree void_ftype_pchar_v16qi
15303 = build_function_type_list (void_type_node
,
15304 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
15307 tree float128_type
;
15310 /* The __float80 type. */
15311 if (TYPE_MODE (long_double_type_node
) == XFmode
)
15312 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
15316 /* The __float80 type. */
15317 float80_type
= make_node (REAL_TYPE
);
15318 TYPE_PRECISION (float80_type
) = 80;
15319 layout_type (float80_type
);
15320 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
15325 float128_type
= make_node (REAL_TYPE
);
15326 TYPE_PRECISION (float128_type
) = 128;
15327 layout_type (float128_type
);
15328 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
15331 /* Add all builtins that are more or less simple operations on two
15333 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
15335 /* Use one of the operands; the target can have a different mode for
15336 mask-generating compares. */
15337 enum machine_mode mode
;
15342 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15347 type
= v16qi_ftype_v16qi_v16qi
;
15350 type
= v8hi_ftype_v8hi_v8hi
;
15353 type
= v4si_ftype_v4si_v4si
;
15356 type
= v2di_ftype_v2di_v2di
;
15359 type
= v2df_ftype_v2df_v2df
;
15362 type
= v4sf_ftype_v4sf_v4sf
;
15365 type
= v8qi_ftype_v8qi_v8qi
;
15368 type
= v4hi_ftype_v4hi_v4hi
;
15371 type
= v2si_ftype_v2si_v2si
;
15374 type
= di_ftype_di_di
;
15378 gcc_unreachable ();
15381 /* Override for comparisons. */
15382 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
15383 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
15384 type
= v4si_ftype_v4sf_v4sf
;
15386 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
15387 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
15388 type
= v2di_ftype_v2df_v2df
;
15390 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15393 /* Add all builtins that are more or less simple operations on 1 operand. */
15394 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
15396 enum machine_mode mode
;
15401 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15406 type
= v16qi_ftype_v16qi
;
15409 type
= v8hi_ftype_v8hi
;
15412 type
= v4si_ftype_v4si
;
15415 type
= v2df_ftype_v2df
;
15418 type
= v4sf_ftype_v4sf
;
15421 type
= v8qi_ftype_v8qi
;
15424 type
= v4hi_ftype_v4hi
;
15427 type
= v2si_ftype_v2si
;
15434 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15437 /* Add the remaining MMX insns with somewhat more complicated types. */
15438 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
15439 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
15440 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
15441 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
15443 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
15444 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
15445 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
15447 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
15448 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
15450 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
15451 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
15453 /* comi/ucomi insns. */
15454 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
15455 if (d
->mask
== MASK_SSE2
)
15456 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
15458 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
15460 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
15461 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
15462 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
15464 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
15465 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
15466 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
15467 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
15468 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
15469 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
15470 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
15471 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
15472 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
15473 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
15474 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
15476 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
15478 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
15479 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
15481 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
15482 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
15483 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
15484 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
15486 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
15487 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
15488 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
15489 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
15491 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
15493 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
15495 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
15496 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
15497 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
15498 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
15499 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
15500 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
15502 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
15504 /* Original 3DNow! */
15505 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
15506 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
15507 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
15508 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
15509 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
15510 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
15511 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
15512 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
15513 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
15514 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
15515 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
15516 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
15517 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
15518 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
15519 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
15520 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
15521 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
15522 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
15523 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
15524 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
15526 /* 3DNow! extension as used in the Athlon CPU. */
15527 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
15528 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
15529 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
15530 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
15531 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
15532 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
15535 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
15537 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
15538 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
15540 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
15541 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
15543 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
15544 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
15545 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
15546 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
15547 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
15549 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
15550 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
15551 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
15552 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
15554 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
15555 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
15557 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
15559 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
15560 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
15562 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
15563 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
15564 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
15565 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
15566 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
15568 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
15570 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
15571 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
15572 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
15573 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
15575 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
15576 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
15577 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
15579 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
15580 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
15581 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
15582 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
15584 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
15585 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
15586 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
15588 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
15589 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
15591 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
15592 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
15594 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
15595 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
15596 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
15598 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
15599 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
15600 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
15602 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
15603 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
15605 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
15606 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
15607 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
15608 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
15610 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
15611 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
15612 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
15613 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
15615 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
15616 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
15618 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
15620 /* Prescott New Instructions. */
15621 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
15622 void_ftype_pcvoid_unsigned_unsigned
,
15623 IX86_BUILTIN_MONITOR
);
15624 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
15625 void_ftype_unsigned_unsigned
,
15626 IX86_BUILTIN_MWAIT
);
15627 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
15629 IX86_BUILTIN_MOVSHDUP
);
15630 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
15632 IX86_BUILTIN_MOVSLDUP
);
15633 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
15634 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
15637 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
15638 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
15639 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
15640 IX86_BUILTIN_PALIGNR
);
15642 /* Access to the vec_init patterns. */
15643 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
15644 integer_type_node
, NULL_TREE
);
15645 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
15646 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
15648 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
15649 short_integer_type_node
,
15650 short_integer_type_node
,
15651 short_integer_type_node
, NULL_TREE
);
15652 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
15653 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
15655 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
15656 char_type_node
, char_type_node
,
15657 char_type_node
, char_type_node
,
15658 char_type_node
, char_type_node
,
15659 char_type_node
, NULL_TREE
);
15660 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
15661 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
15663 /* Access to the vec_extract patterns. */
15664 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
15665 integer_type_node
, NULL_TREE
);
15666 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
15667 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
15669 ftype
= build_function_type_list (long_long_integer_type_node
,
15670 V2DI_type_node
, integer_type_node
,
15672 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
15673 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
15675 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
15676 integer_type_node
, NULL_TREE
);
15677 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
15678 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
15680 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
15681 integer_type_node
, NULL_TREE
);
15682 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
15683 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
15685 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
15686 integer_type_node
, NULL_TREE
);
15687 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
15688 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
15690 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
15691 integer_type_node
, NULL_TREE
);
15692 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
15693 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
15695 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
15696 integer_type_node
, NULL_TREE
);
15697 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
15698 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
15700 /* Access to the vec_set patterns. */
15701 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
15703 integer_type_node
, NULL_TREE
);
15704 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
15705 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
15707 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
15709 integer_type_node
, NULL_TREE
);
15710 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
15711 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
15714 /* Errors in the source file can cause expand_expr to return const0_rtx
15715 where we expect a vector. To avoid crashing, use one of the vector
15716 clear instructions. */
15718 safe_vector_operand (rtx x
, enum machine_mode mode
)
15720 if (x
== const0_rtx
)
15721 x
= CONST0_RTX (mode
);
15725 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15728 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
15731 tree arg0
= TREE_VALUE (arglist
);
15732 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15733 rtx op0
= expand_normal (arg0
);
15734 rtx op1
= expand_normal (arg1
);
15735 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15736 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15737 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15739 if (VECTOR_MODE_P (mode0
))
15740 op0
= safe_vector_operand (op0
, mode0
);
15741 if (VECTOR_MODE_P (mode1
))
15742 op1
= safe_vector_operand (op1
, mode1
);
15744 if (optimize
|| !target
15745 || GET_MODE (target
) != tmode
15746 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15747 target
= gen_reg_rtx (tmode
);
15749 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
15751 rtx x
= gen_reg_rtx (V4SImode
);
15752 emit_insn (gen_sse2_loadd (x
, op1
));
15753 op1
= gen_lowpart (TImode
, x
);
15756 /* The insn must want input operands in the same modes as the
15758 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
15759 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
15761 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15762 op0
= copy_to_mode_reg (mode0
, op0
);
15763 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15764 op1
= copy_to_mode_reg (mode1
, op1
);
15766 /* ??? Using ix86_fixup_binary_operands is problematic when
15767 we've got mismatched modes. Fake it. */
15773 if (tmode
== mode0
&& tmode
== mode1
)
15775 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
15779 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
15781 op0
= force_reg (mode0
, op0
);
15782 op1
= force_reg (mode1
, op1
);
15783 target
= gen_reg_rtx (tmode
);
15786 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15793 /* Subroutine of ix86_expand_builtin to take care of stores. */
15796 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
15799 tree arg0
= TREE_VALUE (arglist
);
15800 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15801 rtx op0
= expand_normal (arg0
);
15802 rtx op1
= expand_normal (arg1
);
15803 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15804 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15806 if (VECTOR_MODE_P (mode1
))
15807 op1
= safe_vector_operand (op1
, mode1
);
15809 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15810 op1
= copy_to_mode_reg (mode1
, op1
);
15812 pat
= GEN_FCN (icode
) (op0
, op1
);
15818 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15821 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
15822 rtx target
, int do_load
)
15825 tree arg0
= TREE_VALUE (arglist
);
15826 rtx op0
= expand_normal (arg0
);
15827 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15828 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15830 if (optimize
|| !target
15831 || GET_MODE (target
) != tmode
15832 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15833 target
= gen_reg_rtx (tmode
);
15835 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15838 if (VECTOR_MODE_P (mode0
))
15839 op0
= safe_vector_operand (op0
, mode0
);
15841 if ((optimize
&& !register_operand (op0
, mode0
))
15842 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15843 op0
= copy_to_mode_reg (mode0
, op0
);
15846 pat
= GEN_FCN (icode
) (target
, op0
);
15853 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15854 sqrtss, rsqrtss, rcpss. */
15857 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
15860 tree arg0
= TREE_VALUE (arglist
);
15861 rtx op1
, op0
= expand_normal (arg0
);
15862 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15863 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15865 if (optimize
|| !target
15866 || GET_MODE (target
) != tmode
15867 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15868 target
= gen_reg_rtx (tmode
);
15870 if (VECTOR_MODE_P (mode0
))
15871 op0
= safe_vector_operand (op0
, mode0
);
15873 if ((optimize
&& !register_operand (op0
, mode0
))
15874 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15875 op0
= copy_to_mode_reg (mode0
, op0
);
15878 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
15879 op1
= copy_to_mode_reg (mode0
, op1
);
15881 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15888 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15891 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
15895 tree arg0
= TREE_VALUE (arglist
);
15896 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15897 rtx op0
= expand_normal (arg0
);
15898 rtx op1
= expand_normal (arg1
);
15900 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
15901 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
15902 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
15903 enum rtx_code comparison
= d
->comparison
;
15905 if (VECTOR_MODE_P (mode0
))
15906 op0
= safe_vector_operand (op0
, mode0
);
15907 if (VECTOR_MODE_P (mode1
))
15908 op1
= safe_vector_operand (op1
, mode1
);
15910 /* Swap operands if we have a comparison that isn't available in
15912 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
15914 rtx tmp
= gen_reg_rtx (mode1
);
15915 emit_move_insn (tmp
, op1
);
15920 if (optimize
|| !target
15921 || GET_MODE (target
) != tmode
15922 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
15923 target
= gen_reg_rtx (tmode
);
15925 if ((optimize
&& !register_operand (op0
, mode0
))
15926 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
15927 op0
= copy_to_mode_reg (mode0
, op0
);
15928 if ((optimize
&& !register_operand (op1
, mode1
))
15929 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
15930 op1
= copy_to_mode_reg (mode1
, op1
);
15932 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
15933 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
15940 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15943 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
15947 tree arg0
= TREE_VALUE (arglist
);
15948 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15949 rtx op0
= expand_normal (arg0
);
15950 rtx op1
= expand_normal (arg1
);
15952 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
15953 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
15954 enum rtx_code comparison
= d
->comparison
;
15956 if (VECTOR_MODE_P (mode0
))
15957 op0
= safe_vector_operand (op0
, mode0
);
15958 if (VECTOR_MODE_P (mode1
))
15959 op1
= safe_vector_operand (op1
, mode1
);
15961 /* Swap operands if we have a comparison that isn't available in
15963 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
15970 target
= gen_reg_rtx (SImode
);
15971 emit_move_insn (target
, const0_rtx
);
15972 target
= gen_rtx_SUBREG (QImode
, target
, 0);
15974 if ((optimize
&& !register_operand (op0
, mode0
))
15975 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
15976 op0
= copy_to_mode_reg (mode0
, op0
);
15977 if ((optimize
&& !register_operand (op1
, mode1
))
15978 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
15979 op1
= copy_to_mode_reg (mode1
, op1
);
15981 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
15982 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
15986 emit_insn (gen_rtx_SET (VOIDmode
,
15987 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
15988 gen_rtx_fmt_ee (comparison
, QImode
,
15992 return SUBREG_REG (target
);
15995 /* Return the integer constant in ARG. Constrain it to be in the range
15996 of the subparts of VEC_TYPE; issue an error if not. */
15999 get_element_number (tree vec_type
, tree arg
)
16001 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
16003 if (!host_integerp (arg
, 1)
16004 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
16006 error ("selector must be an integer constant in the range 0..%wi", max
);
16013 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16014 ix86_expand_vector_init. We DO have language-level syntax for this, in
16015 the form of (type){ init-list }. Except that since we can't place emms
16016 instructions from inside the compiler, we can't allow the use of MMX
16017 registers unless the user explicitly asks for it. So we do *not* define
16018 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16019 we have builtins invoked by mmintrin.h that gives us license to emit
16020 these sorts of instructions. */
16023 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
16025 enum machine_mode tmode
= TYPE_MODE (type
);
16026 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
16027 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
16028 rtvec v
= rtvec_alloc (n_elt
);
16030 gcc_assert (VECTOR_MODE_P (tmode
));
16032 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
16034 rtx x
= expand_normal (TREE_VALUE (arglist
));
16035 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
16038 gcc_assert (arglist
== NULL
);
16040 if (!target
|| !register_operand (target
, tmode
))
16041 target
= gen_reg_rtx (tmode
);
16043 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
16047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16048 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16049 had a language-level syntax for referencing vector elements. */
16052 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
16054 enum machine_mode tmode
, mode0
;
16059 arg0
= TREE_VALUE (arglist
);
16060 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16062 op0
= expand_normal (arg0
);
16063 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
16065 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16066 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
16067 gcc_assert (VECTOR_MODE_P (mode0
));
16069 op0
= force_reg (mode0
, op0
);
16071 if (optimize
|| !target
|| !register_operand (target
, tmode
))
16072 target
= gen_reg_rtx (tmode
);
16074 ix86_expand_vector_extract (true, target
, op0
, elt
);
16079 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16080 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16081 a language-level syntax for referencing vector elements. */
16084 ix86_expand_vec_set_builtin (tree arglist
)
16086 enum machine_mode tmode
, mode1
;
16087 tree arg0
, arg1
, arg2
;
16091 arg0
= TREE_VALUE (arglist
);
16092 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16093 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16095 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
16096 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16097 gcc_assert (VECTOR_MODE_P (tmode
));
16099 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
16100 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
16101 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
16103 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
16104 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
16106 op0
= force_reg (tmode
, op0
);
16107 op1
= force_reg (mode1
, op1
);
16109 ix86_expand_vector_set (true, op0
, op1
, elt
);
16114 /* Expand an expression EXP that calls a built-in function,
16115 with result going to TARGET if that's convenient
16116 (and in mode MODE if that's convenient).
16117 SUBTARGET may be used as the target for computing one of EXP's operands.
16118 IGNORE is nonzero if the value is to be ignored. */
16121 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
16122 enum machine_mode mode ATTRIBUTE_UNUSED
,
16123 int ignore ATTRIBUTE_UNUSED
)
16125 const struct builtin_description
*d
;
16127 enum insn_code icode
;
16128 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
16129 tree arglist
= TREE_OPERAND (exp
, 1);
16130 tree arg0
, arg1
, arg2
;
16131 rtx op0
, op1
, op2
, pat
;
16132 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
;
16133 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
16137 case IX86_BUILTIN_EMMS
:
16138 emit_insn (gen_mmx_emms ());
16141 case IX86_BUILTIN_SFENCE
:
16142 emit_insn (gen_sse_sfence ());
16145 case IX86_BUILTIN_MASKMOVQ
:
16146 case IX86_BUILTIN_MASKMOVDQU
:
16147 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
16148 ? CODE_FOR_mmx_maskmovq
16149 : CODE_FOR_sse2_maskmovdqu
);
16150 /* Note the arg order is different from the operand order. */
16151 arg1
= TREE_VALUE (arglist
);
16152 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
16153 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16154 op0
= expand_normal (arg0
);
16155 op1
= expand_normal (arg1
);
16156 op2
= expand_normal (arg2
);
16157 mode0
= insn_data
[icode
].operand
[0].mode
;
16158 mode1
= insn_data
[icode
].operand
[1].mode
;
16159 mode2
= insn_data
[icode
].operand
[2].mode
;
16161 op0
= force_reg (Pmode
, op0
);
16162 op0
= gen_rtx_MEM (mode1
, op0
);
16164 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16165 op0
= copy_to_mode_reg (mode0
, op0
);
16166 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
16167 op1
= copy_to_mode_reg (mode1
, op1
);
16168 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
16169 op2
= copy_to_mode_reg (mode2
, op2
);
16170 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
16176 case IX86_BUILTIN_SQRTSS
:
16177 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
16178 case IX86_BUILTIN_RSQRTSS
:
16179 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
16180 case IX86_BUILTIN_RCPSS
:
16181 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
16183 case IX86_BUILTIN_LOADUPS
:
16184 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
16186 case IX86_BUILTIN_STOREUPS
:
16187 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
16189 case IX86_BUILTIN_LOADHPS
:
16190 case IX86_BUILTIN_LOADLPS
:
16191 case IX86_BUILTIN_LOADHPD
:
16192 case IX86_BUILTIN_LOADLPD
:
16193 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
16194 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
16195 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
16196 : CODE_FOR_sse2_loadlpd
);
16197 arg0
= TREE_VALUE (arglist
);
16198 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16199 op0
= expand_normal (arg0
);
16200 op1
= expand_normal (arg1
);
16201 tmode
= insn_data
[icode
].operand
[0].mode
;
16202 mode0
= insn_data
[icode
].operand
[1].mode
;
16203 mode1
= insn_data
[icode
].operand
[2].mode
;
16205 op0
= force_reg (mode0
, op0
);
16206 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
16207 if (optimize
|| target
== 0
16208 || GET_MODE (target
) != tmode
16209 || !register_operand (target
, tmode
))
16210 target
= gen_reg_rtx (tmode
);
16211 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16217 case IX86_BUILTIN_STOREHPS
:
16218 case IX86_BUILTIN_STORELPS
:
16219 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
16220 : CODE_FOR_sse_storelps
);
16221 arg0
= TREE_VALUE (arglist
);
16222 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16223 op0
= expand_normal (arg0
);
16224 op1
= expand_normal (arg1
);
16225 mode0
= insn_data
[icode
].operand
[0].mode
;
16226 mode1
= insn_data
[icode
].operand
[1].mode
;
16228 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16229 op1
= force_reg (mode1
, op1
);
16231 pat
= GEN_FCN (icode
) (op0
, op1
);
16237 case IX86_BUILTIN_MOVNTPS
:
16238 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
16239 case IX86_BUILTIN_MOVNTQ
:
16240 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
16242 case IX86_BUILTIN_LDMXCSR
:
16243 op0
= expand_normal (TREE_VALUE (arglist
));
16244 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16245 emit_move_insn (target
, op0
);
16246 emit_insn (gen_sse_ldmxcsr (target
));
16249 case IX86_BUILTIN_STMXCSR
:
16250 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16251 emit_insn (gen_sse_stmxcsr (target
));
16252 return copy_to_mode_reg (SImode
, target
);
16254 case IX86_BUILTIN_SHUFPS
:
16255 case IX86_BUILTIN_SHUFPD
:
16256 icode
= (fcode
== IX86_BUILTIN_SHUFPS
16257 ? CODE_FOR_sse_shufps
16258 : CODE_FOR_sse2_shufpd
);
16259 arg0
= TREE_VALUE (arglist
);
16260 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16261 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16262 op0
= expand_normal (arg0
);
16263 op1
= expand_normal (arg1
);
16264 op2
= expand_normal (arg2
);
16265 tmode
= insn_data
[icode
].operand
[0].mode
;
16266 mode0
= insn_data
[icode
].operand
[1].mode
;
16267 mode1
= insn_data
[icode
].operand
[2].mode
;
16268 mode2
= insn_data
[icode
].operand
[3].mode
;
16270 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16271 op0
= copy_to_mode_reg (mode0
, op0
);
16272 if ((optimize
&& !register_operand (op1
, mode1
))
16273 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16274 op1
= copy_to_mode_reg (mode1
, op1
);
16275 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16277 /* @@@ better error message */
16278 error ("mask must be an immediate");
16279 return gen_reg_rtx (tmode
);
16281 if (optimize
|| target
== 0
16282 || GET_MODE (target
) != tmode
16283 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16284 target
= gen_reg_rtx (tmode
);
16285 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16291 case IX86_BUILTIN_PSHUFW
:
16292 case IX86_BUILTIN_PSHUFD
:
16293 case IX86_BUILTIN_PSHUFHW
:
16294 case IX86_BUILTIN_PSHUFLW
:
16295 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
16296 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
16297 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
16298 : CODE_FOR_mmx_pshufw
);
16299 arg0
= TREE_VALUE (arglist
);
16300 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16301 op0
= expand_normal (arg0
);
16302 op1
= expand_normal (arg1
);
16303 tmode
= insn_data
[icode
].operand
[0].mode
;
16304 mode1
= insn_data
[icode
].operand
[1].mode
;
16305 mode2
= insn_data
[icode
].operand
[2].mode
;
16307 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16308 op0
= copy_to_mode_reg (mode1
, op0
);
16309 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16311 /* @@@ better error message */
16312 error ("mask must be an immediate");
16316 || GET_MODE (target
) != tmode
16317 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16318 target
= gen_reg_rtx (tmode
);
16319 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16325 case IX86_BUILTIN_PSLLDQI128
:
16326 case IX86_BUILTIN_PSRLDQI128
:
16327 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
16328 : CODE_FOR_sse2_lshrti3
);
16329 arg0
= TREE_VALUE (arglist
);
16330 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16331 op0
= expand_normal (arg0
);
16332 op1
= expand_normal (arg1
);
16333 tmode
= insn_data
[icode
].operand
[0].mode
;
16334 mode1
= insn_data
[icode
].operand
[1].mode
;
16335 mode2
= insn_data
[icode
].operand
[2].mode
;
16337 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16339 op0
= copy_to_reg (op0
);
16340 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16342 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16344 error ("shift must be an immediate");
16347 target
= gen_reg_rtx (V2DImode
);
16348 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
16354 case IX86_BUILTIN_FEMMS
:
16355 emit_insn (gen_mmx_femms ());
16358 case IX86_BUILTIN_PAVGUSB
:
16359 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
16361 case IX86_BUILTIN_PF2ID
:
16362 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
16364 case IX86_BUILTIN_PFACC
:
16365 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
16367 case IX86_BUILTIN_PFADD
:
16368 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
16370 case IX86_BUILTIN_PFCMPEQ
:
16371 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
16373 case IX86_BUILTIN_PFCMPGE
:
16374 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
16376 case IX86_BUILTIN_PFCMPGT
:
16377 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
16379 case IX86_BUILTIN_PFMAX
:
16380 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
16382 case IX86_BUILTIN_PFMIN
:
16383 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
16385 case IX86_BUILTIN_PFMUL
:
16386 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
16388 case IX86_BUILTIN_PFRCP
:
16389 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
16391 case IX86_BUILTIN_PFRCPIT1
:
16392 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
16394 case IX86_BUILTIN_PFRCPIT2
:
16395 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
16397 case IX86_BUILTIN_PFRSQIT1
:
16398 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
16400 case IX86_BUILTIN_PFRSQRT
:
16401 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
16403 case IX86_BUILTIN_PFSUB
:
16404 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
16406 case IX86_BUILTIN_PFSUBR
:
16407 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
16409 case IX86_BUILTIN_PI2FD
:
16410 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
16412 case IX86_BUILTIN_PMULHRW
:
16413 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
16415 case IX86_BUILTIN_PF2IW
:
16416 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
16418 case IX86_BUILTIN_PFNACC
:
16419 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
16421 case IX86_BUILTIN_PFPNACC
:
16422 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
16424 case IX86_BUILTIN_PI2FW
:
16425 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
16427 case IX86_BUILTIN_PSWAPDSI
:
16428 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
16430 case IX86_BUILTIN_PSWAPDSF
:
16431 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
16433 case IX86_BUILTIN_SQRTSD
:
16434 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
16435 case IX86_BUILTIN_LOADUPD
:
16436 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
16437 case IX86_BUILTIN_STOREUPD
:
16438 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
16440 case IX86_BUILTIN_MFENCE
:
16441 emit_insn (gen_sse2_mfence ());
16443 case IX86_BUILTIN_LFENCE
:
16444 emit_insn (gen_sse2_lfence ());
16447 case IX86_BUILTIN_CLFLUSH
:
16448 arg0
= TREE_VALUE (arglist
);
16449 op0
= expand_normal (arg0
);
16450 icode
= CODE_FOR_sse2_clflush
;
16451 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
16452 op0
= copy_to_mode_reg (Pmode
, op0
);
16454 emit_insn (gen_sse2_clflush (op0
));
16457 case IX86_BUILTIN_MOVNTPD
:
16458 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
16459 case IX86_BUILTIN_MOVNTDQ
:
16460 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
16461 case IX86_BUILTIN_MOVNTI
:
16462 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
16464 case IX86_BUILTIN_LOADDQU
:
16465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
16466 case IX86_BUILTIN_STOREDQU
:
16467 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
16469 case IX86_BUILTIN_MONITOR
:
16470 arg0
= TREE_VALUE (arglist
);
16471 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16472 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16473 op0
= expand_normal (arg0
);
16474 op1
= expand_normal (arg1
);
16475 op2
= expand_normal (arg2
);
16477 op0
= copy_to_mode_reg (Pmode
, op0
);
16479 op1
= copy_to_mode_reg (SImode
, op1
);
16481 op2
= copy_to_mode_reg (SImode
, op2
);
16483 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
16485 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
16488 case IX86_BUILTIN_MWAIT
:
16489 arg0
= TREE_VALUE (arglist
);
16490 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16491 op0
= expand_normal (arg0
);
16492 op1
= expand_normal (arg1
);
16494 op0
= copy_to_mode_reg (SImode
, op0
);
16496 op1
= copy_to_mode_reg (SImode
, op1
);
16497 emit_insn (gen_sse3_mwait (op0
, op1
));
16500 case IX86_BUILTIN_LDDQU
:
16501 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
16504 case IX86_BUILTIN_PALIGNR
:
16505 case IX86_BUILTIN_PALIGNR128
:
16506 if (fcode
== IX86_BUILTIN_PALIGNR
)
16508 icode
= CODE_FOR_ssse3_palignrdi
;
16513 icode
= CODE_FOR_ssse3_palignrti
;
16516 arg0
= TREE_VALUE (arglist
);
16517 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16518 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16519 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
16520 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
16521 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
16522 tmode
= insn_data
[icode
].operand
[0].mode
;
16523 mode1
= insn_data
[icode
].operand
[1].mode
;
16524 mode2
= insn_data
[icode
].operand
[2].mode
;
16525 mode3
= insn_data
[icode
].operand
[3].mode
;
16527 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16529 op0
= copy_to_reg (op0
);
16530 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16532 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16534 op1
= copy_to_reg (op1
);
16535 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
16537 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
16539 error ("shift must be an immediate");
16542 target
= gen_reg_rtx (mode
);
16543 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
16550 case IX86_BUILTIN_VEC_INIT_V2SI
:
16551 case IX86_BUILTIN_VEC_INIT_V4HI
:
16552 case IX86_BUILTIN_VEC_INIT_V8QI
:
16553 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
16555 case IX86_BUILTIN_VEC_EXT_V2DF
:
16556 case IX86_BUILTIN_VEC_EXT_V2DI
:
16557 case IX86_BUILTIN_VEC_EXT_V4SF
:
16558 case IX86_BUILTIN_VEC_EXT_V4SI
:
16559 case IX86_BUILTIN_VEC_EXT_V8HI
:
16560 case IX86_BUILTIN_VEC_EXT_V2SI
:
16561 case IX86_BUILTIN_VEC_EXT_V4HI
:
16562 return ix86_expand_vec_ext_builtin (arglist
, target
);
16564 case IX86_BUILTIN_VEC_SET_V8HI
:
16565 case IX86_BUILTIN_VEC_SET_V4HI
:
16566 return ix86_expand_vec_set_builtin (arglist
);
16572 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16573 if (d
->code
== fcode
)
16575 /* Compares are treated specially. */
16576 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16577 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
16578 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16579 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16580 return ix86_expand_sse_compare (d
, arglist
, target
);
16582 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
16585 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16586 if (d
->code
== fcode
)
16587 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
16589 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16590 if (d
->code
== fcode
)
16591 return ix86_expand_sse_comi (d
, arglist
, target
);
16593 gcc_unreachable ();
16596 /* Store OPERAND to the memory after reload is completed. This means
16597 that we can't easily use assign_stack_local. */
16599 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
16603 gcc_assert (reload_completed
);
16604 if (TARGET_RED_ZONE
)
16606 result
= gen_rtx_MEM (mode
,
16607 gen_rtx_PLUS (Pmode
,
16609 GEN_INT (-RED_ZONE_SIZE
)));
16610 emit_move_insn (result
, operand
);
16612 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
16618 operand
= gen_lowpart (DImode
, operand
);
16622 gen_rtx_SET (VOIDmode
,
16623 gen_rtx_MEM (DImode
,
16624 gen_rtx_PRE_DEC (DImode
,
16625 stack_pointer_rtx
)),
16629 gcc_unreachable ();
16631 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16640 split_di (&operand
, 1, operands
, operands
+ 1);
16642 gen_rtx_SET (VOIDmode
,
16643 gen_rtx_MEM (SImode
,
16644 gen_rtx_PRE_DEC (Pmode
,
16645 stack_pointer_rtx
)),
16648 gen_rtx_SET (VOIDmode
,
16649 gen_rtx_MEM (SImode
,
16650 gen_rtx_PRE_DEC (Pmode
,
16651 stack_pointer_rtx
)),
16656 /* Store HImodes as SImodes. */
16657 operand
= gen_lowpart (SImode
, operand
);
16661 gen_rtx_SET (VOIDmode
,
16662 gen_rtx_MEM (GET_MODE (operand
),
16663 gen_rtx_PRE_DEC (SImode
,
16664 stack_pointer_rtx
)),
16668 gcc_unreachable ();
16670 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16675 /* Free operand from the memory. */
16677 ix86_free_from_memory (enum machine_mode mode
)
16679 if (!TARGET_RED_ZONE
)
16683 if (mode
== DImode
|| TARGET_64BIT
)
16687 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16688 to pop or add instruction if registers are available. */
16689 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16690 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16695 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16696 QImode must go into class Q_REGS.
16697 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16698 movdf to do mem-to-mem moves through integer regs. */
16700 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
16702 enum machine_mode mode
= GET_MODE (x
);
16704 /* We're only allowed to return a subclass of CLASS. Many of the
16705 following checks fail for NO_REGS, so eliminate that early. */
16706 if (class == NO_REGS
)
16709 /* All classes can load zeros. */
16710 if (x
== CONST0_RTX (mode
))
16713 /* Force constants into memory if we are loading a (nonzero) constant into
16714 an MMX or SSE register. This is because there are no MMX/SSE instructions
16715 to load from a constant. */
16717 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16720 /* Prefer SSE regs only, if we can use them for math. */
16721 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
16722 return SSE_CLASS_P (class) ? class : NO_REGS
;
16724 /* Floating-point constants need more complex checks. */
16725 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
16727 /* General regs can load everything. */
16728 if (reg_class_subset_p (class, GENERAL_REGS
))
16731 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16732 zero above. We only want to wind up preferring 80387 registers if
16733 we plan on doing computation with them. */
16735 && standard_80387_constant_p (x
))
16737 /* Limit class to non-sse. */
16738 if (class == FLOAT_SSE_REGS
)
16740 if (class == FP_TOP_SSE_REGS
)
16742 if (class == FP_SECOND_SSE_REGS
)
16743 return FP_SECOND_REG
;
16744 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
16751 /* Generally when we see PLUS here, it's the function invariant
16752 (plus soft-fp const_int). Which can only be computed into general
16754 if (GET_CODE (x
) == PLUS
)
16755 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
16757 /* QImode constants are easy to load, but non-constant QImode data
16758 must go into Q_REGS. */
16759 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
16761 if (reg_class_subset_p (class, Q_REGS
))
16763 if (reg_class_subset_p (Q_REGS
, class))
16771 /* Discourage putting floating-point values in SSE registers unless
16772 SSE math is being used, and likewise for the 387 registers. */
16774 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
16776 enum machine_mode mode
= GET_MODE (x
);
16778 /* Restrict the output reload class to the register bank that we are doing
16779 math on. If we would like not to return a subset of CLASS, reject this
16780 alternative: if reload cannot do this, it will still use its choice. */
16781 mode
= GET_MODE (x
);
16782 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
16783 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
16785 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
16787 if (class == FP_TOP_SSE_REGS
)
16789 else if (class == FP_SECOND_SSE_REGS
)
16790 return FP_SECOND_REG
;
16792 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
16798 /* If we are copying between general and FP registers, we need a memory
16799 location. The same is true for SSE and MMX registers.
16801 The macro can't work reliably when one of the CLASSES is class containing
16802 registers from multiple units (SSE, MMX, integer). We avoid this by never
16803 combining those units in single alternative in the machine description.
16804 Ensure that this constraint holds to avoid unexpected surprises.
16806 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16807 enforce these sanity checks. */
16810 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
16811 enum machine_mode mode
, int strict
)
16813 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
16814 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
16815 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
16816 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
16817 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
16818 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
16820 gcc_assert (!strict
);
16824 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
16827 /* ??? This is a lie. We do have moves between mmx/general, and for
16828 mmx/sse2. But by saying we need secondary memory we discourage the
16829 register allocator from using the mmx registers unless needed. */
16830 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
16833 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16835 /* SSE1 doesn't have any direct moves from other classes. */
16839 /* If the target says that inter-unit moves are more expensive
16840 than moving through memory, then don't generate them. */
16841 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
16844 /* Between SSE and general, we have moves no larger than word size. */
16845 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
16848 /* ??? For the cost of one register reformat penalty, we could use
16849 the same instructions to move SFmode and DFmode data, but the
16850 relevant move patterns don't support those alternatives. */
16851 if (mode
== SFmode
|| mode
== DFmode
)
16858 /* Return true if the registers in CLASS cannot represent the change from
16859 modes FROM to TO. */
16862 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
16863 enum reg_class
class)
16868 /* x87 registers can't do subreg at all, as all values are reformatted
16869 to extended precision. */
16870 if (MAYBE_FLOAT_CLASS_P (class))
16873 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16875 /* Vector registers do not support QI or HImode loads. If we don't
16876 disallow a change to these modes, reload will assume it's ok to
16877 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16878 the vec_dupv4hi pattern. */
16879 if (GET_MODE_SIZE (from
) < 4)
16882 /* Vector registers do not support subreg with nonzero offsets, which
16883 are otherwise valid for integer registers. Since we can't see
16884 whether we have a nonzero offset from here, prohibit all
16885 nonparadoxical subregs changing size. */
16886 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
16893 /* Return the cost of moving data from a register in class CLASS1 to
16894 one in class CLASS2.
16896 It is not required that the cost always equal 2 when FROM is the same as TO;
16897 on some machines it is expensive to move between registers if they are not
16898 general registers. */
16901 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
16902 enum reg_class class2
)
16904 /* In case we require secondary memory, compute cost of the store followed
16905 by load. In order to avoid bad register allocation choices, we need
16906 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16908 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
16912 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
16913 MEMORY_MOVE_COST (mode
, class1
, 1));
16914 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
16915 MEMORY_MOVE_COST (mode
, class2
, 1));
16917 /* In case of copying from general_purpose_register we may emit multiple
16918 stores followed by single load causing memory size mismatch stall.
16919 Count this as arbitrarily high cost of 20. */
16920 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
16923 /* In the case of FP/MMX moves, the registers actually overlap, and we
16924 have to switch modes in order to treat them differently. */
16925 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
16926 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
16932 /* Moves between SSE/MMX and integer unit are expensive. */
16933 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
16934 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16935 return ix86_cost
->mmxsse_to_integer
;
16936 if (MAYBE_FLOAT_CLASS_P (class1
))
16937 return ix86_cost
->fp_move
;
16938 if (MAYBE_SSE_CLASS_P (class1
))
16939 return ix86_cost
->sse_move
;
16940 if (MAYBE_MMX_CLASS_P (class1
))
16941 return ix86_cost
->mmx_move
;
16945 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16948 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
16950 /* Flags and only flags can only hold CCmode values. */
16951 if (CC_REGNO_P (regno
))
16952 return GET_MODE_CLASS (mode
) == MODE_CC
;
16953 if (GET_MODE_CLASS (mode
) == MODE_CC
16954 || GET_MODE_CLASS (mode
) == MODE_RANDOM
16955 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
16957 if (FP_REGNO_P (regno
))
16958 return VALID_FP_MODE_P (mode
);
16959 if (SSE_REGNO_P (regno
))
16961 /* We implement the move patterns for all vector modes into and
16962 out of SSE registers, even when no operation instructions
16964 return (VALID_SSE_REG_MODE (mode
)
16965 || VALID_SSE2_REG_MODE (mode
)
16966 || VALID_MMX_REG_MODE (mode
)
16967 || VALID_MMX_REG_MODE_3DNOW (mode
));
16969 if (MMX_REGNO_P (regno
))
16971 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16972 so if the register is available at all, then we can move data of
16973 the given mode into or out of it. */
16974 return (VALID_MMX_REG_MODE (mode
)
16975 || VALID_MMX_REG_MODE_3DNOW (mode
));
16978 if (mode
== QImode
)
16980 /* Take care for QImode values - they can be in non-QI regs,
16981 but then they do cause partial register stalls. */
16982 if (regno
< 4 || TARGET_64BIT
)
16984 if (!TARGET_PARTIAL_REG_STALL
)
16986 return reload_in_progress
|| reload_completed
;
16988 /* We handle both integer and floats in the general purpose registers. */
16989 else if (VALID_INT_MODE_P (mode
))
16991 else if (VALID_FP_MODE_P (mode
))
16993 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16994 on to use that value in smaller contexts, this can easily force a
16995 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16996 supporting DImode, allow it. */
16997 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
17003 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17004 tieable integer mode. */
17007 ix86_tieable_integer_mode_p (enum machine_mode mode
)
17016 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
17019 return TARGET_64BIT
;
17026 /* Return true if MODE1 is accessible in a register that can hold MODE2
17027 without copying. That is, all register classes that can hold MODE2
17028 can also hold MODE1. */
17031 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
17033 if (mode1
== mode2
)
17036 if (ix86_tieable_integer_mode_p (mode1
)
17037 && ix86_tieable_integer_mode_p (mode2
))
17040 /* MODE2 being XFmode implies fp stack or general regs, which means we
17041 can tie any smaller floating point modes to it. Note that we do not
17042 tie this with TFmode. */
17043 if (mode2
== XFmode
)
17044 return mode1
== SFmode
|| mode1
== DFmode
;
17046 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17047 that we can tie it with SFmode. */
17048 if (mode2
== DFmode
)
17049 return mode1
== SFmode
;
17051 /* If MODE2 is only appropriate for an SSE register, then tie with
17052 any other mode acceptable to SSE registers. */
17053 if (GET_MODE_SIZE (mode2
) >= 8
17054 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
17055 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
17057 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17058 with any other mode acceptable to MMX registers. */
17059 if (GET_MODE_SIZE (mode2
) == 8
17060 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
17061 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
17066 /* Return the cost of moving data of mode M between a
17067 register and memory. A value of 2 is the default; this cost is
17068 relative to those in `REGISTER_MOVE_COST'.
17070 If moving between registers and memory is more expensive than
17071 between two registers, you should define this macro to express the
17074 Model also increased moving costs of QImode registers in non
17078 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
17080 if (FLOAT_CLASS_P (class))
17097 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
17099 if (SSE_CLASS_P (class))
17102 switch (GET_MODE_SIZE (mode
))
17116 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
17118 if (MMX_CLASS_P (class))
17121 switch (GET_MODE_SIZE (mode
))
17132 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
17134 switch (GET_MODE_SIZE (mode
))
17138 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
17139 : ix86_cost
->movzbl_load
);
17141 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
17142 : ix86_cost
->int_store
[0] + 4);
17145 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
17147 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17148 if (mode
== TFmode
)
17150 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
17151 * (((int) GET_MODE_SIZE (mode
)
17152 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
17156 /* Compute a (partial) cost for rtx X. Return true if the complete
17157 cost has been computed, and false if subexpressions should be
17158 scanned. In either case, *TOTAL contains the cost result. */
17161 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
17163 enum machine_mode mode
= GET_MODE (x
);
17171 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
17173 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
17175 else if (flag_pic
&& SYMBOLIC_CONST (x
)
17177 || (!GET_CODE (x
) != LABEL_REF
17178 && (GET_CODE (x
) != SYMBOL_REF
17179 || !SYMBOL_REF_LOCAL_P (x
)))))
17186 if (mode
== VOIDmode
)
17189 switch (standard_80387_constant_p (x
))
17194 default: /* Other constants */
17199 /* Start with (MEM (SYMBOL_REF)), since that's where
17200 it'll probably end up. Add a penalty for size. */
17201 *total
= (COSTS_N_INSNS (1)
17202 + (flag_pic
!= 0 && !TARGET_64BIT
)
17203 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
17209 /* The zero extensions is often completely free on x86_64, so make
17210 it as cheap as possible. */
17211 if (TARGET_64BIT
&& mode
== DImode
17212 && GET_MODE (XEXP (x
, 0)) == SImode
)
17214 else if (TARGET_ZERO_EXTEND_WITH_AND
)
17215 *total
= ix86_cost
->add
;
17217 *total
= ix86_cost
->movzx
;
17221 *total
= ix86_cost
->movsx
;
17225 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
17226 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
17228 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17231 *total
= ix86_cost
->add
;
17234 if ((value
== 2 || value
== 3)
17235 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
17237 *total
= ix86_cost
->lea
;
17247 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
17249 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17251 if (INTVAL (XEXP (x
, 1)) > 32)
17252 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
17254 *total
= ix86_cost
->shift_const
* 2;
17258 if (GET_CODE (XEXP (x
, 1)) == AND
)
17259 *total
= ix86_cost
->shift_var
* 2;
17261 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
17266 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17267 *total
= ix86_cost
->shift_const
;
17269 *total
= ix86_cost
->shift_var
;
17274 if (FLOAT_MODE_P (mode
))
17276 *total
= ix86_cost
->fmul
;
17281 rtx op0
= XEXP (x
, 0);
17282 rtx op1
= XEXP (x
, 1);
17284 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17286 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17287 for (nbits
= 0; value
!= 0; value
&= value
- 1)
17291 /* This is arbitrary. */
17294 /* Compute costs correctly for widening multiplication. */
17295 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
17296 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
17297 == GET_MODE_SIZE (mode
))
17299 int is_mulwiden
= 0;
17300 enum machine_mode inner_mode
= GET_MODE (op0
);
17302 if (GET_CODE (op0
) == GET_CODE (op1
))
17303 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
17304 else if (GET_CODE (op1
) == CONST_INT
)
17306 if (GET_CODE (op0
) == SIGN_EXTEND
)
17307 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
17310 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
17314 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
17317 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
17318 + nbits
* ix86_cost
->mult_bit
17319 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
17328 if (FLOAT_MODE_P (mode
))
17329 *total
= ix86_cost
->fdiv
;
17331 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
17335 if (FLOAT_MODE_P (mode
))
17336 *total
= ix86_cost
->fadd
;
17337 else if (GET_MODE_CLASS (mode
) == MODE_INT
17338 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
17340 if (GET_CODE (XEXP (x
, 0)) == PLUS
17341 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
17342 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
17343 && CONSTANT_P (XEXP (x
, 1)))
17345 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
17346 if (val
== 2 || val
== 4 || val
== 8)
17348 *total
= ix86_cost
->lea
;
17349 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17350 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
17352 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17356 else if (GET_CODE (XEXP (x
, 0)) == MULT
17357 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
17359 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
17360 if (val
== 2 || val
== 4 || val
== 8)
17362 *total
= ix86_cost
->lea
;
17363 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17364 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17368 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
17370 *total
= ix86_cost
->lea
;
17371 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17372 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17373 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17380 if (FLOAT_MODE_P (mode
))
17382 *total
= ix86_cost
->fadd
;
17390 if (!TARGET_64BIT
&& mode
== DImode
)
17392 *total
= (ix86_cost
->add
* 2
17393 + (rtx_cost (XEXP (x
, 0), outer_code
)
17394 << (GET_MODE (XEXP (x
, 0)) != DImode
))
17395 + (rtx_cost (XEXP (x
, 1), outer_code
)
17396 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
17402 if (FLOAT_MODE_P (mode
))
17404 *total
= ix86_cost
->fchs
;
17410 if (!TARGET_64BIT
&& mode
== DImode
)
17411 *total
= ix86_cost
->add
* 2;
17413 *total
= ix86_cost
->add
;
17417 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
17418 && XEXP (XEXP (x
, 0), 1) == const1_rtx
17419 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
17420 && XEXP (x
, 1) == const0_rtx
)
17422 /* This kind of construct is implemented using test[bwl].
17423 Treat it as if we had an AND. */
17424 *total
= (ix86_cost
->add
17425 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
17426 + rtx_cost (const1_rtx
, outer_code
));
17432 if (!TARGET_SSE_MATH
17434 || (mode
== DFmode
&& !TARGET_SSE2
))
17435 /* For standard 80387 constants, raise the cost to prevent
17436 compress_float_constant() to generate load from memory. */
17437 switch (standard_80387_constant_p (XEXP (x
, 0)))
17447 *total
= (x86_ext_80387_constants
& TUNEMASK
17454 if (FLOAT_MODE_P (mode
))
17455 *total
= ix86_cost
->fabs
;
17459 if (FLOAT_MODE_P (mode
))
17460 *total
= ix86_cost
->fsqrt
;
17464 if (XINT (x
, 1) == UNSPEC_TP
)
17475 static int current_machopic_label_num
;
17477 /* Given a symbol name and its associated stub, write out the
17478 definition of the stub. */
17481 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
17483 unsigned int length
;
17484 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
17485 int label
= ++current_machopic_label_num
;
17487 /* For 64-bit we shouldn't get here. */
17488 gcc_assert (!TARGET_64BIT
);
17490 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17491 symb
= (*targetm
.strip_name_encoding
) (symb
);
17493 length
= strlen (stub
);
17494 binder_name
= alloca (length
+ 32);
17495 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
17497 length
= strlen (symb
);
17498 symbol_name
= alloca (length
+ 32);
17499 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
17501 sprintf (lazy_ptr_name
, "L%d$lz", label
);
17504 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
17506 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
17508 fprintf (file
, "%s:\n", stub
);
17509 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17513 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
17514 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
17515 fprintf (file
, "\tjmp\t*%%edx\n");
17518 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
17520 fprintf (file
, "%s:\n", binder_name
);
17524 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
17525 fprintf (file
, "\tpushl\t%%eax\n");
17528 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
17530 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
17532 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
17533 fprintf (file
, "%s:\n", lazy_ptr_name
);
17534 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17535 fprintf (file
, "\t.long %s\n", binder_name
);
17539 darwin_x86_file_end (void)
17541 darwin_file_end ();
17544 #endif /* TARGET_MACHO */
17546 /* Order the registers for register allocator. */
17549 x86_order_regs_for_local_alloc (void)
17554 /* First allocate the local general purpose registers. */
17555 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17556 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
17557 reg_alloc_order
[pos
++] = i
;
17559 /* Global general purpose registers. */
17560 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17561 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
17562 reg_alloc_order
[pos
++] = i
;
17564 /* x87 registers come first in case we are doing FP math
17566 if (!TARGET_SSE_MATH
)
17567 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17568 reg_alloc_order
[pos
++] = i
;
17570 /* SSE registers. */
17571 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
17572 reg_alloc_order
[pos
++] = i
;
17573 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
17574 reg_alloc_order
[pos
++] = i
;
17576 /* x87 registers. */
17577 if (TARGET_SSE_MATH
)
17578 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17579 reg_alloc_order
[pos
++] = i
;
17581 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
17582 reg_alloc_order
[pos
++] = i
;
17584 /* Initialize the rest of array as we do not allocate some registers
17586 while (pos
< FIRST_PSEUDO_REGISTER
)
17587 reg_alloc_order
[pos
++] = 0;
17590 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17591 struct attribute_spec.handler. */
17593 ix86_handle_struct_attribute (tree
*node
, tree name
,
17594 tree args ATTRIBUTE_UNUSED
,
17595 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
17598 if (DECL_P (*node
))
17600 if (TREE_CODE (*node
) == TYPE_DECL
)
17601 type
= &TREE_TYPE (*node
);
17606 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
17607 || TREE_CODE (*type
) == UNION_TYPE
)))
17609 warning (OPT_Wattributes
, "%qs attribute ignored",
17610 IDENTIFIER_POINTER (name
));
17611 *no_add_attrs
= true;
17614 else if ((is_attribute_p ("ms_struct", name
)
17615 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
17616 || ((is_attribute_p ("gcc_struct", name
)
17617 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
17619 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
17620 IDENTIFIER_POINTER (name
));
17621 *no_add_attrs
= true;
17628 ix86_ms_bitfield_layout_p (tree record_type
)
17630 return (TARGET_MS_BITFIELD_LAYOUT
&&
17631 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
17632 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
17635 /* Returns an expression indicating where the this parameter is
17636 located on entry to the FUNCTION. */
17639 x86_this_parameter (tree function
)
17641 tree type
= TREE_TYPE (function
);
17645 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
17646 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
17649 if (ix86_function_regparm (type
, function
) > 0)
17653 parm
= TYPE_ARG_TYPES (type
);
17654 /* Figure out whether or not the function has a variable number of
17656 for (; parm
; parm
= TREE_CHAIN (parm
))
17657 if (TREE_VALUE (parm
) == void_type_node
)
17659 /* If not, the this parameter is in the first argument. */
17663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
17665 return gen_rtx_REG (SImode
, regno
);
17669 if (aggregate_value_p (TREE_TYPE (type
), type
))
17670 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
17672 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
17675 /* Determine whether x86_output_mi_thunk can succeed. */
17678 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
17679 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
17680 HOST_WIDE_INT vcall_offset
, tree function
)
17682 /* 64-bit can handle anything. */
17686 /* For 32-bit, everything's fine if we have one free register. */
17687 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
17690 /* Need a free register for vcall_offset. */
17694 /* Need a free register for GOT references. */
17695 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
17698 /* Otherwise ok. */
17702 /* Output the assembler code for a thunk function. THUNK_DECL is the
17703 declaration for the thunk function itself, FUNCTION is the decl for
17704 the target function. DELTA is an immediate constant offset to be
17705 added to THIS. If VCALL_OFFSET is nonzero, the word at
17706 *(*this + vcall_offset) should be added to THIS. */
17709 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
17710 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
17711 HOST_WIDE_INT vcall_offset
, tree function
)
17714 rtx
this = x86_this_parameter (function
);
17717 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17718 pull it in now and let DELTA benefit. */
17721 else if (vcall_offset
)
17723 /* Put the this parameter into %eax. */
17725 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
17726 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17729 this_reg
= NULL_RTX
;
17731 /* Adjust the this parameter by a fixed constant. */
17734 xops
[0] = GEN_INT (delta
);
17735 xops
[1] = this_reg
? this_reg
: this;
17738 if (!x86_64_general_operand (xops
[0], DImode
))
17740 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17742 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
17746 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17749 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17752 /* Adjust the this parameter by a value stored in the vtable. */
17756 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17759 int tmp_regno
= 2 /* ECX */;
17760 if (lookup_attribute ("fastcall",
17761 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
17762 tmp_regno
= 0 /* EAX */;
17763 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
17766 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
17769 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17771 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17773 /* Adjust the this parameter. */
17774 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
17775 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
17777 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
17778 xops
[0] = GEN_INT (vcall_offset
);
17780 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17781 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
17783 xops
[1] = this_reg
;
17785 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17787 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17790 /* If necessary, drop THIS back to its stack slot. */
17791 if (this_reg
&& this_reg
!= this)
17793 xops
[0] = this_reg
;
17795 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17798 xops
[0] = XEXP (DECL_RTL (function
), 0);
17801 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17802 output_asm_insn ("jmp\t%P0", xops
);
17805 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
17806 tmp
= gen_rtx_CONST (Pmode
, tmp
);
17807 tmp
= gen_rtx_MEM (QImode
, tmp
);
17809 output_asm_insn ("jmp\t%A0", xops
);
17814 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17815 output_asm_insn ("jmp\t%P0", xops
);
17820 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
17821 tmp
= (gen_rtx_SYMBOL_REF
17823 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
17824 tmp
= gen_rtx_MEM (QImode
, tmp
);
17826 output_asm_insn ("jmp\t%0", xops
);
17829 #endif /* TARGET_MACHO */
17831 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
17832 output_set_got (tmp
, NULL_RTX
);
17835 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
17836 output_asm_insn ("jmp\t{*}%1", xops
);
17842 x86_file_start (void)
17844 default_file_start ();
17846 darwin_file_start ();
17848 if (X86_FILE_START_VERSION_DIRECTIVE
)
17849 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
17850 if (X86_FILE_START_FLTUSED
)
17851 fputs ("\t.global\t__fltused\n", asm_out_file
);
17852 if (ix86_asm_dialect
== ASM_INTEL
)
17853 fputs ("\t.intel_syntax\n", asm_out_file
);
17857 x86_field_alignment (tree field
, int computed
)
17859 enum machine_mode mode
;
17860 tree type
= TREE_TYPE (field
);
17862 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
17864 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
17865 ? get_inner_array_type (type
) : type
);
17866 if (mode
== DFmode
|| mode
== DCmode
17867 || GET_MODE_CLASS (mode
) == MODE_INT
17868 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
17869 return MIN (32, computed
);
17873 /* Output assembler code to FILE to increment profiler label # LABELNO
17874 for profiling a function entry. */
17876 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
17881 #ifndef NO_PROFILE_COUNTERS
17882 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
17884 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
17888 #ifndef NO_PROFILE_COUNTERS
17889 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
17891 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17895 #ifndef NO_PROFILE_COUNTERS
17896 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17897 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
17899 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
17903 #ifndef NO_PROFILE_COUNTERS
17904 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
17905 PROFILE_COUNT_REGISTER
);
17907 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17911 /* We don't have exact information about the insn sizes, but we may assume
17912 quite safely that we are informed about all 1 byte insns and memory
17913 address sizes. This is enough to eliminate unnecessary padding in
17917 min_insn_size (rtx insn
)
17921 if (!INSN_P (insn
) || !active_insn_p (insn
))
17924 /* Discard alignments we've emit and jump instructions. */
17925 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
17926 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
17928 if (GET_CODE (insn
) == JUMP_INSN
17929 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
17930 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
17933 /* Important case - calls are always 5 bytes.
17934 It is common to have many calls in the row. */
17935 if (GET_CODE (insn
) == CALL_INSN
17936 && symbolic_reference_mentioned_p (PATTERN (insn
))
17937 && !SIBLING_CALL_P (insn
))
17939 if (get_attr_length (insn
) <= 1)
17942 /* For normal instructions we may rely on the sizes of addresses
17943 and the presence of symbol to require 4 bytes of encoding.
17944 This is not the case for jumps where references are PC relative. */
17945 if (GET_CODE (insn
) != JUMP_INSN
)
17947 l
= get_attr_length_address (insn
);
17948 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
17957 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17961 ix86_avoid_jump_misspredicts (void)
17963 rtx insn
, start
= get_insns ();
17964 int nbytes
= 0, njumps
= 0;
17967 /* Look for all minimal intervals of instructions containing 4 jumps.
17968 The intervals are bounded by START and INSN. NBYTES is the total
17969 size of instructions in the interval including INSN and not including
17970 START. When the NBYTES is smaller than 16 bytes, it is possible
17971 that the end of START and INSN ends up in the same 16byte page.
17973 The smallest offset in the page INSN can start is the case where START
17974 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17975 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17977 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17980 nbytes
+= min_insn_size (insn
);
17982 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
17983 INSN_UID (insn
), min_insn_size (insn
));
17984 if ((GET_CODE (insn
) == JUMP_INSN
17985 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
17986 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
17987 || GET_CODE (insn
) == CALL_INSN
)
17994 start
= NEXT_INSN (start
);
17995 if ((GET_CODE (start
) == JUMP_INSN
17996 && GET_CODE (PATTERN (start
)) != ADDR_VEC
17997 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
17998 || GET_CODE (start
) == CALL_INSN
)
17999 njumps
--, isjump
= 1;
18002 nbytes
-= min_insn_size (start
);
18004 gcc_assert (njumps
>= 0);
18006 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
18007 INSN_UID (start
), INSN_UID (insn
), nbytes
);
18009 if (njumps
== 3 && isjump
&& nbytes
< 16)
18011 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
18014 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
18015 INSN_UID (insn
), padsize
);
18016 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
18021 /* AMD Athlon works faster
18022 when RET is not destination of conditional jump or directly preceded
18023 by other jump instruction. We avoid the penalty by inserting NOP just
18024 before the RET instructions in such cases. */
18026 ix86_pad_returns (void)
18031 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
18033 basic_block bb
= e
->src
;
18034 rtx ret
= BB_END (bb
);
18036 bool replace
= false;
18038 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
18039 || !maybe_hot_bb_p (bb
))
18041 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
18042 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
18044 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
18049 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
18050 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
18051 && !(e
->flags
& EDGE_FALLTHRU
))
18056 prev
= prev_active_insn (ret
);
18058 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
18059 || GET_CODE (prev
) == CALL_INSN
))
18061 /* Empty functions get branch mispredict even when the jump destination
18062 is not visible to us. */
18063 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
18068 emit_insn_before (gen_return_internal_long (), ret
);
18074 /* Implement machine specific optimizations. We implement padding of returns
18075 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18079 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
18080 ix86_pad_returns ();
18081 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
18082 ix86_avoid_jump_misspredicts ();
18085 /* Return nonzero when QImode register that must be represented via REX prefix
18088 x86_extended_QIreg_mentioned_p (rtx insn
)
18091 extract_insn_cached (insn
);
18092 for (i
= 0; i
< recog_data
.n_operands
; i
++)
18093 if (REG_P (recog_data
.operand
[i
])
18094 && REGNO (recog_data
.operand
[i
]) >= 4)
18099 /* Return nonzero when P points to register encoded via REX prefix.
18100 Called via for_each_rtx. */
18102 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
18104 unsigned int regno
;
18107 regno
= REGNO (*p
);
18108 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
18111 /* Return true when INSN mentions register that must be encoded using REX
18114 x86_extended_reg_mentioned_p (rtx insn
)
18116 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
18119 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18120 optabs would emit if we didn't have TFmode patterns. */
18123 x86_emit_floatuns (rtx operands
[2])
18125 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
18126 enum machine_mode mode
, inmode
;
18128 inmode
= GET_MODE (operands
[1]);
18129 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
18132 in
= force_reg (inmode
, operands
[1]);
18133 mode
= GET_MODE (out
);
18134 neglab
= gen_label_rtx ();
18135 donelab
= gen_label_rtx ();
18136 i1
= gen_reg_rtx (Pmode
);
18137 f0
= gen_reg_rtx (mode
);
18139 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
18141 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
18142 emit_jump_insn (gen_jump (donelab
));
18145 emit_label (neglab
);
18147 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18148 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18149 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
18150 expand_float (f0
, i0
, 0);
18151 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
18153 emit_label (donelab
);
18156 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18157 with all elements equal to VAR. Return true if successful. */
18160 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
18161 rtx target
, rtx val
)
18163 enum machine_mode smode
, wsmode
, wvmode
;
18178 val
= force_reg (GET_MODE_INNER (mode
), val
);
18179 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18180 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18186 if (TARGET_SSE
|| TARGET_3DNOW_A
)
18188 val
= gen_lowpart (SImode
, val
);
18189 x
= gen_rtx_TRUNCATE (HImode
, val
);
18190 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
18191 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18213 /* Extend HImode to SImode using a paradoxical SUBREG. */
18214 tmp1
= gen_reg_rtx (SImode
);
18215 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18216 /* Insert the SImode value as low element of V4SImode vector. */
18217 tmp2
= gen_reg_rtx (V4SImode
);
18218 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18219 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18220 CONST0_RTX (V4SImode
),
18222 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18223 /* Cast the V4SImode vector back to a V8HImode vector. */
18224 tmp1
= gen_reg_rtx (V8HImode
);
18225 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
18226 /* Duplicate the low short through the whole low SImode word. */
18227 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
18228 /* Cast the V8HImode vector back to a V4SImode vector. */
18229 tmp2
= gen_reg_rtx (V4SImode
);
18230 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18231 /* Replicate the low element of the V4SImode vector. */
18232 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18234 /* Cast the V4SImode back to V8HImode, and store in target. */
18234 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
18245 /* Extend QImode to SImode using a paradoxical SUBREG. */
18246 tmp1
= gen_reg_rtx (SImode
);
18247 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18248 /* Insert the SImode value as low element of V4SImode vector. */
18249 tmp2
= gen_reg_rtx (V4SImode
);
18250 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18251 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18252 CONST0_RTX (V4SImode
),
18254 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18255 /* Cast the V4SImode vector back to a V16QImode vector. */
18256 tmp1
= gen_reg_rtx (V16QImode
);
18257 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
18258 /* Duplicate the low byte through the whole low SImode word. */
18259 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18260 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18261 /* Cast the V16QImode vector back to a V4SImode vector. */
18262 tmp2
= gen_reg_rtx (V4SImode
);
18263 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18264 /* Replicate the low element of the V4SImode vector. */
18265 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18266 /* Cast the V4SImode back to V16QImode, and store in target. */
18267 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
18275 /* Replicate the value once into the next wider mode and recurse. */
18276 val
= convert_modes (wsmode
, smode
, val
, true);
18277 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
18278 GEN_INT (GET_MODE_BITSIZE (smode
)),
18279 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18280 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
18282 x
= gen_reg_rtx (wvmode
);
18283 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
18284 gcc_unreachable ();
18285 emit_move_insn (target
, gen_lowpart (mode
, x
));
18293 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18294 whose ONE_VAR element is VAR, and other elements are zero. Return true
18298 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
18299 rtx target
, rtx var
, int one_var
)
18301 enum machine_mode vsimode
;
18317 var
= force_reg (GET_MODE_INNER (mode
), var
);
18318 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
18319 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18324 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
18325 new_target
= gen_reg_rtx (mode
);
18327 new_target
= target
;
18328 var
= force_reg (GET_MODE_INNER (mode
), var
);
18329 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
18330 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
18331 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
18334 /* We need to shuffle the value to the correct position, so
18335 create a new pseudo to store the intermediate result. */
18337 /* With SSE2, we can use the integer shuffle insns. */
18338 if (mode
!= V4SFmode
&& TARGET_SSE2
)
18340 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
18342 GEN_INT (one_var
== 1 ? 0 : 1),
18343 GEN_INT (one_var
== 2 ? 0 : 1),
18344 GEN_INT (one_var
== 3 ? 0 : 1)));
18345 if (target
!= new_target
)
18346 emit_move_insn (target
, new_target
);
18350 /* Otherwise convert the intermediate result to V4SFmode and
18351 use the SSE1 shuffle instructions. */
18352 if (mode
!= V4SFmode
)
18354 tmp
= gen_reg_rtx (V4SFmode
);
18355 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
18360 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
18362 GEN_INT (one_var
== 1 ? 0 : 1),
18363 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
18364 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
18366 if (mode
!= V4SFmode
)
18367 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
18368 else if (tmp
!= target
)
18369 emit_move_insn (target
, tmp
);
18371 else if (target
!= new_target
)
18372 emit_move_insn (target
, new_target
);
18377 vsimode
= V4SImode
;
18383 vsimode
= V2SImode
;
18389 /* Zero extend the variable element to SImode and recurse. */
18390 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
18392 x
= gen_reg_rtx (vsimode
);
18393 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
18395 gcc_unreachable ();
18397 emit_move_insn (target
, gen_lowpart (mode
, x
));
18405 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18406 consisting of the values in VALS. It is known that all elements
18407 except ONE_VAR are constants. Return true if successful. */
18410 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
18411 rtx target
, rtx vals
, int one_var
)
18413 rtx var
= XVECEXP (vals
, 0, one_var
);
18414 enum machine_mode wmode
;
18417 const_vec
= copy_rtx (vals
);
18418 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
18419 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
18427 /* For the two element vectors, it's just as easy to use
18428 the general case. */
18444 /* There's no way to set one QImode entry easily. Combine
18445 the variable value with its adjacent constant value, and
18446 promote to an HImode set. */
18447 x
= XVECEXP (vals
, 0, one_var
^ 1);
18450 var
= convert_modes (HImode
, QImode
, var
, true);
18451 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
18452 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18453 x
= GEN_INT (INTVAL (x
) & 0xff);
18457 var
= convert_modes (HImode
, QImode
, var
, true);
18458 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
18460 if (x
!= const0_rtx
)
18461 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
18462 1, OPTAB_LIB_WIDEN
);
18464 x
= gen_reg_rtx (wmode
);
18465 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
18466 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
18468 emit_move_insn (target
, gen_lowpart (mode
, x
));
18475 emit_move_insn (target
, const_vec
);
18476 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
18480 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18481 all values variable, and none identical. */
18484 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
18485 rtx target
, rtx vals
)
18487 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
18488 rtx op0
= NULL
, op1
= NULL
;
18489 bool use_vec_concat
= false;
18495 if (!mmx_ok
&& !TARGET_SSE
)
18501 /* For the two element vectors, we always implement VEC_CONCAT. */
18502 op0
= XVECEXP (vals
, 0, 0);
18503 op1
= XVECEXP (vals
, 0, 1);
18504 use_vec_concat
= true;
18508 half_mode
= V2SFmode
;
18511 half_mode
= V2SImode
;
18517 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18518 Recurse to load the two halves. */
18520 op0
= gen_reg_rtx (half_mode
);
18521 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
18522 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
18524 op1
= gen_reg_rtx (half_mode
);
18525 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
18526 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
18528 use_vec_concat
= true;
18539 gcc_unreachable ();
18542 if (use_vec_concat
)
18544 if (!register_operand (op0
, half_mode
))
18545 op0
= force_reg (half_mode
, op0
);
18546 if (!register_operand (op1
, half_mode
))
18547 op1
= force_reg (half_mode
, op1
);
18549 emit_insn (gen_rtx_SET (VOIDmode
, target
,
18550 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
18554 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
18555 enum machine_mode inner_mode
;
18556 rtx words
[4], shift
;
18558 inner_mode
= GET_MODE_INNER (mode
);
18559 n_elts
= GET_MODE_NUNITS (mode
);
18560 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
18561 n_elt_per_word
= n_elts
/ n_words
;
18562 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
18564 for (i
= 0; i
< n_words
; ++i
)
18566 rtx word
= NULL_RTX
;
18568 for (j
= 0; j
< n_elt_per_word
; ++j
)
18570 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
18571 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
18577 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
18578 word
, 1, OPTAB_LIB_WIDEN
);
18579 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
18580 word
, 1, OPTAB_LIB_WIDEN
);
18588 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
18589 else if (n_words
== 2)
18591 rtx tmp
= gen_reg_rtx (mode
);
18592 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
18593 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
18594 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
18595 emit_move_insn (target
, tmp
);
18597 else if (n_words
== 4)
18599 rtx tmp
= gen_reg_rtx (V4SImode
);
18600 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
18601 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
18602 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
18605 gcc_unreachable ();
18609 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18610 instructions unless MMX_OK is true. */
18613 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
18615 enum machine_mode mode
= GET_MODE (target
);
18616 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18617 int n_elts
= GET_MODE_NUNITS (mode
);
18618 int n_var
= 0, one_var
= -1;
18619 bool all_same
= true, all_const_zero
= true;
18623 for (i
= 0; i
< n_elts
; ++i
)
18625 x
= XVECEXP (vals
, 0, i
);
18626 if (!CONSTANT_P (x
))
18627 n_var
++, one_var
= i
;
18628 else if (x
!= CONST0_RTX (inner_mode
))
18629 all_const_zero
= false;
18630 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
18634 /* Constants are best loaded from the constant pool. */
18637 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
18641 /* If all values are identical, broadcast the value. */
18643 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
18644 XVECEXP (vals
, 0, 0)))
18647 /* Values where only one field is non-constant are best loaded from
18648 the pool and overwritten via move later. */
18652 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
18653 XVECEXP (vals
, 0, one_var
),
18657 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
18661 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
18665 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
18667 enum machine_mode mode
= GET_MODE (target
);
18668 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18669 bool use_vec_merge
= false;
18678 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
18679 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
18681 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
18683 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
18684 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18694 /* For the two element vectors, we implement a VEC_CONCAT with
18695 the extraction of the other element. */
18697 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
18698 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
18701 op0
= val
, op1
= tmp
;
18703 op0
= tmp
, op1
= val
;
18705 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
18706 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18714 use_vec_merge
= true;
18718 /* tmp = target = A B C D */
18719 tmp
= copy_to_reg (target
);
18720 /* target = A A B B */
18721 emit_insn (gen_sse_unpcklps (target
, target
, target
));
18722 /* target = X A B B */
18723 ix86_expand_vector_set (false, target
, val
, 0);
18724 /* target = A X C D */
18725 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18726 GEN_INT (1), GEN_INT (0),
18727 GEN_INT (2+4), GEN_INT (3+4)));
18731 /* tmp = target = A B C D */
18732 tmp
= copy_to_reg (target
);
18733 /* tmp = X B C D */
18734 ix86_expand_vector_set (false, tmp
, val
, 0);
18735 /* target = A B X D */
18736 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18737 GEN_INT (0), GEN_INT (1),
18738 GEN_INT (0+4), GEN_INT (3+4)));
18742 /* tmp = target = A B C D */
18743 tmp
= copy_to_reg (target
);
18744 /* tmp = X B C D */
18745 ix86_expand_vector_set (false, tmp
, val
, 0);
18746 /* target = A B C X */
18747 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18748 GEN_INT (0), GEN_INT (1),
18749 GEN_INT (2+4), GEN_INT (0+4)));
18753 gcc_unreachable ();
18758 /* Element 0 handled by vec_merge below. */
18761 use_vec_merge
= true;
18767 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18768 store into element 0, then shuffle them back. */
18772 order
[0] = GEN_INT (elt
);
18773 order
[1] = const1_rtx
;
18774 order
[2] = const2_rtx
;
18775 order
[3] = GEN_INT (3);
18776 order
[elt
] = const0_rtx
;
18778 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18779 order
[1], order
[2], order
[3]));
18781 ix86_expand_vector_set (false, target
, val
, 0);
18783 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18784 order
[1], order
[2], order
[3]));
18788 /* For SSE1, we have to reuse the V4SF code. */
18789 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
18790 gen_lowpart (SFmode
, val
), elt
);
18795 use_vec_merge
= TARGET_SSE2
;
18798 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18809 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18810 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
18811 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18815 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
18817 emit_move_insn (mem
, target
);
18819 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
18820 emit_move_insn (tmp
, val
);
18822 emit_move_insn (target
, mem
);
18827 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
18829 enum machine_mode mode
= GET_MODE (vec
);
18830 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18831 bool use_vec_extr
= false;
18844 use_vec_extr
= true;
18856 tmp
= gen_reg_rtx (mode
);
18857 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
18858 GEN_INT (elt
), GEN_INT (elt
),
18859 GEN_INT (elt
+4), GEN_INT (elt
+4)));
18863 tmp
= gen_reg_rtx (mode
);
18864 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
18868 gcc_unreachable ();
18871 use_vec_extr
= true;
18886 tmp
= gen_reg_rtx (mode
);
18887 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
18888 GEN_INT (elt
), GEN_INT (elt
),
18889 GEN_INT (elt
), GEN_INT (elt
)));
18893 tmp
= gen_reg_rtx (mode
);
18894 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
18898 gcc_unreachable ();
18901 use_vec_extr
= true;
18906 /* For SSE1, we have to reuse the V4SF code. */
18907 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
18908 gen_lowpart (V4SFmode
, vec
), elt
);
18914 use_vec_extr
= TARGET_SSE2
;
18917 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18922 /* ??? Could extract the appropriate HImode element and shift. */
18929 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
18930 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
18932 /* Let the rtl optimizers know about the zero extension performed. */
18933 if (inner_mode
== HImode
)
18935 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
18936 target
= gen_lowpart (SImode
, target
);
18939 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18943 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
18945 emit_move_insn (mem
, vec
);
18947 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
18948 emit_move_insn (target
, tmp
);
18952 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18953 pattern to reduce; DEST is the destination; IN is the input vector. */
18956 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
18958 rtx tmp1
, tmp2
, tmp3
;
18960 tmp1
= gen_reg_rtx (V4SFmode
);
18961 tmp2
= gen_reg_rtx (V4SFmode
);
18962 tmp3
= gen_reg_rtx (V4SFmode
);
18964 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
18965 emit_insn (fn (tmp2
, tmp1
, in
));
18967 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
18968 GEN_INT (1), GEN_INT (1),
18969 GEN_INT (1+4), GEN_INT (1+4)));
18970 emit_insn (fn (dest
, tmp2
, tmp3
));
18973 /* Target hook for scalar_mode_supported_p. */
18975 ix86_scalar_mode_supported_p (enum machine_mode mode
)
18977 if (DECIMAL_FLOAT_MODE_P (mode
))
18980 return default_scalar_mode_supported_p (mode
);
18983 /* Implements target hook vector_mode_supported_p. */
18985 ix86_vector_mode_supported_p (enum machine_mode mode
)
18987 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
18989 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
18991 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
18993 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
18998 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19000 We do this in the new i386 backend to maintain source compatibility
19001 with the old cc0-based compiler. */
19004 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
19005 tree inputs ATTRIBUTE_UNUSED
,
19008 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
19010 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
19012 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
19017 /* Return true if this goes in small data/bss. */
19020 ix86_in_large_data_p (tree exp
)
19022 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
19025 /* Functions are never large data. */
19026 if (TREE_CODE (exp
) == FUNCTION_DECL
)
19029 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
19031 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
19032 if (strcmp (section
, ".ldata") == 0
19033 || strcmp (section
, ".lbss") == 0)
19039 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
19041 /* If this is an incomplete type with size 0, then we can't put it
19042 in data because it might be too big when completed. */
19043 if (!size
|| size
> ix86_section_threshold
)
19050 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
19052 default_encode_section_info (decl
, rtl
, first
);
19054 if (TREE_CODE (decl
) == VAR_DECL
19055 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
19056 && ix86_in_large_data_p (decl
))
19057 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
19060 /* Worker function for REVERSE_CONDITION. */
19063 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
19065 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
19066 ? reverse_condition (code
)
19067 : reverse_condition_maybe_unordered (code
));
19070 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19074 output_387_reg_move (rtx insn
, rtx
*operands
)
19076 if (REG_P (operands
[1])
19077 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
19079 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
19080 return output_387_ffreep (operands
, 0);
19081 return "fstp\t%y0";
19083 if (STACK_TOP_P (operands
[0]))
19084 return "fld%z1\t%y1";
19088 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19089 FP status register is set. */
19092 ix86_emit_fp_unordered_jump (rtx label
)
19094 rtx reg
= gen_reg_rtx (HImode
);
19097 emit_insn (gen_x86_fnstsw_1 (reg
));
19099 if (TARGET_USE_SAHF
)
19101 emit_insn (gen_x86_sahf_1 (reg
));
19103 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
19104 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
19108 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
19110 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
19111 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
19114 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
19115 gen_rtx_LABEL_REF (VOIDmode
, label
),
19117 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
19118 emit_jump_insn (temp
);
19121 /* Output code to perform a log1p XFmode calculation. */
19123 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
19125 rtx label1
= gen_label_rtx ();
19126 rtx label2
= gen_label_rtx ();
19128 rtx tmp
= gen_reg_rtx (XFmode
);
19129 rtx tmp2
= gen_reg_rtx (XFmode
);
19131 emit_insn (gen_absxf2 (tmp
, op1
));
19132 emit_insn (gen_cmpxf (tmp
,
19133 CONST_DOUBLE_FROM_REAL_VALUE (
19134 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
19136 emit_jump_insn (gen_bge (label1
));
19138 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19139 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
19140 emit_jump (label2
);
19142 emit_label (label1
);
19143 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
19144 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
19145 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19146 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
19148 emit_label (label2
);
19151 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19154 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
19157 /* With Binutils 2.15, the "@unwind" marker must be specified on
19158 every occurrence of the ".eh_frame" section, not just the first
19161 && strcmp (name
, ".eh_frame") == 0)
19163 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
19164 flags
& SECTION_WRITE
? "aw" : "a");
19167 default_elf_asm_named_section (name
, flags
, decl
);
19170 /* Return the mangling of TYPE if it is an extended fundamental type. */
19172 static const char *
19173 ix86_mangle_fundamental_type (tree type
)
19175 switch (TYPE_MODE (type
))
19178 /* __float128 is "g". */
19181 /* "long double" or __float80 is "e". */
19188 /* For 32-bit code we can save PIC register setup by using
19189 __stack_chk_fail_local hidden function instead of calling
19190 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19191 register, so it is better to call __stack_chk_fail directly. */
19194 ix86_stack_protect_fail (void)
19196 return TARGET_64BIT
19197 ? default_external_stack_protect_fail ()
19198 : default_hidden_stack_protect_fail ();
19201 /* Select a format to encode pointers in exception handling data. CODE
19202 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19203 true if the symbol may be affected by dynamic relocations.
19205 ??? All x86 object file formats are capable of representing this.
19206 After all, the relocation needed is the same as for the call insn.
19207 Whether or not a particular assembler allows us to enter such, I
19208 guess we'll have to see. */
19210 asm_preferred_eh_data_format (int code
, int global
)
19214 int type
= DW_EH_PE_sdata8
;
19216 || ix86_cmodel
== CM_SMALL_PIC
19217 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
19218 type
= DW_EH_PE_sdata4
;
19219 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
19221 if (ix86_cmodel
== CM_SMALL
19222 || (ix86_cmodel
== CM_MEDIUM
&& code
))
19223 return DW_EH_PE_udata4
;
19224 return DW_EH_PE_absptr
;
19227 /* Expand copysign from SIGN to the positive value ABS_VALUE
19228 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19231 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
19233 enum machine_mode mode
= GET_MODE (sign
);
19234 rtx sgn
= gen_reg_rtx (mode
);
19235 if (mask
== NULL_RTX
)
19237 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
19238 if (!VECTOR_MODE_P (mode
))
19240 /* We need to generate a scalar mode mask in this case. */
19241 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19242 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19243 mask
= gen_reg_rtx (mode
);
19244 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19248 mask
= gen_rtx_NOT (mode
, mask
);
19249 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
19250 gen_rtx_AND (mode
, mask
, sign
)));
19251 emit_insn (gen_rtx_SET (VOIDmode
, result
,
19252 gen_rtx_IOR (mode
, abs_value
, sgn
)));
19255 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19256 mask for masking out the sign-bit is stored in *SMASK, if that is
19259 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
19261 enum machine_mode mode
= GET_MODE (op0
);
19264 xa
= gen_reg_rtx (mode
);
19265 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
19266 if (!VECTOR_MODE_P (mode
))
19268 /* We need to generate a scalar mode mask in this case. */
19269 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19270 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19271 mask
= gen_reg_rtx (mode
);
19272 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19274 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
19275 gen_rtx_AND (mode
, op0
, mask
)));
19283 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19284 swapping the operands if SWAP_OPERANDS is true. The expanded
19285 code is a forward jump to a newly created label in case the
19286 comparison is true. The generated label rtx is returned. */
19288 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
19289 bool swap_operands
)
19300 label
= gen_label_rtx ();
19301 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
19302 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19303 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
19304 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
19305 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19306 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
19307 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19308 JUMP_LABEL (tmp
) = label
;
19313 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19314 using comparison code CODE. Operands are swapped for the comparison if
19315 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19317 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
19318 bool swap_operands
)
19320 enum machine_mode mode
= GET_MODE (op0
);
19321 rtx mask
= gen_reg_rtx (mode
);
19330 if (mode
== DFmode
)
19331 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
19332 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
19334 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
19335 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  /* 2**52 for double, 2**23 for float: adding this constant to a
     nonnegative value smaller than it forces rounding to integer in
     the FP unit, which is the basis of the round/floor/ceil expanders
     below.  */
  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0): the largest representable value strictly
     below 0.5.  Using it instead of 0.5 avoids rounding values exactly
     halfway below an odd integer upward twice.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj: truncating fix conversion gives round-half-away
     behavior after the adjustment above.  */
  expand_fix (op0, adj, 0);
}
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  DO_FLOOR selects floor (true) vs. ceil (false).  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label;

  /* reg = (long)op1 — truncation toward zero.  */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg — convert back to compare against the input.  */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg
     For ceil the operands are swapped (via !do_floor) and the adjustment
     is +1 instead of -1.  The jump skips the adjustment when truncation
     already produced the right result.  */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
		       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	tmp = copysign (2**52, operand1);
	return operand1 + tmp - tmp;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  /* Temporary for holding the result, initialized to the input operand
     to ease control flow: if we take the early exit the input is already
     the answer.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1); MASK receives the sign-bit mask used below.  */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;
     Values >= 2**52 (2**23 for SFmode) are already integral, and NaNs
     must pass through unchanged.  */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* TWO52 = copysign (TWO52, operand1) so the add/sub trick rounds in
     the correct direction for negative inputs.  */
  ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);

  /* res = res + TWO52 - TWO52: the addition forces rounding to integer
     in the current rounding mode; the subtraction recovers the value.  */
  expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;
     Large values are already integral; NaNs fall through unchanged.  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; rounds XA to the nearest integer.  */
  expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1): restores the sign, making
     -0.0 -> -0.0 correct.  */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0)
     TMP is an all-ones mask when over-rounded; AND with 1.0 turns it
     into the FP value 1.0 or 0.0.  For ceil the comparison is swapped
     (via !do_floor) and 1.0 is added instead of subtracted.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, do_floor ? MINUS : PLUS,
		       xa, tmp, res, 0, OPTAB_DIRECT);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Relies on truncating fix conversion, so for DFmode it
   needs the 64bit-only cvttsd2siq; see ix86_expand_floorceildf_32 for
   the 32bit variant.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1).  No sign mask is needed here (NULL), since
     the result is produced via the integer round-trip below rather
     than copysign.  */
  xa = ix86_expand_sse_fabs (res, NULL);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x — truncate toward zero through an integer
     register, then convert back.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0)
     For ceil the comparison operands are swapped (!do_floor) and the
     correction is added instead of subtracted.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, do_floor ? MINUS : PLUS,
		       xa, tmp, res, 0, OPTAB_DIRECT);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
19566 /* Expand SSE sequence for computing round from OPERAND1 storing
19567 into OPERAND0. Sequence that works without relying on DImode truncation
19568 via cvttsd2siq that is only available on 64bit targets. */
19570 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
19572 /* C code for the stuff we expand below.
19573 double xa = fabs (x), xa2, x2;
19574 if (!isless (xa, TWO52))
19576 Using the absolute value and copying back sign makes
19577 -0.0 -> -0.0 correct.
19578 xa2 = xa + TWO52 - TWO52;
19583 else if (dxa > 0.5)
19585 x2 = copysign (xa2, x);
19588 enum machine_mode mode
= GET_MODE (operand0
);
19589 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
19591 TWO52
= ix86_gen_TWO52 (mode
);
19593 /* Temporary for holding the result, initialized to the input
19594 operand to ease control flow. */
19595 res
= gen_reg_rtx (mode
);
19596 emit_move_insn (res
, operand1
);
19598 /* xa = abs (operand1) */
19599 xa
= ix86_expand_sse_fabs (res
, &mask
);
19601 /* if (!isless (xa, TWO52)) goto label; */
19602 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19604 /* xa2 = xa + TWO52 - TWO52; */
19605 xa2
= gen_reg_rtx (mode
);
19606 expand_simple_binop (mode
, PLUS
, xa
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
19607 expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
19609 /* dxa = xa2 - xa; */
19610 dxa
= gen_reg_rtx (mode
);
19611 expand_simple_binop (mode
, MINUS
, xa2
, xa
, dxa
, 0, OPTAB_DIRECT
);
19613 /* generate 0.5, 1.0 and -0.5 */
19614 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
19615 one
= gen_reg_rtx (mode
);
19616 expand_simple_binop (mode
, PLUS
, half
, half
, one
, 0, OPTAB_DIRECT
);
19617 mhalf
= gen_reg_rtx (mode
);
19618 expand_simple_binop (mode
, MINUS
, half
, one
, mhalf
, 0, OPTAB_DIRECT
);
19621 tmp
= gen_reg_rtx (mode
);
19622 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19623 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
19624 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19625 gen_rtx_AND (mode
, one
, tmp
)));
19626 expand_simple_binop (mode
, MINUS
, xa2
, tmp
, xa2
, 0, OPTAB_DIRECT
);
19627 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19628 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
19629 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19630 gen_rtx_AND (mode
, one
, tmp
)));
19631 expand_simple_binop (mode
, PLUS
, xa2
, tmp
, xa2
, 0, OPTAB_DIRECT
);
19633 /* res = copysign (xa2, operand1) */
19634 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
19636 emit_label (label
);
19637 LABEL_NUSES (label
) = 1;
19639 emit_move_insn (operand0
, res
);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  /* xa = abs (operand1); MASK receives the sign mask for copysign.  */
  xa = ix86_expand_sse_fabs (res, &mask);
  /* Large values and NaNs take the early exit unchanged.  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0): largest value strictly below 0.5, so
     exact .5 cases round away from zero without double rounding.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa — truncate through an integer register.
     For DFmode this uses DImode (cvttsd2siq, 64bit targets only).  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
19691 #include "gt-i386.h"