1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit; -1 means "no limit known".  The target header
   may override this before we get here, hence the #ifndef guard.
   NOTE(review): the closing #endif was dropped by the extraction of this
   chunk; restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  Indices
   0..3 are QI/HI/SI/DI; everything else maps to the trailing "other"
   slot (index 4).
   NOTE(review): the final ": 4)" line of this macro was dropped by the
   extraction of this chunk; restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a byte count on the same scale the
   rtx-cost machinery uses for instruction counts.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost
= { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost
= { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost
= { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost
= {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost
= {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs geode_cost
= {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (2), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (4), /* HI */
344 COSTS_N_INSNS (7), /* SI */
345 COSTS_N_INSNS (7), /* DI */
346 COSTS_N_INSNS (7)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (23), /* HI */
350 COSTS_N_INSNS (39), /* SI */
351 COSTS_N_INSNS (39), /* DI */
352 COSTS_N_INSNS (39)}, /* other */
353 COSTS_N_INSNS (1), /* cost of movsx */
354 COSTS_N_INSNS (1), /* cost of movzx */
355 8, /* "large" insn */
357 1, /* cost for loading QImode using movzbl */
358 {1, 1, 1}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {1, 1, 1}, /* cost of storing integer registers */
362 1, /* cost of reg,reg fld/fst */
363 {1, 1, 1}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 6, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
368 1, /* cost of moving MMX register */
369 {1, 1}, /* cost of loading MMX registers
370 in SImode and DImode */
371 {1, 1}, /* cost of storing MMX registers
372 in SImode and DImode */
373 1, /* cost of moving SSE register */
374 {1, 1, 1}, /* cost of loading SSE registers
375 in SImode, DImode and TImode */
376 {1, 1, 1}, /* cost of storing SSE registers
377 in SImode, DImode and TImode */
378 1, /* MMX or SSE register to integer */
379 32, /* size of prefetch block */
380 1, /* number of parallel prefetches */
382 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
383 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
384 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
385 COSTS_N_INSNS (1), /* cost of FABS instruction. */
386 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
387 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
391 struct processor_costs k6_cost
= {
392 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (2), /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
397 COSTS_N_INSNS (3), /* HI */
398 COSTS_N_INSNS (3), /* SI */
399 COSTS_N_INSNS (3), /* DI */
400 COSTS_N_INSNS (3)}, /* other */
401 0, /* cost of multiply per each bit set */
402 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
403 COSTS_N_INSNS (18), /* HI */
404 COSTS_N_INSNS (18), /* SI */
405 COSTS_N_INSNS (18), /* DI */
406 COSTS_N_INSNS (18)}, /* other */
407 COSTS_N_INSNS (2), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */
411 3, /* cost for loading QImode using movzbl */
412 {4, 5, 4}, /* cost of loading integer registers
413 in QImode, HImode and SImode.
414 Relative to reg-reg move (2). */
415 {2, 3, 2}, /* cost of storing integer registers */
416 4, /* cost of reg,reg fld/fst */
417 {6, 6, 6}, /* cost of loading fp registers
418 in SFmode, DFmode and XFmode */
419 {4, 4, 4}, /* cost of storing fp registers
420 in SFmode, DFmode and XFmode */
421 2, /* cost of moving MMX register */
422 {2, 2}, /* cost of loading MMX registers
423 in SImode and DImode */
424 {2, 2}, /* cost of storing MMX registers
425 in SImode and DImode */
426 2, /* cost of moving SSE register */
427 {2, 2, 8}, /* cost of loading SSE registers
428 in SImode, DImode and TImode */
429 {2, 2, 8}, /* cost of storing SSE registers
430 in SImode, DImode and TImode */
431 6, /* MMX or SSE register to integer */
432 32, /* size of prefetch block */
433 1, /* number of parallel prefetches */
435 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
436 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
437 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
440 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
444 struct processor_costs athlon_cost
= {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (2), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (5), /* HI */
451 COSTS_N_INSNS (5), /* SI */
452 COSTS_N_INSNS (5), /* DI */
453 COSTS_N_INSNS (5)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (26), /* HI */
457 COSTS_N_INSNS (42), /* SI */
458 COSTS_N_INSNS (74), /* DI */
459 COSTS_N_INSNS (74)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
464 4, /* cost for loading QImode using movzbl */
465 {3, 4, 3}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {3, 4, 3}, /* cost of storing integer registers */
469 4, /* cost of reg,reg fld/fst */
470 {4, 4, 12}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {6, 6, 8}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {4, 4}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {4, 4}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {4, 4, 6}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {4, 4, 5}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 5, /* MMX or SSE register to integer */
485 64, /* size of prefetch block */
486 6, /* number of parallel prefetches */
488 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
497 struct processor_costs k8_cost
= {
498 COSTS_N_INSNS (1), /* cost of an add instruction */
499 COSTS_N_INSNS (2), /* cost of a lea instruction */
500 COSTS_N_INSNS (1), /* variable shift costs */
501 COSTS_N_INSNS (1), /* constant shift costs */
502 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (4), /* HI */
504 COSTS_N_INSNS (3), /* SI */
505 COSTS_N_INSNS (4), /* DI */
506 COSTS_N_INSNS (5)}, /* other */
507 0, /* cost of multiply per each bit set */
508 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
509 COSTS_N_INSNS (26), /* HI */
510 COSTS_N_INSNS (42), /* SI */
511 COSTS_N_INSNS (74), /* DI */
512 COSTS_N_INSNS (74)}, /* other */
513 COSTS_N_INSNS (1), /* cost of movsx */
514 COSTS_N_INSNS (1), /* cost of movzx */
515 8, /* "large" insn */
517 4, /* cost for loading QImode using movzbl */
518 {3, 4, 3}, /* cost of loading integer registers
519 in QImode, HImode and SImode.
520 Relative to reg-reg move (2). */
521 {3, 4, 3}, /* cost of storing integer registers */
522 4, /* cost of reg,reg fld/fst */
523 {4, 4, 12}, /* cost of loading fp registers
524 in SFmode, DFmode and XFmode */
525 {6, 6, 8}, /* cost of storing fp registers
526 in SFmode, DFmode and XFmode */
527 2, /* cost of moving MMX register */
528 {3, 3}, /* cost of loading MMX registers
529 in SImode and DImode */
530 {4, 4}, /* cost of storing MMX registers
531 in SImode and DImode */
532 2, /* cost of moving SSE register */
533 {4, 3, 6}, /* cost of loading SSE registers
534 in SImode, DImode and TImode */
535 {4, 4, 5}, /* cost of storing SSE registers
536 in SImode, DImode and TImode */
537 5, /* MMX or SSE register to integer */
538 64, /* size of prefetch block */
539 6, /* number of parallel prefetches */
541 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
542 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
543 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
544 COSTS_N_INSNS (2), /* cost of FABS instruction. */
545 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
546 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
550 struct processor_costs pentium4_cost
= {
551 COSTS_N_INSNS (1), /* cost of an add instruction */
552 COSTS_N_INSNS (3), /* cost of a lea instruction */
553 COSTS_N_INSNS (4), /* variable shift costs */
554 COSTS_N_INSNS (4), /* constant shift costs */
555 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
556 COSTS_N_INSNS (15), /* HI */
557 COSTS_N_INSNS (15), /* SI */
558 COSTS_N_INSNS (15), /* DI */
559 COSTS_N_INSNS (15)}, /* other */
560 0, /* cost of multiply per each bit set */
561 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
562 COSTS_N_INSNS (56), /* HI */
563 COSTS_N_INSNS (56), /* SI */
564 COSTS_N_INSNS (56), /* DI */
565 COSTS_N_INSNS (56)}, /* other */
566 COSTS_N_INSNS (1), /* cost of movsx */
567 COSTS_N_INSNS (1), /* cost of movzx */
568 16, /* "large" insn */
570 2, /* cost for loading QImode using movzbl */
571 {4, 5, 4}, /* cost of loading integer registers
572 in QImode, HImode and SImode.
573 Relative to reg-reg move (2). */
574 {2, 3, 2}, /* cost of storing integer registers */
575 2, /* cost of reg,reg fld/fst */
576 {2, 2, 6}, /* cost of loading fp registers
577 in SFmode, DFmode and XFmode */
578 {4, 4, 6}, /* cost of storing fp registers
579 in SFmode, DFmode and XFmode */
580 2, /* cost of moving MMX register */
581 {2, 2}, /* cost of loading MMX registers
582 in SImode and DImode */
583 {2, 2}, /* cost of storing MMX registers
584 in SImode and DImode */
585 12, /* cost of moving SSE register */
586 {12, 12, 12}, /* cost of loading SSE registers
587 in SImode, DImode and TImode */
588 {2, 2, 8}, /* cost of storing SSE registers
589 in SImode, DImode and TImode */
590 10, /* MMX or SSE register to integer */
591 64, /* size of prefetch block */
592 6, /* number of parallel prefetches */
594 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
595 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
596 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
597 COSTS_N_INSNS (2), /* cost of FABS instruction. */
598 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
599 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
603 struct processor_costs nocona_cost
= {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 COSTS_N_INSNS (1), /* cost of a lea instruction */
606 COSTS_N_INSNS (1), /* variable shift costs */
607 COSTS_N_INSNS (1), /* constant shift costs */
608 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
609 COSTS_N_INSNS (10), /* HI */
610 COSTS_N_INSNS (10), /* SI */
611 COSTS_N_INSNS (10), /* DI */
612 COSTS_N_INSNS (10)}, /* other */
613 0, /* cost of multiply per each bit set */
614 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
615 COSTS_N_INSNS (66), /* HI */
616 COSTS_N_INSNS (66), /* SI */
617 COSTS_N_INSNS (66), /* DI */
618 COSTS_N_INSNS (66)}, /* other */
619 COSTS_N_INSNS (1), /* cost of movsx */
620 COSTS_N_INSNS (1), /* cost of movzx */
621 16, /* "large" insn */
623 4, /* cost for loading QImode using movzbl */
624 {4, 4, 4}, /* cost of loading integer registers
625 in QImode, HImode and SImode.
626 Relative to reg-reg move (2). */
627 {4, 4, 4}, /* cost of storing integer registers */
628 3, /* cost of reg,reg fld/fst */
629 {12, 12, 12}, /* cost of loading fp registers
630 in SFmode, DFmode and XFmode */
631 {4, 4, 4}, /* cost of storing fp registers
632 in SFmode, DFmode and XFmode */
633 6, /* cost of moving MMX register */
634 {12, 12}, /* cost of loading MMX registers
635 in SImode and DImode */
636 {12, 12}, /* cost of storing MMX registers
637 in SImode and DImode */
638 6, /* cost of moving SSE register */
639 {12, 12, 12}, /* cost of loading SSE registers
640 in SImode, DImode and TImode */
641 {12, 12, 12}, /* cost of storing SSE registers
642 in SImode, DImode and TImode */
643 8, /* MMX or SSE register to integer */
644 128, /* size of prefetch block */
645 8, /* number of parallel prefetches */
647 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
648 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
649 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
650 COSTS_N_INSNS (3), /* cost of FABS instruction. */
651 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
652 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
655 /* Generic64 should produce code tuned for Nocona and K8. */
657 struct processor_costs generic64_cost
= {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 /* On all chips taken into consideration lea is 2 cycles and more. With
660 this cost however our current implementation of synth_mult results in
661 use of unnecessary temporary registers causing regression on several
662 SPECfp benchmarks. */
663 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
664 COSTS_N_INSNS (1), /* variable shift costs */
665 COSTS_N_INSNS (1), /* constant shift costs */
666 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (4), /* HI */
668 COSTS_N_INSNS (3), /* SI */
669 COSTS_N_INSNS (4), /* DI */
670 COSTS_N_INSNS (2)}, /* other */
671 0, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (26), /* HI */
674 COSTS_N_INSNS (42), /* SI */
675 COSTS_N_INSNS (74), /* DI */
676 COSTS_N_INSNS (74)}, /* other */
677 COSTS_N_INSNS (1), /* cost of movsx */
678 COSTS_N_INSNS (1), /* cost of movzx */
679 8, /* "large" insn */
681 4, /* cost for loading QImode using movzbl */
682 {4, 4, 4}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {4, 4, 4}, /* cost of storing integer registers */
686 4, /* cost of reg,reg fld/fst */
687 {12, 12, 12}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {6, 6, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {8, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {8, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {8, 8, 8}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {8, 8, 8}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 5, /* MMX or SSE register to integer */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
705 is increased to perhaps more appropriate value of 5. */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
717 struct processor_costs generic32_cost
= {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (4), /* HI */
724 COSTS_N_INSNS (3), /* SI */
725 COSTS_N_INSNS (4), /* DI */
726 COSTS_N_INSNS (2)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
737 4, /* cost for loading QImode using movzbl */
738 {4, 4, 4}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {4, 4, 4}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {12, 12, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {8, 8}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {8, 8}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {8, 8, 8}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {8, 8, 8}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of prefetch block */
759 6, /* number of parallel prefetches */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
769 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* value.
   Note: m_K6_GEODE is defined before m_K6; this is fine because macro
   bodies are only expanded at their point of use.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
788 /* Generic instruction choice should be common subset of supported CPUs
789 (PPro/PENT4/NOCONA/Athlon/K8). */
791 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
792 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
793 generic because it is not working well with PPro base chips. */
794 const int x86_use_leave
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC64
;
795 const int x86_push_memory
= m_386
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
796 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
797 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */;
798 const int x86_double_with_add
= ~m_386
;
799 const int x86_use_bit_test
= m_386
;
800 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
| m_GENERIC
;
801 const int x86_cmove
= m_PPRO
| m_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
802 const int x86_3dnow_a
= m_ATHLON_K8
;
803 const int x86_deep_branch
= m_PPRO
| m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_GENERIC
;
804 /* Branch hints were put in P4 based on simulation result. But
805 after P4 was made, no performance benefit was observed with
806 branch hints. It also increases the code size. As the result,
807 icc never generates branch hints. */
808 const int x86_branch_hints
= 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA
			 | m_GENERIC32; /* m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in
   conflict with partial reg. dependencies used by Athlon/P4 based chips,
   it is better to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4
			  | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA
			  | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA
				       | m_PPRO | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA
				       | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA
				      | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA
					 | m_PPRO | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8
					   | m_PENT4 | m_NOCONA | m_GENERIC;
/* In Generic model we have a conflict here in between PPro/Pentium4 based
   chips that treat 128bit SSE registers as single units versus K8 based
   chips that divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to be
   128bit to allow register renaming on 128bit SSE units, but usually
   results in one extra microop on 64bit SSE units.  Experimental results
   show that disabling this option on P4 brings over 20% SPECfp regression,
   while enabling it on K8 brings roughly 2.4% regression that can be
   partly masked by careful scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO
					   | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers.  Which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4
				    | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA
				| m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT
			 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
902 /* Array of the smallest class containing reg number REGNO, indexed by
903 REGNO. Used by REGNO_REG_CLASS in i386.h. */
905 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
908 AREG
, DREG
, CREG
, BREG
,
910 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
912 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
913 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
916 /* flags, fpsr, fpcr, dirflag, frame */
917 NO_REGS
, NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
918 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
920 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
922 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
923 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
924 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
928 /* The "default" register map used in 32bit mode. */
930 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
932 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
933 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
934 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
935 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
936 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
937 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
938 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
941 static int const x86_64_int_parameter_registers
[6] =
943 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
944 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* GCC register numbers used for returning integer values in 64bit mode.
   Fix: the original comment labeled regno 1 as RDI; per the parameter
   table above regno 1 is RDX (regno 5 is RDI).  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
952 /* The "default" register map used in 64bit mode. */
953 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
955 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
956 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
957 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
958 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
959 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
960 8,9,10,11,12,13,14,15, /* extended integer registers */
961 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
964 /* Define the register numbers to be used in Dwarf debugging information.
965 The SVR4 reference port C compiler uses the following register numbers
966 in its Dwarf output code:
967 0 for %eax (gcc regno = 0)
968 1 for %ecx (gcc regno = 2)
969 2 for %edx (gcc regno = 1)
970 3 for %ebx (gcc regno = 3)
971 4 for %esp (gcc regno = 7)
972 5 for %ebp (gcc regno = 6)
973 6 for %esi (gcc regno = 4)
974 7 for %edi (gcc regno = 5)
975 The following three DWARF register numbers are never generated by
976 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
977 believes these numbers have these meanings.
978 8 for %eip (no gcc equivalent)
979 9 for %eflags (gcc regno = 17)
980 10 for %trapno (no gcc equivalent)
981 It is not at all clear how we should number the FP stack registers
982 for the x86 architecture. If the version of SDB on x86/svr4 were
983 a bit less brain dead with respect to floating-point then we would
984 have a precedent to follow with respect to DWARF register numbers
985 for x86 FP registers, but the SDB on x86/svr4 is so completely
986 broken with respect to FP registers that it is hardly worth thinking
987 of it as something to strive for compatibility with.
988 The version of x86/svr4 SDB I have at the moment does (partially)
989 seem to believe that DWARF register number 11 is associated with
990 the x86 register %st(0), but that's about all. Higher DWARF
991 register numbers don't seem to be associated with anything in
992 particular, and even for DWARF regno 11, SDB only seems to under-
993 stand that it should say that a variable lives in %st(0) (when
994 asked via an `=' command) if we said it was in DWARF regno 11,
995 but SDB still prints garbage when asked for the value of the
996 variable in question (via a `/' command).
997 (Also note that the labels SDB prints for various FP stack regs
998 when doing an `x' command are all wrong.)
999 Note that these problems generally don't affect the native SVR4
1000 C compiler because it doesn't allow the use of -O with -g and
1001 because when it is *not* optimizing, it allocates a memory
1002 location for each floating-point variable, and the memory
1003 location is what gets described in the DWARF AT_location
1004 attribute for the variable in question.
1005 Regardless of the severe mental illness of the x86/svr4 SDB, we
1006 do something sensible here and we use the following DWARF
1007 register numbers. Note that these are all stack-top-relative
1009 11 for %st(0) (gcc regno = 8)
1010 12 for %st(1) (gcc regno = 9)
1011 13 for %st(2) (gcc regno = 10)
1012 14 for %st(3) (gcc regno = 11)
1013 15 for %st(4) (gcc regno = 12)
1014 16 for %st(5) (gcc regno = 13)
1015 17 for %st(6) (gcc regno = 14)
	18 for %st(7)  (gcc regno = 15)  */
1018 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1020 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1021 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1022 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1023 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1024 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1025 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1026 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;	/* First operand of pending compare.  */
rtx ix86_compare_op1 = NULL_RTX;	/* Second operand of pending compare.  */
/* NOTE(review): presumably a flags-register result already emitted for the
   pending compare — confirm against the i386.md users.  */
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area: one word per integer argument register
   plus 16 bytes per SSE argument register.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1039 /* Define the structure for the machine field in struct function. */
1041 struct stack_local_entry
GTY(())
1043 unsigned short mode
;
1046 struct stack_local_entry
*next
;
1049 /* Structure describing stack frame layout.
1050 Stack grows downward:
1056 saved frame pointer if frame_pointer_needed
1057 <- HARD_FRAME_POINTER
1062 [va_arg registers] (
1063 > to_allocate <- FRAME_POINTER
1073 HOST_WIDE_INT frame
;
1075 int outgoing_arguments_size
;
1078 HOST_WIDE_INT to_allocate
;
1079 /* The offsets relative to ARG_POINTER. */
1080 HOST_WIDE_INT frame_pointer_offset
;
1081 HOST_WIDE_INT hard_frame_pointer_offset
;
1082 HOST_WIDE_INT stack_pointer_offset
;
1084 /* When save_regs_using_mov is set, emit prologue using
1085 move instead of push instructions. */
1086 bool save_regs_using_mov
;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler syntax dialect; AT&T syntax by default.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect; GNU by default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* -mstackrealign option.  */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1129 static bool ix86_handle_option (size_t, const char *, int);
1130 static void output_pic_addr_const (FILE *, rtx
, int);
1131 static void put_condition_code (enum rtx_code
, enum machine_mode
,
1133 static const char *get_some_local_dynamic_name (void);
1134 static int get_some_local_dynamic_name_1 (rtx
*, void *);
1135 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
1136 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
1138 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1139 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
1141 static rtx
get_thread_pointer (int);
1142 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
1143 static void get_pc_thunk_name (char [32], unsigned int);
1144 static rtx
gen_push (rtx
);
1145 static int ix86_flags_dependent (rtx
, rtx
, enum attr_type
);
1146 static int ix86_agi_dependent (rtx
, rtx
, enum attr_type
);
1147 static struct machine_function
* ix86_init_machine_status (void);
1148 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
1149 static int ix86_nsaved_regs (void);
1150 static void ix86_emit_save_regs (void);
1151 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
1152 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
1153 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
1154 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
1155 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
1156 static rtx
ix86_expand_aligntest (rtx
, int);
1157 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
1158 static int ix86_issue_rate (void);
1159 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
1160 static int ia32_multipass_dfa_lookahead (void);
1161 static void ix86_init_mmx_sse_builtins (void);
1162 static rtx
x86_this_parameter (tree
);
1163 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
1164 HOST_WIDE_INT
, tree
);
1165 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
1166 static void x86_file_start (void);
1167 static void ix86_reorg (void);
1168 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
1169 static tree
ix86_build_builtin_va_list (void);
1170 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
1172 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
1173 static bool ix86_scalar_mode_supported_p (enum machine_mode
);
1174 static bool ix86_vector_mode_supported_p (enum machine_mode
);
1176 static int ix86_address_cost (rtx
);
1177 static bool ix86_cannot_force_const_mem (rtx
);
1178 static rtx
ix86_delegitimize_address (rtx
);
1180 static void i386_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
1182 struct builtin_description
;
1183 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
1185 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
1187 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
1188 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
1189 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
1190 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
1191 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
1192 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
1193 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
1194 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
1195 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
1196 static int ix86_fp_comparison_cost (enum rtx_code code
);
1197 static unsigned int ix86_select_alt_pic_regnum (void);
1198 static int ix86_save_reg (unsigned int, int);
1199 static void ix86_compute_frame_layout (struct ix86_frame
*);
1200 static int ix86_comp_type_attributes (tree
, tree
);
1201 static int ix86_function_regparm (tree
, tree
);
1202 const struct attribute_spec ix86_attribute_table
[];
1203 static bool ix86_function_ok_for_sibcall (tree
, tree
);
1204 static tree
ix86_handle_cconv_attribute (tree
*, tree
, tree
, int, bool *);
1205 static int ix86_value_regno (enum machine_mode
, tree
, tree
);
1206 static bool contains_128bit_aligned_vector_p (tree
);
1207 static rtx
ix86_struct_value_rtx (tree
, int);
1208 static bool ix86_ms_bitfield_layout_p (tree
);
1209 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1210 static int extended_reg_mentioned_1 (rtx
*, void *);
1211 static bool ix86_rtx_costs (rtx
, int, int, int *);
1212 static int min_insn_size (rtx
);
1213 static tree
ix86_md_asm_clobbers (tree outputs
, tree inputs
, tree clobbers
);
1214 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
1215 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
1217 static void ix86_init_builtins (void);
1218 static rtx
ix86_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
1219 static const char *ix86_mangle_fundamental_type (tree
);
1220 static tree
ix86_stack_protect_fail (void);
1221 static rtx
ix86_internal_arg_pointer (void);
1222 static void ix86_dwarf_handle_frame_unspec (const char *, rtx
, int);
1224 /* This function is only used on Solaris. */
1225 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
1228 /* Register class used for passing given 64bit part of the argument.
1229 These represent classes as documented by the PS ABI, with the exception
1230 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1231 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1233 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1234 whenever possible (upper half does contain padding).
1236 enum x86_64_reg_class
1239 X86_64_INTEGER_CLASS
,
1240 X86_64_INTEGERSI_CLASS
,
1247 X86_64_COMPLEX_X87_CLASS
,
/* Printable names corresponding to enum x86_64_reg_class values.  */
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
/* NOTE(review): presumably the maximum number of eightbyte classes an
   argument can occupy in the 64bit ABI classification — confirm against
   classify_argument.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
/* Set once init_ext_80387_constants has filled the table above.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
1261 static bool ix86_in_large_data_p (tree
) ATTRIBUTE_UNUSED
;
1262 static void ix86_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
1263 static void x86_64_elf_unique_section (tree decl
, int reloc
) ATTRIBUTE_UNUSED
;
1264 static section
*x86_64_elf_select_section (tree decl
, int reloc
,
1265 unsigned HOST_WIDE_INT align
)
1268 /* Initialize the GCC target structure. */
1269 #undef TARGET_ATTRIBUTE_TABLE
1270 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1272 # undef TARGET_MERGE_DECL_ATTRIBUTES
1273 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1276 #undef TARGET_COMP_TYPE_ATTRIBUTES
1277 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1279 #undef TARGET_INIT_BUILTINS
1280 #define TARGET_INIT_BUILTINS ix86_init_builtins
1281 #undef TARGET_EXPAND_BUILTIN
1282 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1284 #undef TARGET_ASM_FUNCTION_EPILOGUE
1285 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1287 #undef TARGET_ENCODE_SECTION_INFO
1288 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1289 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1291 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1294 #undef TARGET_ASM_OPEN_PAREN
1295 #define TARGET_ASM_OPEN_PAREN ""
1296 #undef TARGET_ASM_CLOSE_PAREN
1297 #define TARGET_ASM_CLOSE_PAREN ""
1299 #undef TARGET_ASM_ALIGNED_HI_OP
1300 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1301 #undef TARGET_ASM_ALIGNED_SI_OP
1302 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1304 #undef TARGET_ASM_ALIGNED_DI_OP
1305 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1308 #undef TARGET_ASM_UNALIGNED_HI_OP
1309 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1310 #undef TARGET_ASM_UNALIGNED_SI_OP
1311 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1312 #undef TARGET_ASM_UNALIGNED_DI_OP
1313 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1315 #undef TARGET_SCHED_ADJUST_COST
1316 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1317 #undef TARGET_SCHED_ISSUE_RATE
1318 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1319 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1320 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1321 ia32_multipass_dfa_lookahead
1323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1324 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1327 #undef TARGET_HAVE_TLS
1328 #define TARGET_HAVE_TLS true
1330 #undef TARGET_CANNOT_FORCE_CONST_MEM
1331 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1332 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1333 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1335 #undef TARGET_DELEGITIMIZE_ADDRESS
1336 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1338 #undef TARGET_MS_BITFIELD_LAYOUT_P
1339 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1342 #undef TARGET_BINDS_LOCAL_P
1343 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1346 #undef TARGET_ASM_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1348 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1349 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1351 #undef TARGET_ASM_FILE_START
1352 #define TARGET_ASM_FILE_START x86_file_start
1354 #undef TARGET_DEFAULT_TARGET_FLAGS
1355 #define TARGET_DEFAULT_TARGET_FLAGS \
1357 | TARGET_64BIT_DEFAULT \
1358 | TARGET_SUBTARGET_DEFAULT \
1359 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1361 #undef TARGET_HANDLE_OPTION
1362 #define TARGET_HANDLE_OPTION ix86_handle_option
1364 #undef TARGET_RTX_COSTS
1365 #define TARGET_RTX_COSTS ix86_rtx_costs
1366 #undef TARGET_ADDRESS_COST
1367 #define TARGET_ADDRESS_COST ix86_address_cost
1369 #undef TARGET_FIXED_CONDITION_CODE_REGS
1370 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1371 #undef TARGET_CC_MODES_COMPATIBLE
1372 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1374 #undef TARGET_MACHINE_DEPENDENT_REORG
1375 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1377 #undef TARGET_BUILD_BUILTIN_VA_LIST
1378 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1380 #undef TARGET_MD_ASM_CLOBBERS
1381 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1383 #undef TARGET_PROMOTE_PROTOTYPES
1384 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1385 #undef TARGET_STRUCT_VALUE_RTX
1386 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1387 #undef TARGET_SETUP_INCOMING_VARARGS
1388 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1389 #undef TARGET_MUST_PASS_IN_STACK
1390 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1391 #undef TARGET_PASS_BY_REFERENCE
1392 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1393 #undef TARGET_INTERNAL_ARG_POINTER
1394 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1395 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1396 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1408 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1409 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1412 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1413 #undef TARGET_INSERT_ATTRIBUTES
1414 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1417 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1418 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1420 #undef TARGET_STACK_PROTECT_FAIL
1421 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1423 #undef TARGET_FUNCTION_VALUE
1424 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The target hook vector for this backend, assembled from the TARGET_*
   macro overrides above via TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
1429 /* The svr4 ABI for the i386 says that records and unions are returned
1431 #ifndef DEFAULT_PCC_STRUCT_RETURN
1432 #define DEFAULT_PCC_STRUCT_RETURN 1
1435 /* Implement TARGET_HANDLE_OPTION. */
1438 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1445 target_flags
&= ~MASK_3DNOW_A
;
1446 target_flags_explicit
|= MASK_3DNOW_A
;
1453 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1454 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
1461 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
);
1462 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
;
1469 target_flags
&= ~MASK_SSE3
;
1470 target_flags_explicit
|= MASK_SSE3
;
1479 /* Sometimes certain combinations of command options do not make
1480 sense on a particular target machine. You can define a macro
1481 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1482 defined, is executed once just after all the command options have
1485 Don't use this macro to turn on various extra optimizations for
1486 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1489 override_options (void)
1492 int ix86_tune_defaulted
= 0;
1494 /* Comes from final.c -- no real reason to change it. */
1495 #define MAX_CODE_ALIGN 16
1499 const struct processor_costs
*cost
; /* Processor costs */
1500 const int target_enable
; /* Target flags to enable. */
1501 const int target_disable
; /* Target flags to disable. */
1502 const int align_loop
; /* Default alignments. */
1503 const int align_loop_max_skip
;
1504 const int align_jump
;
1505 const int align_jump_max_skip
;
1506 const int align_func
;
1508 const processor_target_table
[PROCESSOR_max
] =
1510 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1511 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1512 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1513 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1514 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1515 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1516 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1517 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1518 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1519 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1520 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1521 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16}
1524 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1527 const char *const name
; /* processor name or nickname. */
1528 const enum processor_type processor
;
1529 const enum pta_flags
1535 PTA_PREFETCH_SSE
= 16,
1542 const processor_alias_table
[] =
1544 {"i386", PROCESSOR_I386
, 0},
1545 {"i486", PROCESSOR_I486
, 0},
1546 {"i586", PROCESSOR_PENTIUM
, 0},
1547 {"pentium", PROCESSOR_PENTIUM
, 0},
1548 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1549 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1550 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1551 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1552 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1553 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1554 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1555 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1556 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1557 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1558 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1559 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1560 | PTA_MMX
| PTA_PREFETCH_SSE
},
1561 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1562 | PTA_MMX
| PTA_PREFETCH_SSE
},
1563 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1564 | PTA_MMX
| PTA_PREFETCH_SSE
},
1565 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1566 | PTA_MMX
| PTA_PREFETCH_SSE
},
1567 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1569 {"k6", PROCESSOR_K6
, PTA_MMX
},
1570 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1571 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1572 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1574 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1575 | PTA_3DNOW
| PTA_3DNOW_A
},
1576 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1577 | PTA_3DNOW_A
| PTA_SSE
},
1578 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1579 | PTA_3DNOW_A
| PTA_SSE
},
1580 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1581 | PTA_3DNOW_A
| PTA_SSE
},
1582 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1583 | PTA_SSE
| PTA_SSE2
},
1584 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1585 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1586 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1587 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1588 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1589 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1590 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1591 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1592 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1593 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1596 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1599 SUBTARGET_OVERRIDE_OPTIONS
;
1602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1603 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1606 /* -fPIC is the default for x86_64. */
1607 if (TARGET_MACHO
&& TARGET_64BIT
)
1610 /* Set the default values for switches whose default depends on TARGET_64BIT
1611 in case they weren't overwritten by command line options. */
1614 /* Mach-O doesn't support omitting the frame pointer for now. */
1615 if (flag_omit_frame_pointer
== 2)
1616 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1617 if (flag_asynchronous_unwind_tables
== 2)
1618 flag_asynchronous_unwind_tables
= 1;
1619 if (flag_pcc_struct_return
== 2)
1620 flag_pcc_struct_return
= 0;
1624 if (flag_omit_frame_pointer
== 2)
1625 flag_omit_frame_pointer
= 0;
1626 if (flag_asynchronous_unwind_tables
== 2)
1627 flag_asynchronous_unwind_tables
= 0;
1628 if (flag_pcc_struct_return
== 2)
1629 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1632 /* Need to check -mtune=generic first. */
1633 if (ix86_tune_string
)
1635 if (!strcmp (ix86_tune_string
, "generic")
1636 || !strcmp (ix86_tune_string
, "i686")
1637 /* As special support for cross compilers we read -mtune=native
1638 as -mtune=generic. With native compilers we won't see the
1639 -mtune=native, as it was changed by the driver. */
1640 || !strcmp (ix86_tune_string
, "native"))
1643 ix86_tune_string
= "generic64";
1645 ix86_tune_string
= "generic32";
1647 else if (!strncmp (ix86_tune_string
, "generic", 7))
1648 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1652 if (ix86_arch_string
)
1653 ix86_tune_string
= ix86_arch_string
;
1654 if (!ix86_tune_string
)
1656 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1657 ix86_tune_defaulted
= 1;
1660 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1661 need to use a sensible tune option. */
1662 if (!strcmp (ix86_tune_string
, "generic")
1663 || !strcmp (ix86_tune_string
, "x86-64")
1664 || !strcmp (ix86_tune_string
, "i686"))
1667 ix86_tune_string
= "generic64";
1669 ix86_tune_string
= "generic32";
1672 if (!strcmp (ix86_tune_string
, "x86-64"))
1673 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1674 "-mtune=generic instead as appropriate.");
1676 if (!ix86_arch_string
)
1677 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1678 if (!strcmp (ix86_arch_string
, "generic"))
1679 error ("generic CPU can be used only for -mtune= switch");
1680 if (!strncmp (ix86_arch_string
, "generic", 7))
1681 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1683 if (ix86_cmodel_string
!= 0)
1685 if (!strcmp (ix86_cmodel_string
, "small"))
1686 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1687 else if (!strcmp (ix86_cmodel_string
, "medium"))
1688 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
1690 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1691 else if (!strcmp (ix86_cmodel_string
, "32"))
1692 ix86_cmodel
= CM_32
;
1693 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1694 ix86_cmodel
= CM_KERNEL
;
1695 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1696 ix86_cmodel
= CM_LARGE
;
1698 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1702 ix86_cmodel
= CM_32
;
1704 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1706 if (ix86_asm_string
!= 0)
1709 && !strcmp (ix86_asm_string
, "intel"))
1710 ix86_asm_dialect
= ASM_INTEL
;
1711 else if (!strcmp (ix86_asm_string
, "att"))
1712 ix86_asm_dialect
= ASM_ATT
;
1714 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1716 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1717 error ("code model %qs not supported in the %s bit mode",
1718 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1719 if (ix86_cmodel
== CM_LARGE
)
1720 sorry ("code model %<large%> not supported yet");
1721 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1722 sorry ("%i-bit mode not compiled in",
1723 (target_flags
& MASK_64BIT
) ? 64 : 32);
1725 for (i
= 0; i
< pta_size
; i
++)
1726 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1728 ix86_arch
= processor_alias_table
[i
].processor
;
1729 /* Default cpu tuning to the architecture. */
1730 ix86_tune
= ix86_arch
;
1731 if (processor_alias_table
[i
].flags
& PTA_MMX
1732 && !(target_flags_explicit
& MASK_MMX
))
1733 target_flags
|= MASK_MMX
;
1734 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1735 && !(target_flags_explicit
& MASK_3DNOW
))
1736 target_flags
|= MASK_3DNOW
;
1737 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1738 && !(target_flags_explicit
& MASK_3DNOW_A
))
1739 target_flags
|= MASK_3DNOW_A
;
1740 if (processor_alias_table
[i
].flags
& PTA_SSE
1741 && !(target_flags_explicit
& MASK_SSE
))
1742 target_flags
|= MASK_SSE
;
1743 if (processor_alias_table
[i
].flags
& PTA_SSE2
1744 && !(target_flags_explicit
& MASK_SSE2
))
1745 target_flags
|= MASK_SSE2
;
1746 if (processor_alias_table
[i
].flags
& PTA_SSE3
1747 && !(target_flags_explicit
& MASK_SSE3
))
1748 target_flags
|= MASK_SSE3
;
1749 if (processor_alias_table
[i
].flags
& PTA_SSSE3
1750 && !(target_flags_explicit
& MASK_SSSE3
))
1751 target_flags
|= MASK_SSSE3
;
1752 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1753 x86_prefetch_sse
= true;
1754 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1755 error ("CPU you selected does not support x86-64 "
1761 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1763 for (i
= 0; i
< pta_size
; i
++)
1764 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1766 ix86_tune
= processor_alias_table
[i
].processor
;
1767 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1769 if (ix86_tune_defaulted
)
1771 ix86_tune_string
= "x86-64";
1772 for (i
= 0; i
< pta_size
; i
++)
1773 if (! strcmp (ix86_tune_string
,
1774 processor_alias_table
[i
].name
))
1776 ix86_tune
= processor_alias_table
[i
].processor
;
1779 error ("CPU you selected does not support x86-64 "
1782 /* Intel CPUs have always interpreted SSE prefetch instructions as
1783 NOPs; so, we can enable SSE prefetch instructions even when
1784 -mtune (rather than -march) points us to a processor that has them.
1785 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1786 higher processors. */
1787 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1788 x86_prefetch_sse
= true;
1792 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1795 ix86_cost
= &size_cost
;
1797 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1798 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1799 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1801 /* Arrange to set up i386_stack_locals for all functions. */
1802 init_machine_status
= ix86_init_machine_status
;
1804 /* Validate -mregparm= value. */
1805 if (ix86_regparm_string
)
1807 i
= atoi (ix86_regparm_string
);
1808 if (i
< 0 || i
> REGPARM_MAX
)
1809 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1815 ix86_regparm
= REGPARM_MAX
;
1817 /* If the user has provided any of the -malign-* options,
1818 warn and use that value only if -falign-* is not set.
1819 Remove this code in GCC 3.2 or later. */
1820 if (ix86_align_loops_string
)
1822 warning (0, "-malign-loops is obsolete, use -falign-loops");
1823 if (align_loops
== 0)
1825 i
= atoi (ix86_align_loops_string
);
1826 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1827 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1829 align_loops
= 1 << i
;
1833 if (ix86_align_jumps_string
)
1835 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1836 if (align_jumps
== 0)
1838 i
= atoi (ix86_align_jumps_string
);
1839 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1840 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1842 align_jumps
= 1 << i
;
1846 if (ix86_align_funcs_string
)
1848 warning (0, "-malign-functions is obsolete, use -falign-functions");
1849 if (align_functions
== 0)
1851 i
= atoi (ix86_align_funcs_string
);
1852 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1853 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1855 align_functions
= 1 << i
;
1859 /* Default align_* from the processor table. */
1860 if (align_loops
== 0)
1862 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1863 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1865 if (align_jumps
== 0)
1867 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1868 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1870 if (align_functions
== 0)
1872 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1875 /* Validate -mbranch-cost= value, or provide default. */
1876 ix86_branch_cost
= ix86_cost
->branch_cost
;
1877 if (ix86_branch_cost_string
)
1879 i
= atoi (ix86_branch_cost_string
);
1881 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1883 ix86_branch_cost
= i
;
1885 if (ix86_section_threshold_string
)
1887 i
= atoi (ix86_section_threshold_string
);
1889 error ("-mlarge-data-threshold=%d is negative", i
);
1891 ix86_section_threshold
= i
;
1894 if (ix86_tls_dialect_string
)
1896 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1897 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1898 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
1899 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
1900 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1901 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1903 error ("bad value (%s) for -mtls-dialect= switch",
1904 ix86_tls_dialect_string
);
1907 /* Keep nonleaf frame pointers. */
1908 if (flag_omit_frame_pointer
)
1909 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1910 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1911 flag_omit_frame_pointer
= 1;
1913 /* If we're doing fast math, we don't care about comparison order
1914 wrt NaNs. This lets us use a shorter comparison sequence. */
1915 if (flag_finite_math_only
)
1916 target_flags
&= ~MASK_IEEE_FP
;
1918 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1919 since the insns won't need emulation. */
1920 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1921 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1923 /* Likewise, if the target doesn't have a 387, or we've specified
1924 software floating point, don't use 387 inline intrinsics. */
1926 target_flags
|= MASK_NO_FANCY_MATH_387
;
1928 /* Turn on SSE3 builtins for -mssse3. */
1930 target_flags
|= MASK_SSE3
;
1932 /* Turn on SSE2 builtins for -msse3. */
1934 target_flags
|= MASK_SSE2
;
1936 /* Turn on SSE builtins for -msse2. */
1938 target_flags
|= MASK_SSE
;
1940 /* Turn on MMX builtins for -msse. */
1943 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1944 x86_prefetch_sse
= true;
1947 /* Turn on MMX builtins for 3Dnow. */
1949 target_flags
|= MASK_MMX
;
1953 if (TARGET_ALIGN_DOUBLE
)
1954 error ("-malign-double makes no sense in the 64bit mode");
1956 error ("-mrtd calling convention not supported in the 64bit mode");
1958 /* Enable by default the SSE and MMX builtins. Do allow the user to
1959 explicitly disable any of these. In particular, disabling SSE and
1960 MMX for kernel code is extremely useful. */
1962 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1963 & ~target_flags_explicit
);
1967 /* i386 ABI does not specify red zone. It still makes sense to use it
1968 when programmer takes care to stack from being destroyed. */
1969 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1970 target_flags
|= MASK_NO_RED_ZONE
;
1973 /* Validate -mpreferred-stack-boundary= value, or provide default.
1974 The default of 128 bits is for Pentium III's SSE __m128. We can't
1975 change it because of optimize_size. Otherwise, we can't mix object
1976 files compiled with -Os and -On. */
1977 ix86_preferred_stack_boundary
= 128;
1978 if (ix86_preferred_stack_boundary_string
)
1980 i
= atoi (ix86_preferred_stack_boundary_string
);
1981 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1982 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1983 TARGET_64BIT
? 4 : 2);
1985 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1988 /* Accept -msseregparm only if at least SSE support is enabled. */
1989 if (TARGET_SSEREGPARM
1991 error ("-msseregparm used without SSE enabled");
1993 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
1995 if (ix86_fpmath_string
!= 0)
1997 if (! strcmp (ix86_fpmath_string
, "387"))
1998 ix86_fpmath
= FPMATH_387
;
1999 else if (! strcmp (ix86_fpmath_string
, "sse"))
2003 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2004 ix86_fpmath
= FPMATH_387
;
2007 ix86_fpmath
= FPMATH_SSE
;
2009 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2010 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2014 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2015 ix86_fpmath
= FPMATH_387
;
2017 else if (!TARGET_80387
)
2019 warning (0, "387 instruction set disabled, using SSE arithmetics");
2020 ix86_fpmath
= FPMATH_SSE
;
2023 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2026 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2029 /* If the i387 is disabled, then do not return values in it. */
2031 target_flags
&= ~MASK_FLOAT_RETURNS
;
2033 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
2034 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2036 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2038 /* ??? Unwind info is not correct around the CFG unless either a frame
2039 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2040 unwind info generation to be aware of the CFG and propagating states
2042 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2043 || flag_exceptions
|| flag_non_call_exceptions
)
2044 && flag_omit_frame_pointer
2045 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2047 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2048 warning (0, "unwind tables currently require either a frame pointer "
2049 "or -maccumulate-outgoing-args for correctness");
2050 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2053 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2056 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2057 p
= strchr (internal_label_prefix
, 'X');
2058 internal_label_prefix_len
= p
- internal_label_prefix
;
2062 /* When scheduling description is not available, disable scheduler pass
2063 so it won't slow down the compilation and make x87 code slower. */
2064 if (!TARGET_SCHEDULE
)
2065 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
/* NOTE(review): this extract is line-wrapped mid-token and the embedded
   upstream line numbers jump (2082 -> 2087, 2097 -> 2101, ...), so the
   return type ("static section *"), braces, `break' statements and the
   `default' label of the switch are missing here.  The text below is
   kept verbatim; restore from upstream GCC config/i386/i386.c before
   attempting to compile.  */
/* Purpose (from the visible code): for medium-model x86-64 objects that
   live in "large data" (ix86_in_large_data_p), pick a ".l*" section
   (.ldata.rel, .ldata.rel.local, ...) by decl category; everything else
   falls through to default_elf_select_section.  */
2068 /* switch to the appropriate section for output of DECL.
2069 DECL is either a `VAR_DECL' node or a constant of some sort.
2070 RELOC indicates whether forming the initial value of DECL requires
2071 link-time relocations. */
2074 x86_64_elf_select_section (tree decl
, int reloc
,
2075 unsigned HOST_WIDE_INT align
)
2077 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2078 && ix86_in_large_data_p (decl
))
2080 const char *sname
= NULL
;
2081 unsigned int flags
= SECTION_WRITE
;
/* Section name chosen per categorize_decl_for_section result.  */
2082 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2087 case SECCAT_DATA_REL
:
2088 sname
= ".ldata.rel";
2090 case SECCAT_DATA_REL_LOCAL
:
2091 sname
= ".ldata.rel.local";
2093 case SECCAT_DATA_REL_RO
:
2094 sname
= ".ldata.rel.ro";
2096 case SECCAT_DATA_REL_RO_LOCAL
:
2097 sname
= ".ldata.rel.ro.local";
/* BSS-category decls additionally get SECTION_BSS (the case label
   itself is among the dropped lines 2098-2100).  */
2101 flags
|= SECTION_BSS
;
2104 case SECCAT_RODATA_MERGE_STR
:
2105 case SECCAT_RODATA_MERGE_STR_INIT
:
2106 case SECCAT_RODATA_MERGE_CONST
:
2110 case SECCAT_SRODATA
:
2117 /* We don't split these for medium model. Place them into
2118 default sections and hope for best. */
2123 /* We might get called with string constants, but get_named_section
2124 doesn't like them as they are not DECLs. Also, we need to set
2125 flags in that case. */
2127 return get_section (sname
, flags
, NULL
);
2128 return get_named_section (decl
, sname
, reloc
);
2131 return default_elf_select_section (decl
, reloc
, align
);
/* NOTE(review): damaged extract -- upstream line numbers jump
   (2149 -> 2152, 2167 -> 2174, 2175 -> 2183), so the switch braces,
   `break's, the `default' label, the declarations of `plen', `nlen',
   `name' and `string', and the function's return type are missing.
   Code kept verbatim; restore from upstream GCC before compiling.  */
/* Purpose (from the visible code): for medium-model large data, build
   a per-decl section name by concatenating a ".l*" prefix (linkonce
   variant when !HAVE_COMDAT_GROUP) with the stripped assembler name,
   and store it via DECL_SECTION_NAME; otherwise defer to
   default_unique_section.  */
2134 /* Build up a unique section name, expressed as a
2135 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2136 RELOC indicates whether the initial value of EXP requires
2137 link-time relocations. */
2140 x86_64_elf_unique_section (tree decl
, int reloc
)
2142 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2143 && ix86_in_large_data_p (decl
))
2145 const char *prefix
= NULL
;
2146 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2147 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2149 switch (categorize_decl_for_section (decl
, reloc
, flag_pic
))
2152 case SECCAT_DATA_REL
:
2153 case SECCAT_DATA_REL_LOCAL
:
2154 case SECCAT_DATA_REL_RO
:
2155 case SECCAT_DATA_REL_RO_LOCAL
:
2156 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
/* BSS-category case label (lines 2157-2158) dropped by extraction.  */
2159 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2162 case SECCAT_RODATA_MERGE_STR
:
2163 case SECCAT_RODATA_MERGE_STR_INIT
:
2164 case SECCAT_RODATA_MERGE_CONST
:
2165 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2167 case SECCAT_SRODATA
:
2174 /* We don't split these for medium model. Place them into
2175 default sections and hope for best. */
/* Assemble prefix + stripped assembler name into a stack buffer.  */
2183 plen
= strlen (prefix
);
2185 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2186 name
= targetm
.strip_name_encoding (name
);
2187 nlen
= strlen (name
);
2189 string
= alloca (nlen
+ plen
+ 1);
2190 memcpy (string
, prefix
, plen
);
2191 memcpy (string
+ plen
, name
, nlen
+ 1);
2193 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2197 default_unique_section (decl
, reloc
);
/* NOTE(review): damaged extract -- the function's return type, the
   `align' parameter declaration (upstream line ~2209) and the `else'
   around the two fprintf calls are missing.  Code kept verbatim.  */
/* Purpose (from the visible code): emit an aligned common symbol;
   medium-model x86-64 objects above -mlarge-data-threshold use the
   ".largecomm" pseudo-op, everything else uses COMMON_ASM_OP, followed
   by "name,size,align-in-bytes".  */
2200 #ifdef COMMON_ASM_OP
2201 /* This says how to output assembler code to declare an
2202 uninitialized external linkage data object.
2204 For medium model x86-64 we need to use .largecomm opcode for
2207 x86_elf_aligned_common (FILE *file
,
2208 const char *name
, unsigned HOST_WIDE_INT size
,
2211 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2212 && size
> (unsigned int)ix86_section_threshold
)
2213 fprintf (file
, ".largecomm\t");
2215 fprintf (file
, "%s", COMMON_ASM_OP
);
2216 assemble_name (file
, name
);
/* Note: "," HOST_WIDE_INT_PRINT_UNSIGNED "," relies on string-literal
   concatenation with the target's printf-format macro.  */
2217 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2218 size
, align
/ BITS_PER_UNIT
);
/* NOTE(review): damaged extract -- return type, parameter list tail
   (`align'), `else' branches and the `#else' of the
   ASM_DECLARE_OBJECT_NAME conditional are missing (upstream numbering
   jumps 2226 -> 2229, 2237 -> 2239).  Code kept verbatim.  */
/* Purpose (from the visible code): place a BSS object -- .lbss for
   medium-model objects above -mlarge-data-threshold, the normal bss
   section otherwise -- align it, declare/label it, then reserve
   `size' bytes (at least 1) with ASM_OUTPUT_SKIP.  */
2221 /* Utility function for targets to use in implementing
2222 ASM_OUTPUT_ALIGNED_BSS. */
2225 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2226 const char *name
, unsigned HOST_WIDE_INT size
,
2229 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2230 && size
> (unsigned int)ix86_section_threshold
)
2231 switch_to_section (get_named_section (decl
, ".lbss", 0));
2233 switch_to_section (bss_section
);
2234 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2235 #ifdef ASM_DECLARE_OBJECT_NAME
2236 last_assemble_variable_decl
= decl
;
2237 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2239 /* Standard thing is just output label for the object. */
2240 ASM_OUTPUT_LABEL (file
, name
);
2241 #endif /* ASM_DECLARE_OBJECT_NAME */
/* size==0 still reserves one byte so the label is addressable.  */
2242 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* NOTE(review): damaged extract -- return type, the `if (level > 1)'
   and `if (TARGET_MACHO)'-style guards around the assignments, and
   the closing `#endif's are missing (upstream numbering jumps
   2251 -> 2253, 2253 -> 2257).  Code kept verbatim.  */
/* Purpose (from the visible code): per-level optimization defaults for
   this backend.  The sentinel value 2 marks flags whose real default
   depends on TARGET_64BIT and is resolved later in override_options
   (see the handling of flag_omit_frame_pointer et al. earlier in this
   file).  */
2247 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2249 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2250 make the problem with not enough registers even worse. */
2251 #ifdef INSN_SCHEDULING
2253 flag_schedule_insns
= 0;
2257 /* The Darwin libraries never set errno, so we might as well
2258 avoid calling them when that's the only reason we would. */
2259 flag_errno_math
= 0;
2261 /* The default values of these switches depend on the TARGET_64BIT
2262 that is not known at this moment. Mark these values with 2 and
2263 let user the to override these. In case there is no command line option
2264 specifying them, we will set the defaults in override_options. */
2266 flag_omit_frame_pointer
= 2;
2267 flag_pcc_struct_return
= 2;
2268 flag_asynchronous_unwind_tables
= 2;
2269 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2270 SUBTARGET_OPTIMIZATION_OPTIONS
;
/* NOTE(review): damaged extract -- the array's opening `{', the
   `#endif's for the two conditionals, and the closing `};' are missing
   (upstream numbering jumps 2298 -> 2300, 2303 -> 2305).  Entries kept
   verbatim.  The table is NULL-terminated (last visible entry).  */
/* Purpose: registry of machine attributes; calling-convention
   attributes all route to ix86_handle_cconv_attribute, DLL attributes
   to handle_dll_attribute, struct-layout ones to
   ix86_handle_struct_attribute.  */
2274 /* Table of valid machine attributes. */
2275 const struct attribute_spec ix86_attribute_table
[] =
2277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2278 /* Stdcall attribute says callee is responsible for popping arguments
2279 if they are not variable. */
2280 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2281 /* Fastcall attribute says callee is responsible for popping arguments
2282 if they are not variable. */
2283 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2284 /* Cdecl attribute says the callee is a normal C declaration */
2285 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2286 /* Regparm attribute specifies how many integer arguments are to be
2287 passed in registers. */
2288 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2289 /* Sseregparm attribute says we are using x86_64 calling conventions
2290 for FP arguments. */
2291 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2292 /* force_align_arg_pointer says this function realigns the stack at entry. */
2293 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2294 false, true, true, ix86_handle_cconv_attribute
},
2295 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2298 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2300 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2301 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2302 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2303 SUBTARGET_ATTRIBUTE_TABLE
,
2305 { NULL
, 0, 0, false, false, false, NULL
}
/* NOTE(review): damaged extract -- the return type, local declarations
   (`func', `a', `b', `type'), `return false/true' statements after the
   guard conditions, and several braces are missing (upstream numbering
   jumps 2321 -> 2328, 2345 -> 2347, ...).  Code kept verbatim; restore
   from upstream GCC before compiling.  */
/* Purpose (from the visible code): reject sibcalls when (1) 32-bit PIC
   needs %ebx live for the PLT, (2) return-value locations differ
   (x87 stack registers are the tricky case), (3) an indirect 32-bit
   call would have no call-clobbered register free because regparm >= 3,
   (4) the target is dllimport'd with regparm >= 3, or (5) this function
   realigns its own stack.  */
2308 /* Decide whether we can make a sibling call to a function. DECL is the
2309 declaration of the function being targeted by the call and EXP is the
2310 CALL_EXPR representing the call. */
2313 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2318 /* If we are generating position-independent code, we cannot sibcall
2319 optimize any indirect call, or a direct call to a global function,
2320 as the PLT requires %ebx be live. */
2321 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
/* Derive the callee's function type from the CALL_EXPR operand.  */
2328 func
= TREE_TYPE (TREE_OPERAND (exp
, 0));
2329 if (POINTER_TYPE_P (func
))
2330 func
= TREE_TYPE (func
);
2333 /* Check that the return value locations are the same. Like
2334 if we are returning floats on the 80387 register stack, we cannot
2335 make a sibcall from a function that doesn't return a float to a
2336 function that does or, conversely, from a function that does return
2337 a float to a function that doesn't; the necessary stack adjustment
2338 would not be executed. This is also the place we notice
2339 differences in the return value ABI. Note that it is ok for one
2340 of the functions to have void return type as long as the return
2341 value of the other is passed in a register. */
2342 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2343 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2345 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2347 if (!rtx_equal_p (a
, b
))
2350 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2352 else if (!rtx_equal_p (a
, b
))
2355 /* If this call is indirect, we'll need to be able to use a call-clobbered
2356 register for the address of the target function. Make sure that all
2357 such registers are not used for passing parameters. */
2358 if (!decl
&& !TARGET_64BIT
)
2362 /* We're looking at the CALL_EXPR, we need the type of the function. */
2363 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
2364 type
= TREE_TYPE (type
); /* pointer type */
2365 type
= TREE_TYPE (type
); /* function type */
2367 if (ix86_function_regparm (type
, NULL
) >= 3)
2369 /* ??? Need to count the actual number of registers to be used,
2370 not the possible number of registers. Fix later. */
2375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2376 /* Dllimport'd functions are also called indirectly. */
2377 if (decl
&& DECL_DLLIMPORT_P (decl
)
2378 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2382 /* If we forced aligned the stack, then sibcalling would unalign the
2383 stack, which may break the called function. */
2384 if (cfun
->machine
->force_align_arg_pointer
)
2387 /* Otherwise okay. That also includes certain types of indirect calls. */
/* NOTE(review): damaged extract -- the return type, the `args' and
   `no_add_attrs' parameters, local declaration of `cst', several `if'
   conditions (e.g. the TARGET_64BIT guard before line 2451 and the
   head of the condition continued at 2438), `return NULL_TREE'
   statements and many braces are missing (upstream numbering jumps
   2413 -> 2417, 2434 -> 2438, 2443 -> 2451, ...).  Code kept verbatim;
   restore from upstream GCC before compiling.  */
/* Purpose (from the visible code): attribute_spec handler validating
   the calling-convention attributes -- rejects them on non-function
   types, checks regparm's integer argument against REGPARM_MAX (and
   REGPARM_MAX-1 when force_align_arg_pointer is present), and reports
   every incompatible attribute pairing (fastcall/cdecl,
   fastcall/stdcall, fastcall/regparm, stdcall/cdecl).  */
2391 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2392 calling convention attributes;
2393 arguments as in struct attribute_spec.handler. */
2396 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2398 int flags ATTRIBUTE_UNUSED
,
2401 if (TREE_CODE (*node
) != FUNCTION_TYPE
2402 && TREE_CODE (*node
) != METHOD_TYPE
2403 && TREE_CODE (*node
) != FIELD_DECL
2404 && TREE_CODE (*node
) != TYPE_DECL
)
2406 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2407 IDENTIFIER_POINTER (name
));
2408 *no_add_attrs
= true;
2412 /* Can combine regparm with all attributes but fastcall. */
2413 if (is_attribute_p ("regparm", name
))
2417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2419 error ("fastcall and regparm attributes are not compatible");
2422 cst
= TREE_VALUE (args
);
2423 if (TREE_CODE (cst
) != INTEGER_CST
)
2425 warning (OPT_Wattributes
,
2426 "%qs attribute requires an integer constant argument",
2427 IDENTIFIER_POINTER (name
));
2428 *no_add_attrs
= true;
2430 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2432 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2433 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2434 *no_add_attrs
= true;
/* Condition head (upstream lines 2436-2437) dropped by extraction;
   this checks regparm == REGPARM_MAX-1 with force_align_arg_pointer.  */
2438 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2439 TYPE_ATTRIBUTES (*node
))
2440 && compare_tree_int (cst
, REGPARM_MAX
-1))
2442 error ("%s functions limited to %d register parameters",
2443 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
/* The guard before this warning (presumably the TARGET_64BIT branch,
   upstream lines 2447-2450) is missing from this extract.  */
2451 warning (OPT_Wattributes
, "%qs attribute ignored",
2452 IDENTIFIER_POINTER (name
));
2453 *no_add_attrs
= true;
2457 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2458 if (is_attribute_p ("fastcall", name
))
2460 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2462 error ("fastcall and cdecl attributes are not compatible");
2464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2466 error ("fastcall and stdcall attributes are not compatible");
2468 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2470 error ("fastcall and regparm attributes are not compatible");
2474 /* Can combine stdcall with fastcall (redundant), regparm and
2476 else if (is_attribute_p ("stdcall", name
))
2478 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2480 error ("stdcall and cdecl attributes are not compatible");
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2484 error ("stdcall and fastcall attributes are not compatible");
2488 /* Can combine cdecl with regparm and sseregparm. */
2489 else if (is_attribute_p ("cdecl", name
))
2491 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2493 error ("stdcall and cdecl attributes are not compatible");
2495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2497 error ("fastcall and cdecl attributes are not compatible");
2501 /* Can combine sseregparm with all attributes. */
/* NOTE(review): damaged extract -- the return type and all `return'
   statements (return 1 on match, return 0 on each mismatch, per the
   header comment) are missing; only the comparison conditions remain.
   Code kept verbatim; restore from upstream GCC before compiling.  */
/* Purpose (from the visible code): type-compatibility hook comparing
   fastcall, regparm count, sseregparm, and cdecl-vs-stdcall (which of
   the two is "non-default" flips under -mrtd) between two function
   types.  */
2506 /* Return 0 if the attributes for two types are incompatible, 1 if they
2507 are compatible, and 2 if they are nearly compatible (which causes a
2508 warning to be generated). */
2511 ix86_comp_type_attributes (tree type1
, tree type2
)
2513 /* Check for mismatch of non-default calling convention. */
2514 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2516 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2519 /* Check for mismatched fastcall/regparm types. */
2520 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2521 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2522 || (ix86_function_regparm (type1
, NULL
)
2523 != ix86_function_regparm (type2
, NULL
)))
2526 /* Check for mismatched sseregparm types. */
2527 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2528 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2531 /* Check for mismatched return types (cdecl vs stdcall). */
2532 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2533 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
/* NOTE(review): damaged extract -- the return type, the declaration of
   `attr', the guards around several assignments (e.g. the `if (attr)'
   before line 2555, the fastcall `regparm = 2' at ~2561, the
   `i->local' check, loop bodies incrementing `globals', the lhs of the
   assignment continued at line 2604, and the final `return regparm;')
   are missing (upstream numbering jumps 2548 -> 2552, 2601 -> 2604,
   ...).  Code kept verbatim; restore from upstream GCC.  */
/* Purpose (from the visible code): compute how many integer args go in
   registers for TYPE/DECL -- explicit regparm/fastcall attributes win;
   otherwise, for 32-bit local functions under unit-at-a-time (and not
   profiling), auto-upgrade regparm up to 3 while avoiding global
   register variables, nested functions needing the static chain in the
   third register, and self-stack-realigning callees (whose prologue
   clobbers %ecx).  */
2539 /* Return the regparm value for a function with the indicated TYPE and DECL.
2540 DECL may be NULL when calling function indirectly
2541 or considering a libcall. */
2544 ix86_function_regparm (tree type
, tree decl
)
2547 int regparm
= ix86_regparm
;
2548 bool user_convention
= false;
2552 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2555 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2556 user_convention
= true;
2559 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2562 user_convention
= true;
2565 /* Use register calling convention for local functions when possible. */
2566 if (!TARGET_64BIT
&& !user_convention
&& decl
2567 && flag_unit_at_a_time
&& !profile_flag
)
2569 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2572 int local_regparm
, globals
= 0, regno
;
2574 /* Make sure no regparm register is taken by a global register
2576 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2577 if (global_regs
[local_regparm
])
2579 /* We can't use regparm(3) for nested functions as these use
2580 static chain pointer in third argument. */
2581 if (local_regparm
== 3
2582 && decl_function_context (decl
)
2583 && !DECL_NO_STATIC_CHAIN (decl
))
2585 /* If the function realigns its stackpointer, the
2586 prologue will clobber %ecx. If we've already
2587 generated code for the callee, the callee
2588 DECL_STRUCT_FUNCTION is gone, so we fall back to
2589 scanning the attributes for the self-realigning
2591 if ((DECL_STRUCT_FUNCTION (decl
)
2592 && DECL_STRUCT_FUNCTION (decl
)->machine
->force_align_arg_pointer
)
2593 || (!DECL_STRUCT_FUNCTION (decl
)
2594 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2595 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
2597 /* Each global register variable increases register preassure,
2598 so the more global reg vars there are, the smaller regparm
2599 optimization use, unless requested by the user explicitly. */
2600 for (regno
= 0; regno
< 6; regno
++)
2601 if (global_regs
[regno
])
/* lhs of this assignment (upstream line 2603, `local_regparm')
   dropped by extraction.  */
2604 = globals
< local_regparm
? local_regparm
- globals
: 0;
2606 if (local_regparm
> regparm
)
2607 regparm
= local_regparm
;
/* NOTE(review): damaged extract -- the return type, the head of the
   TARGET_SSEREGPARM condition (upstream line 2625), the SSE-enabled
   check and the `if (decl) ... else ...' around the two error calls,
   several `return' statements and the `if (i && i->local)' guard are
   missing (upstream numbering jumps 2626 -> 2631, 2648 -> 2650).
   Code kept verbatim; restore from upstream GCC.  */
/* Purpose (from the visible code): decide whether SFmode/DFmode args
   go in SSE registers -- sseregparm attribute requests it (error if
   SSE is off); local 32-bit functions under -mfpmath=sse and
   unit-at-a-time get it implicitly, returning 2 when SSE2 is on
   (DFmode too) or 1 otherwise.  */
2614 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2615 in SSE registers for a function with the indicated TYPE and DECL.
2616 DECL may be NULL when calling function indirectly
2617 or considering a libcall. Otherwise return 0. */
2620 ix86_function_sseregparm (tree type
, tree decl
)
2622 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2623 by the sseregparm attribute. */
2624 if (TARGET_SSEREGPARM
2626 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
2631 error ("Calling %qD with attribute sseregparm without "
2632 "SSE/SSE2 enabled", decl
);
2634 error ("Calling %qT with attribute sseregparm without "
2635 "SSE/SSE2 enabled", type
);
2642 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2643 in SSE registers even for 32-bit mode and not just 3, but up to
2644 8 SSE arguments in registers. */
2645 if (!TARGET_64BIT
&& decl
2646 && TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
2648 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
2650 return TARGET_SSE2
? 2 : 1;
/* NOTE(review): damaged extract -- the return type ("static bool") and
   braces are missing.  Code kept verbatim.  */
/* Purpose: tests whether hard register 0 (%eax) is in the live-out set
   of the entry block -- a cheap dataflow proxy for "an incoming
   argument occupies eax", as the header comment explains.  */
2656 /* Return true if EAX is live at the start of the function. Used by
2657 ix86_expand_prologue to determine if we need special help before
2658 calling allocate_stack_worker. */
2661 ix86_eax_live_at_start_p (void)
2663 /* Cheat. Don't bother working forward from ix86_function_regparm
2664 to the function type to whether an actual argument is located in
2665 eax. Instead just look at cfg info, which is still close enough
2666 to correct at this point. This gives false positives for broken
2667 functions that might use uninitialized data that happens to be
2668 allocated in eax, but who cares? */
2669 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
/* NOTE(review): damaged extract -- the return type, `return size;'
   branches, the stdarg check heading the condition continued at line
   2704, the `if (rtd ...)' variadic test, and the final `return 0;'
   are missing (upstream numbering jumps 2700 -> 2704, 2706 -> 2710).
   Code kept verbatim; restore from upstream GCC.  */
/* Purpose (from the visible code and header comment): implement
   RETURN_POPS_ARGS -- callee pops `size' bytes for stdcall/fastcall
   (and under -mrtd for non-cdecl fixed-arg functions), and pops the
   hidden aggregate-return pointer (one Pmode word) when it is passed
   on the stack and KEEP_AGGREGATE_RETURN_POINTER is not set.  */
2672 /* Value is the number of bytes of arguments automatically
2673 popped when returning from a subroutine call.
2674 FUNDECL is the declaration node of the function (as a tree),
2675 FUNTYPE is the data type of the function (as a tree),
2676 or for a library call it is an identifier node for the subroutine name.
2677 SIZE is the number of bytes of arguments passed on the stack.
2679 On the 80386, the RTD insn may be used to pop them if the number
2680 of args is fixed, but if the number is variable then the caller
2681 must pop them all. RTD can't be used for library calls now
2682 because the library is compiled with the Unix compiler.
2683 Use of RTD is a selectable option, since it is incompatible with
2684 standard Unix calling sequences. If the option is not selected,
2685 the caller must always pop the args.
2687 The attribute stdcall is equivalent to RTD on a per module basis. */
2690 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
2692 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
2694 /* Cdecl functions override -mrtd, and never pop the stack. */
2695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
2697 /* Stdcall and fastcall functions will pop the stack if not
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
2700 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
/* Head of this condition (`if (rtd', upstream ~2703) dropped; it
   tests that the arg list is fixed (ends in void) before popping.  */
2704 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
2705 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
2706 == void_type_node
)))
2710 /* Lose any fake structure return argument if it is passed on the stack. */
2711 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
2713 && !KEEP_AGGREGATE_RETURN_POINTER
)
2715 int nregs
= ix86_function_regparm (funtype
, fundecl
);
2718 return GET_MODE_SIZE (Pmode
);
2724 /* Argument support functions. */
2726 /* Return true when register may be used to pass function parameters. */
2728 ix86_function_arg_regno_p (int regno
)
2732 return (regno
< REGPARM_MAX
2733 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
2734 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
2735 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
2736 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
2738 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
2739 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
2741 /* RAX is used as hidden argument to va_arg functions. */
2744 for (i
= 0; i
< REGPARM_MAX
; i
++)
2745 if (regno
== x86_64_int_parameter_registers
[i
])
2750 /* Return if we do not know how to pass TYPE solely in registers. */
2753 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
2755 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
2758 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2759 The layout_type routine is crafty and tries to trick us into passing
2760 currently unsupported vector types on the stack by using TImode. */
2761 return (!TARGET_64BIT
&& mode
== TImode
2762 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
2765 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2766 for a call to a function whose data type is FNTYPE.
2767 For a library call, FNTYPE is 0. */
2770 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
2771 tree fntype
, /* tree ptr for function decl */
2772 rtx libname
, /* SYMBOL_REF of library name or 0 */
2775 static CUMULATIVE_ARGS zero_cum
;
2776 tree param
, next_param
;
2778 if (TARGET_DEBUG_ARG
)
2780 fprintf (stderr
, "\ninit_cumulative_args (");
2782 fprintf (stderr
, "fntype code = %s, ret code = %s",
2783 tree_code_name
[(int) TREE_CODE (fntype
)],
2784 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
2786 fprintf (stderr
, "no fntype");
2789 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
2794 /* Set up the number of registers to use for passing arguments. */
2795 cum
->nregs
= ix86_regparm
;
2797 cum
->sse_nregs
= SSE_REGPARM_MAX
;
2799 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
2800 cum
->warn_sse
= true;
2801 cum
->warn_mmx
= true;
2802 cum
->maybe_vaarg
= false;
2804 /* Use ecx and edx registers if function has fastcall attribute,
2805 else look for regparm information. */
2806 if (fntype
&& !TARGET_64BIT
)
2808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
2814 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
2817 /* Set up the number of SSE registers used for passing SFmode
2818 and DFmode arguments. Warn for mismatching ABI. */
2819 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
2821 /* Determine if this function has variable arguments. This is
2822 indicated by the last argument being 'void_type_mode' if there
2823 are no variable arguments. If there are variable arguments, then
2824 we won't pass anything in registers in 32-bit mode. */
2826 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2828 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2829 param
!= 0; param
= next_param
)
2831 next_param
= TREE_CHAIN (param
);
2832 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2842 cum
->float_in_sse
= 0;
2844 cum
->maybe_vaarg
= true;
2848 if ((!fntype
&& !libname
)
2849 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2850 cum
->maybe_vaarg
= true;
2852 if (TARGET_DEBUG_ARG
)
2853 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2858 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2859 But in the case of vector types, it is some vector mode.
2861 When we have only some of our vector isa extensions enabled, then there
2862 are some modes for which vector_mode_supported_p is false. For these
2863 modes, the generic vector support in gcc will choose some non-vector mode
2864 in order to implement the type. By computing the natural mode, we'll
2865 select the proper ABI location for the operand and not depend on whatever
2866 the middle-end decides to do with these vector types. */
2868 static enum machine_mode
2869 type_natural_mode (tree type
)
2871 enum machine_mode mode
= TYPE_MODE (type
);
2873 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2875 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2876 if ((size
== 8 || size
== 16)
2877 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2878 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2880 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2882 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2883 mode
= MIN_MODE_VECTOR_FLOAT
;
2885 mode
= MIN_MODE_VECTOR_INT
;
2887 /* Get the mode which has this inner mode and number of units. */
2888 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2889 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2890 && GET_MODE_INNER (mode
) == innermode
)
2900 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2901 this may not agree with the mode that the type system has chosen for the
2902 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2903 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2906 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2911 if (orig_mode
!= BLKmode
)
2912 tmp
= gen_rtx_REG (orig_mode
, regno
);
2915 tmp
= gen_rtx_REG (mode
, regno
);
2916 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2917 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2923 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2924 of this code is to classify each 8bytes of incoming argument by the register
2925 class and assign registers accordingly. */
2927 /* Return the union class of CLASS1 and CLASS2.
2928 See the x86-64 PS ABI for details. */
2930 static enum x86_64_reg_class
2931 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2933 /* Rule #1: If both classes are equal, this is the resulting class. */
2934 if (class1
== class2
)
2937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2939 if (class1
== X86_64_NO_CLASS
)
2941 if (class2
== X86_64_NO_CLASS
)
2944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2945 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2946 return X86_64_MEMORY_CLASS
;
2948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2949 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2950 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2951 return X86_64_INTEGERSI_CLASS
;
2952 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2953 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2954 return X86_64_INTEGER_CLASS
;
2956 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2958 if (class1
== X86_64_X87_CLASS
2959 || class1
== X86_64_X87UP_CLASS
2960 || class1
== X86_64_COMPLEX_X87_CLASS
2961 || class2
== X86_64_X87_CLASS
2962 || class2
== X86_64_X87UP_CLASS
2963 || class2
== X86_64_COMPLEX_X87_CLASS
)
2964 return X86_64_MEMORY_CLASS
;
2966 /* Rule #6: Otherwise class SSE is used. */
2967 return X86_64_SSE_CLASS
;
2970 /* Classify the argument of type TYPE and mode MODE.
2971 CLASSES will be filled by the register class used to pass each word
2972 of the operand. The number of words is returned. In case the parameter
2973 should be passed in memory, 0 is returned. As a special case for zero
2974 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2976 BIT_OFFSET is used internally for handling records and specifies offset
2977 of the offset in bits modulo 256 to avoid overflow cases.
2979 See the x86-64 PS ABI for details.
2983 classify_argument (enum machine_mode mode
, tree type
,
2984 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2986 HOST_WIDE_INT bytes
=
2987 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2988 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2990 /* Variable sized entities are always passed/returned in memory. */
2994 if (mode
!= VOIDmode
2995 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2998 if (type
&& AGGREGATE_TYPE_P (type
))
3002 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3004 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3008 for (i
= 0; i
< words
; i
++)
3009 classes
[i
] = X86_64_NO_CLASS
;
3011 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3012 signalize memory class, so handle it as special case. */
3015 classes
[0] = X86_64_NO_CLASS
;
3019 /* Classify each field of record and merge classes. */
3020 switch (TREE_CODE (type
))
3023 /* And now merge the fields of structure. */
3024 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3026 if (TREE_CODE (field
) == FIELD_DECL
)
3030 if (TREE_TYPE (field
) == error_mark_node
)
3033 /* Bitfields are always classified as integer. Handle them
3034 early, since later code would consider them to be
3035 misaligned integers. */
3036 if (DECL_BIT_FIELD (field
))
3038 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3039 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3040 + tree_low_cst (DECL_SIZE (field
), 0)
3043 merge_classes (X86_64_INTEGER_CLASS
,
3048 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3049 TREE_TYPE (field
), subclasses
,
3050 (int_bit_position (field
)
3051 + bit_offset
) % 256);
3054 for (i
= 0; i
< num
; i
++)
3057 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3059 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3067 /* Arrays are handled as small records. */
3070 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3071 TREE_TYPE (type
), subclasses
, bit_offset
);
3075 /* The partial classes are now full classes. */
3076 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3077 subclasses
[0] = X86_64_SSE_CLASS
;
3078 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3079 subclasses
[0] = X86_64_INTEGER_CLASS
;
3081 for (i
= 0; i
< words
; i
++)
3082 classes
[i
] = subclasses
[i
% num
];
3087 case QUAL_UNION_TYPE
:
3088 /* Unions are similar to RECORD_TYPE but offset is always 0.
3090 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3092 if (TREE_CODE (field
) == FIELD_DECL
)
3096 if (TREE_TYPE (field
) == error_mark_node
)
3099 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3100 TREE_TYPE (field
), subclasses
,
3104 for (i
= 0; i
< num
; i
++)
3105 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3114 /* Final merger cleanup. */
3115 for (i
= 0; i
< words
; i
++)
3117 /* If one class is MEMORY, everything should be passed in
3119 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3122 /* The X86_64_SSEUP_CLASS should be always preceded by
3123 X86_64_SSE_CLASS. */
3124 if (classes
[i
] == X86_64_SSEUP_CLASS
3125 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3126 classes
[i
] = X86_64_SSE_CLASS
;
3128 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3129 if (classes
[i
] == X86_64_X87UP_CLASS
3130 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3131 classes
[i
] = X86_64_SSE_CLASS
;
3136 /* Compute alignment needed. We align all types to natural boundaries with
3137 exception of XFmode that is aligned to 64bits. */
3138 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3140 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3143 mode_alignment
= 128;
3144 else if (mode
== XCmode
)
3145 mode_alignment
= 256;
3146 if (COMPLEX_MODE_P (mode
))
3147 mode_alignment
/= 2;
3148 /* Misaligned fields are always returned in memory. */
3149 if (bit_offset
% mode_alignment
)
3153 /* for V1xx modes, just use the base mode */
3154 if (VECTOR_MODE_P (mode
)
3155 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3156 mode
= GET_MODE_INNER (mode
);
3158 /* Classification of atomic types. */
3163 classes
[0] = X86_64_SSE_CLASS
;
3166 classes
[0] = X86_64_SSE_CLASS
;
3167 classes
[1] = X86_64_SSEUP_CLASS
;
3176 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3177 classes
[0] = X86_64_INTEGERSI_CLASS
;
3179 classes
[0] = X86_64_INTEGER_CLASS
;
3183 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3188 if (!(bit_offset
% 64))
3189 classes
[0] = X86_64_SSESF_CLASS
;
3191 classes
[0] = X86_64_SSE_CLASS
;
3194 classes
[0] = X86_64_SSEDF_CLASS
;
3197 classes
[0] = X86_64_X87_CLASS
;
3198 classes
[1] = X86_64_X87UP_CLASS
;
3201 classes
[0] = X86_64_SSE_CLASS
;
3202 classes
[1] = X86_64_SSEUP_CLASS
;
3205 classes
[0] = X86_64_SSE_CLASS
;
3208 classes
[0] = X86_64_SSEDF_CLASS
;
3209 classes
[1] = X86_64_SSEDF_CLASS
;
3212 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3215 /* This modes is larger than 16 bytes. */
3223 classes
[0] = X86_64_SSE_CLASS
;
3224 classes
[1] = X86_64_SSEUP_CLASS
;
3230 classes
[0] = X86_64_SSE_CLASS
;
3236 gcc_assert (VECTOR_MODE_P (mode
));
3241 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3243 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3244 classes
[0] = X86_64_INTEGERSI_CLASS
;
3246 classes
[0] = X86_64_INTEGER_CLASS
;
3247 classes
[1] = X86_64_INTEGER_CLASS
;
3248 return 1 + (bytes
> 8);
3252 /* Examine the argument and return set number of register required in each
3253 class. Return 0 iff parameter should be passed in memory. */
3255 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3256 int *int_nregs
, int *sse_nregs
)
3258 enum x86_64_reg_class
class[MAX_CLASSES
];
3259 int n
= classify_argument (mode
, type
, class, 0);
3265 for (n
--; n
>= 0; n
--)
3268 case X86_64_INTEGER_CLASS
:
3269 case X86_64_INTEGERSI_CLASS
:
3272 case X86_64_SSE_CLASS
:
3273 case X86_64_SSESF_CLASS
:
3274 case X86_64_SSEDF_CLASS
:
3277 case X86_64_NO_CLASS
:
3278 case X86_64_SSEUP_CLASS
:
3280 case X86_64_X87_CLASS
:
3281 case X86_64_X87UP_CLASS
:
3285 case X86_64_COMPLEX_X87_CLASS
:
3286 return in_return
? 2 : 0;
3287 case X86_64_MEMORY_CLASS
:
3293 /* Construct container for the argument used by GCC interface. See
3294 FUNCTION_ARG for the detailed description. */
3297 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3298 tree type
, int in_return
, int nintregs
, int nsseregs
,
3299 const int *intreg
, int sse_regno
)
3301 /* The following variables hold the static issued_error state. */
3302 static bool issued_sse_arg_error
;
3303 static bool issued_sse_ret_error
;
3304 static bool issued_x87_ret_error
;
3306 enum machine_mode tmpmode
;
3308 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3309 enum x86_64_reg_class
class[MAX_CLASSES
];
3313 int needed_sseregs
, needed_intregs
;
3314 rtx exp
[MAX_CLASSES
];
3317 n
= classify_argument (mode
, type
, class, 0);
3318 if (TARGET_DEBUG_ARG
)
3321 fprintf (stderr
, "Memory class\n");
3324 fprintf (stderr
, "Classes:");
3325 for (i
= 0; i
< n
; i
++)
3327 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
3329 fprintf (stderr
, "\n");
3334 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3337 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3340 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3341 some less clueful developer tries to use floating-point anyway. */
3342 if (needed_sseregs
&& !TARGET_SSE
)
3346 if (!issued_sse_ret_error
)
3348 error ("SSE register return with SSE disabled");
3349 issued_sse_ret_error
= true;
3352 else if (!issued_sse_arg_error
)
3354 error ("SSE register argument with SSE disabled");
3355 issued_sse_arg_error
= true;
3360 /* Likewise, error if the ABI requires us to return values in the
3361 x87 registers and the user specified -mno-80387. */
3362 if (!TARGET_80387
&& in_return
)
3363 for (i
= 0; i
< n
; i
++)
3364 if (class[i
] == X86_64_X87_CLASS
3365 || class[i
] == X86_64_X87UP_CLASS
3366 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3368 if (!issued_x87_ret_error
)
3370 error ("x87 register return with x87 disabled");
3371 issued_x87_ret_error
= true;
3376 /* First construct simple cases. Avoid SCmode, since we want to use
3377 single register to pass this type. */
3378 if (n
== 1 && mode
!= SCmode
)
3381 case X86_64_INTEGER_CLASS
:
3382 case X86_64_INTEGERSI_CLASS
:
3383 return gen_rtx_REG (mode
, intreg
[0]);
3384 case X86_64_SSE_CLASS
:
3385 case X86_64_SSESF_CLASS
:
3386 case X86_64_SSEDF_CLASS
:
3387 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3388 case X86_64_X87_CLASS
:
3389 case X86_64_COMPLEX_X87_CLASS
:
3390 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3391 case X86_64_NO_CLASS
:
3392 /* Zero sized array, struct or class. */
3397 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3399 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3401 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3402 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3403 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3404 && class[1] == X86_64_INTEGER_CLASS
3405 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3406 && intreg
[0] + 1 == intreg
[1])
3407 return gen_rtx_REG (mode
, intreg
[0]);
3409 /* Otherwise figure out the entries of the PARALLEL. */
3410 for (i
= 0; i
< n
; i
++)
3414 case X86_64_NO_CLASS
:
3416 case X86_64_INTEGER_CLASS
:
3417 case X86_64_INTEGERSI_CLASS
:
3418 /* Merge TImodes on aligned occasions here too. */
3419 if (i
* 8 + 8 > bytes
)
3420 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3421 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3425 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3426 if (tmpmode
== BLKmode
)
3428 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3429 gen_rtx_REG (tmpmode
, *intreg
),
3433 case X86_64_SSESF_CLASS
:
3434 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3435 gen_rtx_REG (SFmode
,
3436 SSE_REGNO (sse_regno
)),
3440 case X86_64_SSEDF_CLASS
:
3441 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3442 gen_rtx_REG (DFmode
,
3443 SSE_REGNO (sse_regno
)),
3447 case X86_64_SSE_CLASS
:
3448 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3452 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3453 gen_rtx_REG (tmpmode
,
3454 SSE_REGNO (sse_regno
)),
3456 if (tmpmode
== TImode
)
3465 /* Empty aligned struct, union or class. */
3469 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3470 for (i
= 0; i
< nexps
; i
++)
3471 XVECEXP (ret
, 0, i
) = exp
[i
];
3475 /* Update the data in CUM to advance over an argument
3476 of mode MODE and data type TYPE.
3477 (TYPE is null for libcalls where that information may not be available.) */
3480 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3481 tree type
, int named
)
3484 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3485 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3488 mode
= type_natural_mode (type
);
3490 if (TARGET_DEBUG_ARG
)
3491 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3492 "mode=%s, named=%d)\n\n",
3493 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
3494 GET_MODE_NAME (mode
), named
);
3498 int int_nregs
, sse_nregs
;
3499 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3500 cum
->words
+= words
;
3501 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3503 cum
->nregs
-= int_nregs
;
3504 cum
->sse_nregs
-= sse_nregs
;
3505 cum
->regno
+= int_nregs
;
3506 cum
->sse_regno
+= sse_nregs
;
3509 cum
->words
+= words
;
3527 cum
->words
+= words
;
3528 cum
->nregs
-= words
;
3529 cum
->regno
+= words
;
3531 if (cum
->nregs
<= 0)
3539 if (cum
->float_in_sse
< 2)
3542 if (cum
->float_in_sse
< 1)
3553 if (!type
|| !AGGREGATE_TYPE_P (type
))
3555 cum
->sse_words
+= words
;
3556 cum
->sse_nregs
-= 1;
3557 cum
->sse_regno
+= 1;
3558 if (cum
->sse_nregs
<= 0)
3570 if (!type
|| !AGGREGATE_TYPE_P (type
))
3572 cum
->mmx_words
+= words
;
3573 cum
->mmx_nregs
-= 1;
3574 cum
->mmx_regno
+= 1;
3575 if (cum
->mmx_nregs
<= 0)
3586 /* Define where to put the arguments to a function.
3587 Value is zero to push the argument on the stack,
3588 or a hard register in which to store the argument.
3590 MODE is the argument's machine mode.
3591 TYPE is the data type of the argument (as a tree).
3592 This is null for libcalls where that information may
3594 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3595 the preceding args and about the function being called.
3596 NAMED is nonzero if this argument is a named parameter
3597 (otherwise it is an extra parameter matching an ellipsis). */
3600 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
3601 tree type
, int named
)
3603 enum machine_mode mode
= orig_mode
;
3606 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3607 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3608 static bool warnedsse
, warnedmmx
;
3610 /* To simplify the code below, represent vector types with a vector mode
3611 even if MMX/SSE are not active. */
3612 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3613 mode
= type_natural_mode (type
);
3615 /* Handle a hidden AL argument containing number of registers for varargs
3616 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3618 if (mode
== VOIDmode
)
3621 return GEN_INT (cum
->maybe_vaarg
3622 ? (cum
->sse_nregs
< 0
3630 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3632 &x86_64_int_parameter_registers
[cum
->regno
],
3637 /* For now, pass fp/complex values on the stack. */
3649 if (words
<= cum
->nregs
)
3651 int regno
= cum
->regno
;
3653 /* Fastcall allocates the first two DWORD (SImode) or
3654 smaller arguments to ECX and EDX. */
3657 if (mode
== BLKmode
|| mode
== DImode
)
3660 /* ECX not EAX is the first allocated register. */
3664 ret
= gen_rtx_REG (mode
, regno
);
3668 if (cum
->float_in_sse
< 2)
3671 if (cum
->float_in_sse
< 1)
3681 if (!type
|| !AGGREGATE_TYPE_P (type
))
3683 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
3686 warning (0, "SSE vector argument without SSE enabled "
3690 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3691 cum
->sse_regno
+ FIRST_SSE_REG
);
3698 if (!type
|| !AGGREGATE_TYPE_P (type
))
3700 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
3703 warning (0, "MMX vector argument without MMX enabled "
3707 ret
= gen_reg_or_parallel (mode
, orig_mode
,
3708 cum
->mmx_regno
+ FIRST_MMX_REG
);
3713 if (TARGET_DEBUG_ARG
)
3716 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3717 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
3720 print_simple_rtl (stderr
, ret
);
3722 fprintf (stderr
, ", stack");
3724 fprintf (stderr
, " )\n");
3730 /* A C expression that indicates when an argument must be passed by
3731 reference. If nonzero for an argument, a copy of that argument is
3732 made in memory and a pointer to the argument is passed instead of
3733 the argument itself. The pointer is passed in whatever way is
3734 appropriate for passing a pointer to that type. */
3737 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3738 enum machine_mode mode ATTRIBUTE_UNUSED
,
3739 tree type
, bool named ATTRIBUTE_UNUSED
)
3744 if (type
&& int_size_in_bytes (type
) == -1)
3746 if (TARGET_DEBUG_ARG
)
3747 fprintf (stderr
, "function_arg_pass_by_reference\n");
3754 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3755 ABI. Only called if TARGET_SSE. */
3757 contains_128bit_aligned_vector_p (tree type
)
3759 enum machine_mode mode
= TYPE_MODE (type
);
3760 if (SSE_REG_MODE_P (mode
)
3761 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3763 if (TYPE_ALIGN (type
) < 128)
3766 if (AGGREGATE_TYPE_P (type
))
3768 /* Walk the aggregates recursively. */
3769 switch (TREE_CODE (type
))
3773 case QUAL_UNION_TYPE
:
3777 /* Walk all the structure fields. */
3778 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3780 if (TREE_CODE (field
) == FIELD_DECL
3781 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
3788 /* Just for use if some languages passes arrays by value. */
3789 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
3800 /* Gives the alignment boundary, in bits, of an argument with the
3801 specified mode and type. */
3804 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
3808 align
= TYPE_ALIGN (type
);
3810 align
= GET_MODE_ALIGNMENT (mode
);
3811 if (align
< PARM_BOUNDARY
)
3812 align
= PARM_BOUNDARY
;
3815 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3816 make an exception for SSE modes since these require 128bit
3819 The handling here differs from field_alignment. ICC aligns MMX
3820 arguments to 4 byte boundaries, while structure fields are aligned
3821 to 8 byte boundaries. */
3823 align
= PARM_BOUNDARY
;
3826 if (!SSE_REG_MODE_P (mode
))
3827 align
= PARM_BOUNDARY
;
3831 if (!contains_128bit_aligned_vector_p (type
))
3832 align
= PARM_BOUNDARY
;
3840 /* Return true if N is a possible register number of function value. */
3842 ix86_function_value_regno_p (int regno
)
3845 || (regno
== FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
3846 || (regno
== FIRST_SSE_REG
&& TARGET_SSE
))
3850 && (regno
== FIRST_MMX_REG
&& TARGET_MMX
))
3856 /* Define how to find the value returned by a function.
3857 VALTYPE is the data type of the value (as a tree).
3858 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3859 otherwise, FUNC is 0. */
3861 ix86_function_value (tree valtype
, tree fntype_or_decl
,
3862 bool outgoing ATTRIBUTE_UNUSED
)
3864 enum machine_mode natmode
= type_natural_mode (valtype
);
3868 rtx ret
= construct_container (natmode
, TYPE_MODE (valtype
), valtype
,
3869 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
3870 x86_64_int_return_registers
, 0);
3871 /* For zero sized structures, construct_container return NULL, but we
3872 need to keep rest of compiler happy by returning meaningful value. */
3874 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
3879 tree fn
= NULL_TREE
, fntype
;
3881 && DECL_P (fntype_or_decl
))
3882 fn
= fntype_or_decl
;
3883 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
3884 return gen_rtx_REG (TYPE_MODE (valtype
),
3885 ix86_value_regno (natmode
, fn
, fntype
));
3889 /* Return true iff type is returned in memory. */
3891 ix86_return_in_memory (tree type
)
3893 int needed_intregs
, needed_sseregs
, size
;
3894 enum machine_mode mode
= type_natural_mode (type
);
3897 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
3899 if (mode
== BLKmode
)
3902 size
= int_size_in_bytes (type
);
3904 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3907 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3909 /* User-created vectors small enough to fit in EAX. */
3913 /* MMX/3dNow values are returned in MM0,
3914 except when it doesn't exits. */
3916 return (TARGET_MMX
? 0 : 1);
3918 /* SSE values are returned in XMM0, except when it doesn't exist. */
3920 return (TARGET_SSE
? 0 : 1);
3934 /* When returning SSE vector types, we have a choice of either
3935 (1) being abi incompatible with a -march switch, or
3936 (2) generating an error.
3937 Given no good solution, I think the safest thing is one warning.
3938 The user won't be able to use -Werror, but....
3940 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3941 called in response to actually generating a caller or callee that
3942 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3943 via aggregate_value_p for general type probing from tree-ssa. */
3946 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3948 static bool warnedsse
, warnedmmx
;
3952 /* Look at the return type of the function, not the function type. */
3953 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3955 if (!TARGET_SSE
&& !warnedsse
)
3958 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3961 warning (0, "SSE vector return without SSE enabled "
3966 if (!TARGET_MMX
&& !warnedmmx
)
3968 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3971 warning (0, "MMX vector return without MMX enabled "
3980 /* Define how to find the value returned by a library function
3981 assuming the value has mode MODE. */
3983 ix86_libcall_value (enum machine_mode mode
)
3997 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4000 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4004 return gen_rtx_REG (mode
, 0);
4008 return gen_rtx_REG (mode
, ix86_value_regno (mode
, NULL
, NULL
));
4011 /* Given a mode, return the register to use for a return value. */
4014 ix86_value_regno (enum machine_mode mode
, tree func
, tree fntype
)
4016 gcc_assert (!TARGET_64BIT
);
4018 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4019 we normally prevent this case when mmx is not available. However
4020 some ABIs may require the result to be returned like DImode. */
4021 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4022 return TARGET_MMX
? FIRST_MMX_REG
: 0;
4024 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4025 we prevent this case when sse is not available. However some ABIs
4026 may require the result to be returned like integer TImode. */
4027 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4028 return TARGET_SSE
? FIRST_SSE_REG
: 0;
4030 /* Decimal floating point values can go in %eax, unlike other float modes. */
4031 if (DECIMAL_FLOAT_MODE_P (mode
))
4034 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4035 if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4038 /* Floating point return values in %st(0), except for local functions when
4039 SSE math is enabled or for functions with sseregparm attribute. */
4040 if ((func
|| fntype
)
4041 && (mode
== SFmode
|| mode
== DFmode
))
4043 int sse_level
= ix86_function_sseregparm (fntype
, func
);
4044 if ((sse_level
>= 1 && mode
== SFmode
)
4045 || (sse_level
== 2 && mode
== DFmode
))
4046 return FIRST_SSE_REG
;
4049 return FIRST_FLOAT_REG
;
4052 /* Create the va_list data type. */
4055 ix86_build_builtin_va_list (void)
4057 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4059 /* For i386 we use plain pointer to argument area. */
4061 return build_pointer_type (char_type_node
);
4063 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4064 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4066 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4067 unsigned_type_node
);
4068 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4069 unsigned_type_node
);
4070 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4072 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4075 va_list_gpr_counter_field
= f_gpr
;
4076 va_list_fpr_counter_field
= f_fpr
;
4078 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4079 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4080 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4081 DECL_FIELD_CONTEXT (f_sav
) = record
;
4083 TREE_CHAIN (record
) = type_decl
;
4084 TYPE_NAME (record
) = type_decl
;
4085 TYPE_FIELDS (record
) = f_gpr
;
4086 TREE_CHAIN (f_gpr
) = f_fpr
;
4087 TREE_CHAIN (f_fpr
) = f_ovf
;
4088 TREE_CHAIN (f_ovf
) = f_sav
;
4090 layout_type (record
);
4092 /* The correct type is an array type of one element. */
4093 return build_array_type (record
, build_index_type (size_zero_node
));
4096 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4099 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4100 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4103 CUMULATIVE_ARGS next_cum
;
4104 rtx save_area
= NULL_RTX
, mem
;
4117 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4120 /* Indicate to allocate space on the stack for varargs save area. */
4121 ix86_save_varrargs_registers
= 1;
4123 cfun
->stack_alignment_needed
= 128;
4125 fntype
= TREE_TYPE (current_function_decl
);
4126 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4127 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4128 != void_type_node
));
4130 /* For varargs, we do not want to skip the dummy va_dcl argument.
4131 For stdargs, we do want to skip the last named argument. */
4134 function_arg_advance (&next_cum
, mode
, type
, 1);
4137 save_area
= frame_pointer_rtx
;
4139 set
= get_varargs_alias_set ();
4141 for (i
= next_cum
.regno
;
4143 && i
< next_cum
.regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4146 mem
= gen_rtx_MEM (Pmode
,
4147 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4148 MEM_NOTRAP_P (mem
) = 1;
4149 set_mem_alias_set (mem
, set
);
4150 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4151 x86_64_int_parameter_registers
[i
]));
4154 if (next_cum
.sse_nregs
&& cfun
->va_list_fpr_size
)
4156 /* Now emit code to save SSE registers. The AX parameter contains number
4157 of SSE parameter registers used to call this function. We use
4158 sse_prologue_save insn template that produces computed jump across
4159 SSE saves. We need some preparation work to get this working. */
4161 label
= gen_label_rtx ();
4162 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4164 /* Compute address to jump to :
4165 label - 5*eax + nnamed_sse_arguments*5 */
4166 tmp_reg
= gen_reg_rtx (Pmode
);
4167 nsse_reg
= gen_reg_rtx (Pmode
);
4168 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4169 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4170 gen_rtx_MULT (Pmode
, nsse_reg
,
4172 if (next_cum
.sse_regno
)
4175 gen_rtx_CONST (DImode
,
4176 gen_rtx_PLUS (DImode
,
4178 GEN_INT (next_cum
.sse_regno
* 4))));
4180 emit_move_insn (nsse_reg
, label_ref
);
4181 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4183 /* Compute address of memory block we save into. We always use pointer
4184 pointing 127 bytes after first byte to store - this is needed to keep
4185 instruction size limited by 4 bytes. */
4186 tmp_reg
= gen_reg_rtx (Pmode
);
4187 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4188 plus_constant (save_area
,
4189 8 * REGPARM_MAX
+ 127)));
4190 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4191 MEM_NOTRAP_P (mem
) = 1;
4192 set_mem_alias_set (mem
, set
);
4193 set_mem_align (mem
, BITS_PER_WORD
);
4195 /* And finally do the dirty job! */
4196 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4197 GEN_INT (next_cum
.sse_regno
), label
));
4202 /* Implement va_start. */
4205 ix86_va_start (tree valist
, rtx nextarg
)
4207 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4208 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4209 tree gpr
, fpr
, ovf
, sav
, t
;
4212 /* Only 64bit target needs something special. */
4215 std_expand_builtin_va_start (valist
, nextarg
);
4219 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4220 f_fpr
= TREE_CHAIN (f_gpr
);
4221 f_ovf
= TREE_CHAIN (f_fpr
);
4222 f_sav
= TREE_CHAIN (f_ovf
);
4224 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4225 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4226 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4227 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4228 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4230 /* Count number of gp and fp argument registers used. */
4231 words
= current_function_args_info
.words
;
4232 n_gpr
= current_function_args_info
.regno
;
4233 n_fpr
= current_function_args_info
.sse_regno
;
4235 if (TARGET_DEBUG_ARG
)
4236 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4237 (int) words
, (int) n_gpr
, (int) n_fpr
);
4239 if (cfun
->va_list_gpr_size
)
4241 type
= TREE_TYPE (gpr
);
4242 t
= build2 (MODIFY_EXPR
, type
, gpr
,
4243 build_int_cst (type
, n_gpr
* 8));
4244 TREE_SIDE_EFFECTS (t
) = 1;
4245 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4248 if (cfun
->va_list_fpr_size
)
4250 type
= TREE_TYPE (fpr
);
4251 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4252 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4253 TREE_SIDE_EFFECTS (t
) = 1;
4254 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4257 /* Find the overflow area. */
4258 type
= TREE_TYPE (ovf
);
4259 t
= make_tree (type
, virtual_incoming_args_rtx
);
4261 t
= build2 (PLUS_EXPR
, type
, t
,
4262 build_int_cst (type
, words
* UNITS_PER_WORD
));
4263 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4264 TREE_SIDE_EFFECTS (t
) = 1;
4265 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4267 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4269 /* Find the register save area.
4270 Prologue of the function save it right above stack frame. */
4271 type
= TREE_TYPE (sav
);
4272 t
= make_tree (type
, frame_pointer_rtx
);
4273 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4274 TREE_SIDE_EFFECTS (t
) = 1;
4275 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4279 /* Implement va_arg. */
4282 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4284 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4285 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4286 tree gpr
, fpr
, ovf
, sav
, t
;
4288 tree lab_false
, lab_over
= NULL_TREE
;
4293 enum machine_mode nat_mode
;
4295 /* Only 64bit target needs something special. */
4297 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4299 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4300 f_fpr
= TREE_CHAIN (f_gpr
);
4301 f_ovf
= TREE_CHAIN (f_fpr
);
4302 f_sav
= TREE_CHAIN (f_ovf
);
4304 valist
= build_va_arg_indirect_ref (valist
);
4305 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4306 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4307 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4308 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4310 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4312 type
= build_pointer_type (type
);
4313 size
= int_size_in_bytes (type
);
4314 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4316 nat_mode
= type_natural_mode (type
);
4317 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4318 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4320 /* Pull the value out of the saved registers. */
4322 addr
= create_tmp_var (ptr_type_node
, "addr");
4323 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4327 int needed_intregs
, needed_sseregs
;
4329 tree int_addr
, sse_addr
;
4331 lab_false
= create_artificial_label ();
4332 lab_over
= create_artificial_label ();
4334 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4336 need_temp
= (!REG_P (container
)
4337 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4338 || TYPE_ALIGN (type
) > 128));
4340 /* In case we are passing structure, verify that it is consecutive block
4341 on the register save area. If not we need to do moves. */
4342 if (!need_temp
&& !REG_P (container
))
4344 /* Verify that all registers are strictly consecutive */
4345 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4349 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4351 rtx slot
= XVECEXP (container
, 0, i
);
4352 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4353 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4361 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4363 rtx slot
= XVECEXP (container
, 0, i
);
4364 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4365 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4377 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4378 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4379 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4380 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4383 /* First ensure that we fit completely in registers. */
4386 t
= build_int_cst (TREE_TYPE (gpr
),
4387 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4388 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4389 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4390 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4391 gimplify_and_add (t
, pre_p
);
4395 t
= build_int_cst (TREE_TYPE (fpr
),
4396 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4398 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4399 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4400 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4401 gimplify_and_add (t
, pre_p
);
4404 /* Compute index to start of area used for integer regs. */
4407 /* int_addr = gpr + sav; */
4408 t
= fold_convert (ptr_type_node
, gpr
);
4409 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4410 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
4411 gimplify_and_add (t
, pre_p
);
4415 /* sse_addr = fpr + sav; */
4416 t
= fold_convert (ptr_type_node
, fpr
);
4417 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4418 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
4419 gimplify_and_add (t
, pre_p
);
4424 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4427 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4428 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4429 gimplify_and_add (t
, pre_p
);
4431 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4433 rtx slot
= XVECEXP (container
, 0, i
);
4434 rtx reg
= XEXP (slot
, 0);
4435 enum machine_mode mode
= GET_MODE (reg
);
4436 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4437 tree addr_type
= build_pointer_type (piece_type
);
4440 tree dest_addr
, dest
;
4442 if (SSE_REGNO_P (REGNO (reg
)))
4444 src_addr
= sse_addr
;
4445 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4449 src_addr
= int_addr
;
4450 src_offset
= REGNO (reg
) * 8;
4452 src_addr
= fold_convert (addr_type
, src_addr
);
4453 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
4454 size_int (src_offset
)));
4455 src
= build_va_arg_indirect_ref (src_addr
);
4457 dest_addr
= fold_convert (addr_type
, addr
);
4458 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4459 size_int (INTVAL (XEXP (slot
, 1)))));
4460 dest
= build_va_arg_indirect_ref (dest_addr
);
4462 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
4463 gimplify_and_add (t
, pre_p
);
4469 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4470 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4471 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
4472 gimplify_and_add (t
, pre_p
);
4476 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4477 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4478 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
4479 gimplify_and_add (t
, pre_p
);
4482 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4483 gimplify_and_add (t
, pre_p
);
4485 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4486 append_to_statement_list (t
, pre_p
);
4489 /* ... otherwise out of the overflow area. */
4491 /* Care for on-stack alignment if needed. */
4492 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4493 || integer_zerop (TYPE_SIZE (type
)))
4497 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4498 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4499 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4500 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4501 build_int_cst (TREE_TYPE (t
), -align
));
4503 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4505 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
4506 gimplify_and_add (t2
, pre_p
);
4508 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4509 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4510 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
4511 gimplify_and_add (t
, pre_p
);
4515 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4516 append_to_statement_list (t
, pre_p
);
4519 ptrtype
= build_pointer_type (type
);
4520 addr
= fold_convert (ptrtype
, addr
);
4523 addr
= build_va_arg_indirect_ref (addr
);
4524 return build_va_arg_indirect_ref (addr
);
4527 /* Return nonzero if OPNUM's MEM should be matched
4528 in movabs* patterns. */
4531 ix86_check_movabs (rtx insn
, int opnum
)
4535 set
= PATTERN (insn
);
4536 if (GET_CODE (set
) == PARALLEL
)
4537 set
= XVECEXP (set
, 0, 0);
4538 gcc_assert (GET_CODE (set
) == SET
);
4539 mem
= XEXP (set
, opnum
);
4540 while (GET_CODE (mem
) == SUBREG
)
4541 mem
= SUBREG_REG (mem
);
4542 gcc_assert (GET_CODE (mem
) == MEM
);
4543 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
4546 /* Initialize the table of extra 80387 mathematical constants. */
4549 init_ext_80387_constants (void)
4551 static const char * cst
[5] =
4553 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4554 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4555 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4556 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4557 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4561 for (i
= 0; i
< 5; i
++)
4563 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4564 /* Ensure each constant is rounded to XFmode precision. */
4565 real_convert (&ext_80387_constants_table
[i
],
4566 XFmode
, &ext_80387_constants_table
[i
]);
4569 ext_80387_constants_init
= 1;
4572 /* Return true if the constant is something that can be loaded with
4573 a special instruction. */
4576 standard_80387_constant_p (rtx x
)
4580 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
4583 if (x
== CONST0_RTX (GET_MODE (x
)))
4585 if (x
== CONST1_RTX (GET_MODE (x
)))
4588 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4590 /* For XFmode constants, try to find a special 80387 instruction when
4591 optimizing for size or on those CPUs that benefit from them. */
4592 if (GET_MODE (x
) == XFmode
4593 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
4597 if (! ext_80387_constants_init
)
4598 init_ext_80387_constants ();
4600 for (i
= 0; i
< 5; i
++)
4601 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
4605 /* Load of the constant -0.0 or -1.0 will be split as
4606 fldz;fchs or fld1;fchs sequence. */
4607 if (real_isnegzero (&r
))
4609 if (real_identical (&r
, &dconstm1
))
4615 /* Return the opcode of the special instruction to be used to load
4619 standard_80387_constant_opcode (rtx x
)
4621 switch (standard_80387_constant_p (x
))
4645 /* Return the CONST_DOUBLE representing the 80387 constant that is
4646 loaded by the specified special instruction. The argument IDX
4647 matches the return value from standard_80387_constant_p. */
4650 standard_80387_constant_rtx (int idx
)
4654 if (! ext_80387_constants_init
)
4655 init_ext_80387_constants ();
4671 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
4675 /* Return 1 if mode is a valid mode for sse. */
4677 standard_sse_mode_p (enum machine_mode mode
)
4694 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4697 standard_sse_constant_p (rtx x
)
4699 enum machine_mode mode
= GET_MODE (x
);
4701 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
4703 if (vector_all_ones_operand (x
, mode
)
4704 && standard_sse_mode_p (mode
))
4705 return TARGET_SSE2
? 2 : -1;
4710 /* Return the opcode of the special instruction to be used to load
4714 standard_sse_constant_opcode (rtx insn
, rtx x
)
4716 switch (standard_sse_constant_p (x
))
4719 if (get_attr_mode (insn
) == MODE_V4SF
)
4720 return "xorps\t%0, %0";
4721 else if (get_attr_mode (insn
) == MODE_V2DF
)
4722 return "xorpd\t%0, %0";
4724 return "pxor\t%0, %0";
4726 return "pcmpeqd\t%0, %0";
4731 /* Returns 1 if OP contains a symbol reference */
4734 symbolic_reference_mentioned_p (rtx op
)
4739 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4742 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4743 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4749 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4750 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4754 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4761 /* Return 1 if it is appropriate to emit `ret' instructions in the
4762 body of a function. Do this only if the epilogue is simple, needing a
4763 couple of insns. Prior to reloading, we can't tell how many registers
4764 must be saved, so return 0 then. Return 0 if there is no frame
4765 marker to de-allocate. */
4768 ix86_can_use_return_insn_p (void)
4770 struct ix86_frame frame
;
4772 if (! reload_completed
|| frame_pointer_needed
)
4775 /* Don't allow more than 32 pop, since that's all we can do
4776 with one instruction. */
4777 if (current_function_pops_args
4778 && current_function_args_size
>= 32768)
4781 ix86_compute_frame_layout (&frame
);
4782 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
4785 /* Value should be nonzero if functions must have frame pointers.
4786 Zero means the frame pointer need not be set up (and parms may
4787 be accessed via the stack pointer) in functions that seem suitable. */
4790 ix86_frame_pointer_required (void)
4792 /* If we accessed previous frames, then the generated code expects
4793 to be able to access the saved ebp value in our frame. */
4794 if (cfun
->machine
->accesses_prev_frame
)
4797 /* Several x86 os'es need a frame pointer for other reasons,
4798 usually pertaining to setjmp. */
4799 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4802 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4803 the frame pointer by default. Turn it back on now if we've not
4804 got a leaf function. */
4805 if (TARGET_OMIT_LEAF_FRAME_POINTER
4806 && (!current_function_is_leaf
4807 || ix86_current_function_calls_tls_descriptor
))
4810 if (current_function_profile
)
4816 /* Record that the current function accesses previous call frames. */
4819 ix86_setup_frame_addresses (void)
4821 cfun
->machine
->accesses_prev_frame
= 1;
/* Use hidden link-once sections for the pc-thunk helpers when the
   assembler and object format support them; otherwise fall back to
   plain local labels.  */
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of register numbers for which a pc-thunk label was emitted.  */
static int pic_labels_used;
4832 /* Fills in the label name that should be used for a pc thunk for
4833 the given register. */
4836 get_pc_thunk_name (char name
[32], unsigned int regno
)
4838 gcc_assert (!TARGET_64BIT
);
4840 if (USE_HIDDEN_LINKONCE
)
4841 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4843 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4847 /* This function generates code for -fpic that loads %ebx with
4848 the return address of the caller and then returns. */
4851 ix86_file_end (void)
4856 for (regno
= 0; regno
< 8; ++regno
)
4860 if (! ((pic_labels_used
>> regno
) & 1))
4863 get_pc_thunk_name (name
, regno
);
4868 switch_to_section (darwin_sections
[text_coal_section
]);
4869 fputs ("\t.weak_definition\t", asm_out_file
);
4870 assemble_name (asm_out_file
, name
);
4871 fputs ("\n\t.private_extern\t", asm_out_file
);
4872 assemble_name (asm_out_file
, name
);
4873 fputs ("\n", asm_out_file
);
4874 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4878 if (USE_HIDDEN_LINKONCE
)
4882 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4884 TREE_PUBLIC (decl
) = 1;
4885 TREE_STATIC (decl
) = 1;
4886 DECL_ONE_ONLY (decl
) = 1;
4888 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4889 switch_to_section (get_named_section (decl
, NULL
, 0));
4891 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
4892 fputs ("\t.hidden\t", asm_out_file
);
4893 assemble_name (asm_out_file
, name
);
4894 fputc ('\n', asm_out_file
);
4895 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
4899 switch_to_section (text_section
);
4900 ASM_OUTPUT_LABEL (asm_out_file
, name
);
4903 xops
[0] = gen_rtx_REG (SImode
, regno
);
4904 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4906 output_asm_insn ("ret", xops
);
4909 if (NEED_INDICATE_EXEC_STACK
)
4910 file_end_indicate_exec_stack ();
4913 /* Emit code for the SET_GOT patterns. */
4916 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
4921 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4923 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4925 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
4928 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4930 output_asm_insn ("call\t%a2", xops
);
4933 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4934 is what will be referenced by the Mach-O PIC subsystem. */
4936 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4939 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4940 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4943 output_asm_insn ("pop{l}\t%0", xops
);
4948 get_pc_thunk_name (name
, REGNO (dest
));
4949 pic_labels_used
|= 1 << REGNO (dest
);
4951 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4952 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4953 output_asm_insn ("call\t%X2", xops
);
4954 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4955 is what will be referenced by the Mach-O PIC subsystem. */
4958 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4960 targetm
.asm_out
.internal_label (asm_out_file
, "L",
4961 CODE_LABEL_NUMBER (label
));
4968 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
4976 /* Generate an "push" pattern for input ARG. */
4981 return gen_rtx_SET (VOIDmode
,
4983 gen_rtx_PRE_DEC (Pmode
,
4984 stack_pointer_rtx
)),
4988 /* Return >= 0 if there is an unused call-clobbered register available
4989 for the entire function. */
4992 ix86_select_alt_pic_regnum (void)
4994 if (current_function_is_leaf
&& !current_function_profile
4995 && !ix86_current_function_calls_tls_descriptor
)
4998 for (i
= 2; i
>= 0; --i
)
4999 if (!regs_ever_live
[i
])
5003 return INVALID_REGNUM
;
5006 /* Return 1 if we need to save REGNO. */
5008 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5010 if (pic_offset_table_rtx
5011 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5012 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5013 || current_function_profile
5014 || current_function_calls_eh_return
5015 || current_function_uses_const_pool
))
5017 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5022 if (current_function_calls_eh_return
&& maybe_eh_return
)
5027 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5028 if (test
== INVALID_REGNUM
)
5035 if (cfun
->machine
->force_align_arg_pointer
5036 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5039 return (regs_ever_live
[regno
]
5040 && !call_used_regs
[regno
]
5041 && !fixed_regs
[regno
]
5042 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5045 /* Return number of registers to be saved on the stack. */
5048 ix86_nsaved_regs (void)
5053 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5054 if (ix86_save_reg (regno
, true))
5059 /* Return the offset between two registers, one to be eliminated, and the other
5060 its replacement, at the start of a routine. */
5063 ix86_initial_elimination_offset (int from
, int to
)
5065 struct ix86_frame frame
;
5066 ix86_compute_frame_layout (&frame
);
5068 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5069 return frame
.hard_frame_pointer_offset
;
5070 else if (from
== FRAME_POINTER_REGNUM
5071 && to
== HARD_FRAME_POINTER_REGNUM
)
5072 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5075 gcc_assert (to
== STACK_POINTER_REGNUM
);
5077 if (from
== ARG_POINTER_REGNUM
)
5078 return frame
.stack_pointer_offset
;
5080 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5081 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5085 /* Fill structure ix86_frame about frame of currently computed function. */
5088 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5090 HOST_WIDE_INT total_size
;
5091 unsigned int stack_alignment_needed
;
5092 HOST_WIDE_INT offset
;
5093 unsigned int preferred_alignment
;
5094 HOST_WIDE_INT size
= get_frame_size ();
5096 frame
->nregs
= ix86_nsaved_regs ();
5099 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5100 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5102 /* During reload iteration the amount of registers saved can change.
5103 Recompute the value as needed. Do not recompute when amount of registers
5104 didn't change as reload does multiple calls to the function and does not
5105 expect the decision to change within single iteration. */
5107 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5109 int count
= frame
->nregs
;
5111 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5112 /* The fast prologue uses move instead of push to save registers. This
5113 is significantly longer, but also executes faster as modern hardware
5114 can execute the moves in parallel, but can't do that for push/pop.
5116 Be careful about choosing what prologue to emit: When function takes
5117 many instructions to execute we may use slow version as well as in
5118 case function is known to be outside hot spot (this is known with
5119 feedback only). Weight the size of function by number of registers
5120 to save as it is cheap to use one or two push instructions but very
5121 slow to use many of them. */
5123 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5124 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5125 || (flag_branch_probabilities
5126 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5127 cfun
->machine
->use_fast_prologue_epilogue
= false;
5129 cfun
->machine
->use_fast_prologue_epilogue
5130 = !expensive_function_p (count
);
5132 if (TARGET_PROLOGUE_USING_MOVE
5133 && cfun
->machine
->use_fast_prologue_epilogue
)
5134 frame
->save_regs_using_mov
= true;
5136 frame
->save_regs_using_mov
= false;
5139 /* Skip return address and saved base pointer. */
5140 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5142 frame
->hard_frame_pointer_offset
= offset
;
5144 /* Do some sanity checking of stack_alignment_needed and
5145 preferred_alignment, since i386 port is the only using those features
5146 that may break easily. */
5148 gcc_assert (!size
|| stack_alignment_needed
);
5149 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5150 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5151 gcc_assert (stack_alignment_needed
5152 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5154 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5155 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5157 /* Register save area */
5158 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5161 if (ix86_save_varrargs_registers
)
5163 offset
+= X86_64_VARARGS_SIZE
;
5164 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5167 frame
->va_arg_size
= 0;
5169 /* Align start of frame for local function. */
5170 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5171 & -stack_alignment_needed
) - offset
;
5173 offset
+= frame
->padding1
;
5175 /* Frame pointer points here. */
5176 frame
->frame_pointer_offset
= offset
;
5180 /* Add outgoing arguments area. Can be skipped if we eliminated
5181 all the function calls as dead code.
5182 Skipping is however impossible when function calls alloca. Alloca
5183 expander assumes that last current_function_outgoing_args_size
5184 of stack frame are unused. */
5185 if (ACCUMULATE_OUTGOING_ARGS
5186 && (!current_function_is_leaf
|| current_function_calls_alloca
5187 || ix86_current_function_calls_tls_descriptor
))
5189 offset
+= current_function_outgoing_args_size
;
5190 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5193 frame
->outgoing_arguments_size
= 0;
5195 /* Align stack boundary. Only needed if we're calling another function
5197 if (!current_function_is_leaf
|| current_function_calls_alloca
5198 || ix86_current_function_calls_tls_descriptor
)
5199 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5200 & -preferred_alignment
) - offset
;
5202 frame
->padding2
= 0;
5204 offset
+= frame
->padding2
;
5206 /* We've reached end of stack frame. */
5207 frame
->stack_pointer_offset
= offset
;
5209 /* Size prologue needs to allocate. */
5210 frame
->to_allocate
=
5211 (size
+ frame
->padding1
+ frame
->padding2
5212 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5214 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5215 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5216 frame
->save_regs_using_mov
= false;
5218 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5219 && current_function_is_leaf
5220 && !ix86_current_function_calls_tls_descriptor
)
5222 frame
->red_zone_size
= frame
->to_allocate
;
5223 if (frame
->save_regs_using_mov
)
5224 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5225 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5226 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5229 frame
->red_zone_size
= 0;
5230 frame
->to_allocate
-= frame
->red_zone_size
;
5231 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5233 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
5234 fprintf (stderr
, "size: %i\n", size
);
5235 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
5236 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
5237 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
5238 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
5239 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
5240 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
5241 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
5242 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
5243 frame
->hard_frame_pointer_offset
);
5244 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
5248 /* Emit code to save registers in the prologue. */
5251 ix86_emit_save_regs (void)
5256 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5257 if (ix86_save_reg (regno
, true))
5259 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5260 RTX_FRAME_RELATED_P (insn
) = 1;
5264 /* Emit code to save registers using MOV insns. First register
5265 is restored from POINTER + OFFSET. */
5267 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5272 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5273 if (ix86_save_reg (regno
, true))
5275 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5277 gen_rtx_REG (Pmode
, regno
));
5278 RTX_FRAME_RELATED_P (insn
) = 1;
5279 offset
+= UNITS_PER_WORD
;
5283 /* Expand prologue or epilogue stack adjustment.
5284 The pattern exist to put a dependency on all ebp-based memory accesses.
5285 STYLE should be negative if instructions should be marked as frame related,
5286 zero if %r11 register is live and cannot be freely used and positive
5290 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5295 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5296 else if (x86_64_immediate_operand (offset
, DImode
))
5297 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5301 /* r11 is used by indirect sibcall return as well, set before the
5302 epilogue and used after the epilogue. ATM indirect sibcall
5303 shouldn't be used together with huge frame sizes in one
5304 function because of the frame_size check in sibcall.c. */
5306 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5307 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5309 RTX_FRAME_RELATED_P (insn
) = 1;
5310 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5314 RTX_FRAME_RELATED_P (insn
) = 1;
5317 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5320 ix86_internal_arg_pointer (void)
5322 bool has_force_align_arg_pointer
=
5323 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5324 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5325 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5326 && DECL_NAME (current_function_decl
)
5327 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5328 && DECL_FILE_SCOPE_P (current_function_decl
))
5329 || ix86_force_align_arg_pointer
5330 || has_force_align_arg_pointer
)
5332 /* Nested functions can't realign the stack due to a register
5334 if (DECL_CONTEXT (current_function_decl
)
5335 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5337 if (ix86_force_align_arg_pointer
)
5338 warning (0, "-mstackrealign ignored for nested functions");
5339 if (has_force_align_arg_pointer
)
5340 error ("%s not supported for nested functions",
5341 ix86_force_align_arg_pointer_string
);
5342 return virtual_incoming_args_rtx
;
5344 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5345 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5348 return virtual_incoming_args_rtx
;
5351 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5352 This is called from dwarf2out.c to emit call frame instructions
5353 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5355 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5357 rtx unspec
= SET_SRC (pattern
);
5358 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5362 case UNSPEC_REG_SAVE
:
5363 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5364 SET_DEST (pattern
));
5366 case UNSPEC_DEF_CFA
:
5367 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5368 INTVAL (XVECEXP (unspec
, 0, 0)));
5375 /* Expand the prologue into a bunch of separate insns. */
5378 ix86_expand_prologue (void)
5382 struct ix86_frame frame
;
5383 HOST_WIDE_INT allocate
;
5385 ix86_compute_frame_layout (&frame
);
5387 if (cfun
->machine
->force_align_arg_pointer
)
5391 /* Grab the argument pointer. */
5392 x
= plus_constant (stack_pointer_rtx
, 4);
5393 y
= cfun
->machine
->force_align_arg_pointer
;
5394 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5395 RTX_FRAME_RELATED_P (insn
) = 1;
5397 /* The unwind info consists of two parts: install the fafp as the cfa,
5398 and record the fafp as the "save register" of the stack pointer.
5399 The later is there in order that the unwinder can see where it
5400 should restore the stack pointer across the and insn. */
5401 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5402 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5403 RTX_FRAME_RELATED_P (x
) = 1;
5404 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5406 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5407 RTX_FRAME_RELATED_P (y
) = 1;
5408 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5409 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5410 REG_NOTES (insn
) = x
;
5412 /* Align the stack. */
5413 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5416 /* And here we cheat like madmen with the unwind info. We force the
5417 cfa register back to sp+4, which is exactly what it was at the
5418 start of the function. Re-pushing the return address results in
5419 the return at the same spot relative to the cfa, and thus is
5420 correct wrt the unwind info. */
5421 x
= cfun
->machine
->force_align_arg_pointer
;
5422 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5423 insn
= emit_insn (gen_push (x
));
5424 RTX_FRAME_RELATED_P (insn
) = 1;
5427 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5428 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5429 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5430 REG_NOTES (insn
) = x
;
5433 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5434 slower on all targets. Also sdb doesn't like it. */
5436 if (frame_pointer_needed
)
5438 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5439 RTX_FRAME_RELATED_P (insn
) = 1;
5441 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5442 RTX_FRAME_RELATED_P (insn
) = 1;
5445 allocate
= frame
.to_allocate
;
5447 if (!frame
.save_regs_using_mov
)
5448 ix86_emit_save_regs ();
5450 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5452 /* When using red zone we may start register saving before allocating
5453 the stack frame saving one cycle of the prologue. */
5454 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5455 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5456 : stack_pointer_rtx
,
5457 -frame
.nregs
* UNITS_PER_WORD
);
5461 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5462 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5463 GEN_INT (-allocate
), -1);
5466 /* Only valid for Win32. */
5467 rtx eax
= gen_rtx_REG (SImode
, 0);
5468 bool eax_live
= ix86_eax_live_at_start_p ();
5471 gcc_assert (!TARGET_64BIT
);
5475 emit_insn (gen_push (eax
));
5479 emit_move_insn (eax
, GEN_INT (allocate
));
5481 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5482 RTX_FRAME_RELATED_P (insn
) = 1;
5483 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5484 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5485 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5486 t
, REG_NOTES (insn
));
5490 if (frame_pointer_needed
)
5491 t
= plus_constant (hard_frame_pointer_rtx
,
5494 - frame
.nregs
* UNITS_PER_WORD
);
5496 t
= plus_constant (stack_pointer_rtx
, allocate
);
5497 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
5501 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
5503 if (!frame_pointer_needed
|| !frame
.to_allocate
)
5504 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
5506 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
5507 -frame
.nregs
* UNITS_PER_WORD
);
5510 pic_reg_used
= false;
5511 if (pic_offset_table_rtx
5512 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5513 || current_function_profile
))
5515 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
5517 if (alt_pic_reg_used
!= INVALID_REGNUM
)
5518 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
5520 pic_reg_used
= true;
5526 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
5528 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
5530 /* Even with accurate pre-reload life analysis, we can wind up
5531 deleting all references to the pic register after reload.
5532 Consider if cross-jumping unifies two sides of a branch
5533 controlled by a comparison vs the only read from a global.
5534 In which case, allow the set_got to be deleted, though we're
5535 too late to do anything about the ebx save in the prologue. */
5536 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
5539 /* Prevent function calls from be scheduled before the call to mcount.
5540 In the pic_reg_used case, make sure that the got load isn't deleted. */
5541 if (current_function_profile
)
5542 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
5545 /* Emit code to restore saved registers using MOV insns. First register
5546 is restored from POINTER + OFFSET. */
5548 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
5549 int maybe_eh_return
)
5552 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
5554 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5555 if (ix86_save_reg (regno
, maybe_eh_return
))
5557 /* Ensure that adjust_address won't be forced to produce pointer
5558 out of range allowed by x86-64 instruction set. */
5559 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
5563 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
5564 emit_move_insn (r11
, GEN_INT (offset
));
5565 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
5566 base_address
= gen_rtx_MEM (Pmode
, r11
);
5569 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
5570 adjust_address (base_address
, Pmode
, offset
));
5571 offset
+= UNITS_PER_WORD
;
5575 /* Restore function stack, frame, and registers. */
5578 ix86_expand_epilogue (int style
)
5581 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
5582 struct ix86_frame frame
;
5583 HOST_WIDE_INT offset
;
5585 ix86_compute_frame_layout (&frame
);
5587 /* Calculate start of saved registers relative to ebp. Special care
5588 must be taken for the normal return case of a function using
5589 eh_return: the eax and edx registers are marked as saved, but not
5590 restored along this path. */
5591 offset
= frame
.nregs
;
5592 if (current_function_calls_eh_return
&& style
!= 2)
5594 offset
*= -UNITS_PER_WORD
;
5596 /* If we're only restoring one register and sp is not valid then
5597 using a move instruction to restore the register since it's
5598 less work than reloading sp and popping the register.
5600 The default code result in stack adjustment using add/lea instruction,
5601 while this code results in LEAVE instruction (or discrete equivalent),
5602 so it is profitable in some other cases as well. Especially when there
5603 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5604 and there is exactly one register to pop. This heuristic may need some
5605 tuning in future. */
5606 if ((!sp_valid
&& frame
.nregs
<= 1)
5607 || (TARGET_EPILOGUE_USING_MOVE
5608 && cfun
->machine
->use_fast_prologue_epilogue
5609 && (frame
.nregs
> 1 || frame
.to_allocate
))
5610 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5611 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5612 && cfun
->machine
->use_fast_prologue_epilogue
5613 && frame
.nregs
== 1)
5614 || current_function_calls_eh_return
)
5616 /* Restore registers. We can use ebp or esp to address the memory
5617 locations. If both are available, default to ebp, since offsets
5618 are known to be small. Only exception is esp pointing directly to the
5619 end of block of saved registers, where we may simplify addressing
5622 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5623 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5624 frame
.to_allocate
, style
== 2);
5626 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5627 offset
, style
== 2);
5629 /* eh_return epilogues need %ecx added to the stack pointer. */
5632 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5634 if (frame_pointer_needed
)
5636 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5637 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5638 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5640 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5641 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5643 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
5648 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5649 tmp
= plus_constant (tmp
, (frame
.to_allocate
5650 + frame
.nregs
* UNITS_PER_WORD
));
5651 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5654 else if (!frame_pointer_needed
)
5655 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5656 GEN_INT (frame
.to_allocate
5657 + frame
.nregs
* UNITS_PER_WORD
),
5659 /* If not an i386, mov & pop is faster than "leave". */
5660 else if (TARGET_USE_LEAVE
|| optimize_size
5661 || !cfun
->machine
->use_fast_prologue_epilogue
)
5662 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5665 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5666 hard_frame_pointer_rtx
,
5669 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5671 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5676 /* First step is to deallocate the stack frame so that we can
5677 pop the registers. */
5680 gcc_assert (frame_pointer_needed
);
5681 pro_epilogue_adjust_stack (stack_pointer_rtx
,
5682 hard_frame_pointer_rtx
,
5683 GEN_INT (offset
), style
);
5685 else if (frame
.to_allocate
)
5686 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5687 GEN_INT (frame
.to_allocate
), style
);
5689 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5690 if (ix86_save_reg (regno
, false))
5693 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5695 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5697 if (frame_pointer_needed
)
5699 /* Leave results in shorter dependency chains on CPUs that are
5700 able to grok it fast. */
5701 if (TARGET_USE_LEAVE
)
5702 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5703 else if (TARGET_64BIT
)
5704 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5706 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5710 if (cfun
->machine
->force_align_arg_pointer
)
5712 emit_insn (gen_addsi3 (stack_pointer_rtx
,
5713 cfun
->machine
->force_align_arg_pointer
,
5717 /* Sibcall epilogues don't want a return instruction. */
5721 if (current_function_pops_args
&& current_function_args_size
)
5723 rtx popc
= GEN_INT (current_function_pops_args
);
5725 /* i386 can only pop 64K bytes. If asked to pop more, pop
5726 return address, do explicit add, and jump indirectly to the
5729 if (current_function_pops_args
>= 65536)
5731 rtx ecx
= gen_rtx_REG (SImode
, 2);
5733 /* There is no "pascal" calling convention in 64bit ABI. */
5734 gcc_assert (!TARGET_64BIT
);
5736 emit_insn (gen_popsi1 (ecx
));
5737 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5738 emit_jump_insn (gen_return_indirect_internal (ecx
));
5741 emit_jump_insn (gen_return_pop_internal (popc
));
5744 emit_jump_insn (gen_return_internal ());
5747 /* Reset from the function's potential modifications. */
5750 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
5751 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
5753 if (pic_offset_table_rtx
)
5754 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
5756 /* Mach-O doesn't support labels at the end of objects, so if
5757 it looks like we might want one, insert a NOP. */
5759 rtx insn
= get_last_insn ();
5762 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
5763 insn
= PREV_INSN (insn
);
5767 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
5768 fputs ("\tnop\n", file
);
5774 /* Extract the parts of an RTL expression that is a valid memory address
5775 for an instruction. Return 0 if the structure of the address is
5776 grossly off. Return -1 if the address contains ASHIFT, so it is not
5777 strictly valid, but still used for computing length of lea instruction. */
5780 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
5782 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
5783 rtx base_reg
, index_reg
;
5784 HOST_WIDE_INT scale
= 1;
5785 rtx scale_rtx
= NULL_RTX
;
5787 enum ix86_address_seg seg
= SEG_DEFAULT
;
5789 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
5791 else if (GET_CODE (addr
) == PLUS
)
5801 addends
[n
++] = XEXP (op
, 1);
5804 while (GET_CODE (op
) == PLUS
);
5809 for (i
= n
; i
>= 0; --i
)
5812 switch (GET_CODE (op
))
5817 index
= XEXP (op
, 0);
5818 scale_rtx
= XEXP (op
, 1);
5822 if (XINT (op
, 1) == UNSPEC_TP
5823 && TARGET_TLS_DIRECT_SEG_REFS
5824 && seg
== SEG_DEFAULT
)
5825 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
5854 else if (GET_CODE (addr
) == MULT
)
5856 index
= XEXP (addr
, 0); /* index*scale */
5857 scale_rtx
= XEXP (addr
, 1);
5859 else if (GET_CODE (addr
) == ASHIFT
)
5863 /* We're called for lea too, which implements ashift on occasion. */
5864 index
= XEXP (addr
, 0);
5865 tmp
= XEXP (addr
, 1);
5866 if (GET_CODE (tmp
) != CONST_INT
)
5868 scale
= INTVAL (tmp
);
5869 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5875 disp
= addr
; /* displacement */
5877 /* Extract the integral value of scale. */
5880 if (GET_CODE (scale_rtx
) != CONST_INT
)
5882 scale
= INTVAL (scale_rtx
);
5885 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
5886 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
5888 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5889 if (base_reg
&& index_reg
&& scale
== 1
5890 && (index_reg
== arg_pointer_rtx
5891 || index_reg
== frame_pointer_rtx
5892 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
5895 tmp
= base
, base
= index
, index
= tmp
;
5896 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
5899 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5900 if ((base_reg
== hard_frame_pointer_rtx
5901 || base_reg
== frame_pointer_rtx
5902 || base_reg
== arg_pointer_rtx
) && !disp
)
5905 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5906 Avoid this by transforming to [%esi+0]. */
5907 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
5908 && base_reg
&& !index_reg
&& !disp
5910 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
5913 /* Special case: encode reg+reg instead of reg*2. */
5914 if (!base
&& index
&& scale
&& scale
== 2)
5915 base
= index
, base_reg
= index_reg
, scale
= 1;
5917 /* Special case: scaling cannot be encoded without base or displacement. */
5918 if (!base
&& !disp
&& index
&& scale
!= 1)
5930 /* Return cost of the memory address x.
5931 For i386, it is better to use a complex address than let gcc copy
5932 the address into a reg and make a new pseudo. But not if the address
5933 requires to two regs - that would mean more pseudos with longer
5936 ix86_address_cost (rtx x
)
5938 struct ix86_address parts
;
5940 int ok
= ix86_decompose_address (x
, &parts
);
5944 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
5945 parts
.base
= SUBREG_REG (parts
.base
);
5946 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
5947 parts
.index
= SUBREG_REG (parts
.index
);
5949 /* More complex memory references are better. */
5950 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5952 if (parts
.seg
!= SEG_DEFAULT
)
5955 /* Attempt to minimize number of registers in the address. */
5957 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5959 && (!REG_P (parts
.index
)
5960 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5964 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5966 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5967 && parts
.base
!= parts
.index
)
5970 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5971 since it's predecode logic can't detect the length of instructions
5972 and it degenerates to vector decoded. Increase cost of such
5973 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5974 to split such addresses or even refuse such addresses at all.
5976 Following addressing modes are affected:
5981 The first and last case may be avoidable by explicitly coding the zero in
5982 memory address, but I don't have AMD-K6 machine handy to check this
5986 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5987 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5988 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5994 /* If X is a machine specific address (i.e. a symbol or label being
5995 referenced as a displacement from the GOT implemented using an
5996 UNSPEC), then return the base term. Otherwise return X. */
5999 ix86_find_base_term (rtx x
)
6005 if (GET_CODE (x
) != CONST
)
6008 if (GET_CODE (term
) == PLUS
6009 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
6010 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6011 term
= XEXP (term
, 0);
6012 if (GET_CODE (term
) != UNSPEC
6013 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6016 term
= XVECEXP (term
, 0, 0);
6018 if (GET_CODE (term
) != SYMBOL_REF
6019 && GET_CODE (term
) != LABEL_REF
)
6025 term
= ix86_delegitimize_address (x
);
6027 if (GET_CODE (term
) != SYMBOL_REF
6028 && GET_CODE (term
) != LABEL_REF
)
6034 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6035 this is used for to form addresses to local data when -fPIC is in
6039 darwin_local_data_pic (rtx disp
)
6041 if (GET_CODE (disp
) == MINUS
)
6043 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6044 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6045 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6047 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6048 if (! strcmp (sym_name
, "<pic base>"))
6056 /* Determine if a given RTX is a valid constant. We already know this
6057 satisfies CONSTANT_P. */
6060 legitimate_constant_p (rtx x
)
6062 switch (GET_CODE (x
))
6067 if (GET_CODE (x
) == PLUS
)
6069 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6074 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6077 /* Only some unspecs are valid as "constants". */
6078 if (GET_CODE (x
) == UNSPEC
)
6079 switch (XINT (x
, 1))
6082 return TARGET_64BIT
;
6085 x
= XVECEXP (x
, 0, 0);
6086 return (GET_CODE (x
) == SYMBOL_REF
6087 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6089 x
= XVECEXP (x
, 0, 0);
6090 return (GET_CODE (x
) == SYMBOL_REF
6091 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6096 /* We must have drilled down to a symbol. */
6097 if (GET_CODE (x
) == LABEL_REF
)
6099 if (GET_CODE (x
) != SYMBOL_REF
)
6104 /* TLS symbols are never valid. */
6105 if (SYMBOL_REF_TLS_MODEL (x
))
6110 if (GET_MODE (x
) == TImode
6111 && x
!= CONST0_RTX (TImode
)
6117 if (x
== CONST0_RTX (GET_MODE (x
)))
6125 /* Otherwise we handle everything else in the move patterns. */
6129 /* Determine if it's legal to put X into the constant pool. This
6130 is not possible for the address of thread-local symbols, which
6131 is checked above. */
6134 ix86_cannot_force_const_mem (rtx x
)
6136 /* We can always put integral constants and vectors in memory. */
6137 switch (GET_CODE (x
))
6147 return !legitimate_constant_p (x
);
6150 /* Determine if a given RTX is a valid constant address. */
6153 constant_address_p (rtx x
)
6155 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6158 /* Nonzero if the constant value X is a legitimate general operand
6159 when generating PIC code. It is given that flag_pic is on and
6160 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6163 legitimate_pic_operand_p (rtx x
)
6167 switch (GET_CODE (x
))
6170 inner
= XEXP (x
, 0);
6171 if (GET_CODE (inner
) == PLUS
6172 && GET_CODE (XEXP (inner
, 1)) == CONST_INT
)
6173 inner
= XEXP (inner
, 0);
6175 /* Only some unspecs are valid as "constants". */
6176 if (GET_CODE (inner
) == UNSPEC
)
6177 switch (XINT (inner
, 1))
6180 return TARGET_64BIT
;
6182 x
= XVECEXP (inner
, 0, 0);
6183 return (GET_CODE (x
) == SYMBOL_REF
6184 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6192 return legitimate_pic_address_disp_p (x
);
6199 /* Determine if a given CONST RTX is a valid memory displacement
6203 legitimate_pic_address_disp_p (rtx disp
)
6207 /* In 64bit mode we can allow direct addresses of symbols and labels
6208 when they are not dynamic symbols. */
6211 rtx op0
= disp
, op1
;
6213 switch (GET_CODE (disp
))
6219 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6221 op0
= XEXP (XEXP (disp
, 0), 0);
6222 op1
= XEXP (XEXP (disp
, 0), 1);
6223 if (GET_CODE (op1
) != CONST_INT
6224 || INTVAL (op1
) >= 16*1024*1024
6225 || INTVAL (op1
) < -16*1024*1024)
6227 if (GET_CODE (op0
) == LABEL_REF
)
6229 if (GET_CODE (op0
) != SYMBOL_REF
)
6234 /* TLS references should always be enclosed in UNSPEC. */
6235 if (SYMBOL_REF_TLS_MODEL (op0
))
6237 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
))
6245 if (GET_CODE (disp
) != CONST
)
6247 disp
= XEXP (disp
, 0);
6251 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6252 of GOT tables. We should not need these anyway. */
6253 if (GET_CODE (disp
) != UNSPEC
6254 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6255 && XINT (disp
, 1) != UNSPEC_GOTOFF
))
6258 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6259 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6265 if (GET_CODE (disp
) == PLUS
)
6267 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
6269 disp
= XEXP (disp
, 0);
6273 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6276 if (GET_CODE (disp
) != UNSPEC
)
6279 switch (XINT (disp
, 1))
6284 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
6286 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6287 While ABI specify also 32bit relocation but we don't produce it in
6288 small PIC model at all. */
6289 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6290 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6292 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
6294 case UNSPEC_GOTTPOFF
:
6295 case UNSPEC_GOTNTPOFF
:
6296 case UNSPEC_INDNTPOFF
:
6299 disp
= XVECEXP (disp
, 0, 0);
6300 return (GET_CODE (disp
) == SYMBOL_REF
6301 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6303 disp
= XVECEXP (disp
, 0, 0);
6304 return (GET_CODE (disp
) == SYMBOL_REF
6305 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6307 disp
= XVECEXP (disp
, 0, 0);
6308 return (GET_CODE (disp
) == SYMBOL_REF
6309 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6315 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6316 memory address for an instruction. The MODE argument is the machine mode
6317 for the MEM expression that wants to use this address.
6319 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6320 convert common non-canonical forms to canonical form so that they will
6324 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
6326 struct ix86_address parts
;
6327 rtx base
, index
, disp
;
6328 HOST_WIDE_INT scale
;
6329 const char *reason
= NULL
;
6330 rtx reason_rtx
= NULL_RTX
;
6332 if (TARGET_DEBUG_ADDR
)
6335 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6336 GET_MODE_NAME (mode
), strict
);
6340 if (ix86_decompose_address (addr
, &parts
) <= 0)
6342 reason
= "decomposition failed";
6347 index
= parts
.index
;
6349 scale
= parts
.scale
;
6351 /* Validate base register.
6353 Don't allow SUBREG's that span more than a word here. It can lead to spill
6354 failures when the base is one word out of a two word structure, which is
6355 represented internally as a DImode int. */
6364 else if (GET_CODE (base
) == SUBREG
6365 && REG_P (SUBREG_REG (base
))
6366 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6368 reg
= SUBREG_REG (base
);
6371 reason
= "base is not a register";
6375 if (GET_MODE (base
) != Pmode
)
6377 reason
= "base is not in Pmode";
6381 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6382 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6384 reason
= "base is not valid";
6389 /* Validate index register.
6391 Don't allow SUBREG's that span more than a word here -- same as above. */
6400 else if (GET_CODE (index
) == SUBREG
6401 && REG_P (SUBREG_REG (index
))
6402 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6404 reg
= SUBREG_REG (index
);
6407 reason
= "index is not a register";
6411 if (GET_MODE (index
) != Pmode
)
6413 reason
= "index is not in Pmode";
6417 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6418 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6420 reason
= "index is not valid";
6425 /* Validate scale factor. */
6428 reason_rtx
= GEN_INT (scale
);
6431 reason
= "scale without index";
6435 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6437 reason
= "scale is not a valid multiplier";
6442 /* Validate displacement. */
6447 if (GET_CODE (disp
) == CONST
6448 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6449 switch (XINT (XEXP (disp
, 0), 1))
6451 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6452 used. While ABI specify also 32bit relocations, we don't produce
6453 them at all and use IP relative instead. */
6456 gcc_assert (flag_pic
);
6458 goto is_legitimate_pic
;
6459 reason
= "64bit address unspec";
6462 case UNSPEC_GOTPCREL
:
6463 gcc_assert (flag_pic
);
6464 goto is_legitimate_pic
;
6466 case UNSPEC_GOTTPOFF
:
6467 case UNSPEC_GOTNTPOFF
:
6468 case UNSPEC_INDNTPOFF
:
6474 reason
= "invalid address unspec";
6478 else if (SYMBOLIC_CONST (disp
)
6482 && MACHOPIC_INDIRECT
6483 && !machopic_operand_p (disp
)
6489 if (TARGET_64BIT
&& (index
|| base
))
6491 /* foo@dtpoff(%rX) is ok. */
6492 if (GET_CODE (disp
) != CONST
6493 || GET_CODE (XEXP (disp
, 0)) != PLUS
6494 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
6495 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
6496 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
6497 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
6499 reason
= "non-constant pic memory reference";
6503 else if (! legitimate_pic_address_disp_p (disp
))
6505 reason
= "displacement is an invalid pic construct";
6509 /* This code used to verify that a symbolic pic displacement
6510 includes the pic_offset_table_rtx register.
6512 While this is good idea, unfortunately these constructs may
6513 be created by "adds using lea" optimization for incorrect
6522 This code is nonsensical, but results in addressing
6523 GOT table with pic_offset_table_rtx base. We can't
6524 just refuse it easily, since it gets matched by
6525 "addsi3" pattern, that later gets split to lea in the
6526 case output register differs from input. While this
6527 can be handled by separate addsi pattern for this case
6528 that never results in lea, this seems to be easier and
6529 correct fix for crash to disable this test. */
6531 else if (GET_CODE (disp
) != LABEL_REF
6532 && GET_CODE (disp
) != CONST_INT
6533 && (GET_CODE (disp
) != CONST
6534 || !legitimate_constant_p (disp
))
6535 && (GET_CODE (disp
) != SYMBOL_REF
6536 || !legitimate_constant_p (disp
)))
6538 reason
= "displacement is not constant";
6541 else if (TARGET_64BIT
6542 && !x86_64_immediate_operand (disp
, VOIDmode
))
6544 reason
= "displacement is out of range";
6549 /* Everything looks valid. */
6550 if (TARGET_DEBUG_ADDR
)
6551 fprintf (stderr
, "Success.\n");
6555 if (TARGET_DEBUG_ADDR
)
6557 fprintf (stderr
, "Error: %s\n", reason
);
6558 debug_rtx (reason_rtx
);
6563 /* Return a unique alias set for the GOT. */
6565 static HOST_WIDE_INT
6566 ix86_GOT_alias_set (void)
6568 static HOST_WIDE_INT set
= -1;
6570 set
= new_alias_set ();
6574 /* Return a legitimate reference for ORIG (an address) using the
6575 register REG. If REG is 0, a new pseudo is generated.
6577 There are two types of references that must be handled:
6579 1. Global data references must load the address from the GOT, via
6580 the PIC reg. An insn is emitted to do this load, and the reg is
6583 2. Static data references, constant pool addresses, and code labels
6584 compute the address as an offset from the GOT, whose base is in
6585 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6586 differentiate them from global data objects. The returned
6587 address is the PIC reg + an unspec constant.
6589 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6590 reg also appears in the address. */
6593 legitimize_pic_address (rtx orig
, rtx reg
)
6600 if (TARGET_MACHO
&& !TARGET_64BIT
)
6603 reg
= gen_reg_rtx (Pmode
);
6604 /* Use the generic Mach-O PIC machinery. */
6605 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
6609 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
6611 else if (TARGET_64BIT
6612 && ix86_cmodel
!= CM_SMALL_PIC
6613 && local_symbolic_operand (addr
, Pmode
))
6616 /* This symbol may be referenced via a displacement from the PIC
6617 base address (@GOTOFF). */
6619 if (reload_in_progress
)
6620 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6621 if (GET_CODE (addr
) == CONST
)
6622 addr
= XEXP (addr
, 0);
6623 if (GET_CODE (addr
) == PLUS
)
6625 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6626 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6629 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6630 new = gen_rtx_CONST (Pmode
, new);
6632 tmpreg
= gen_reg_rtx (Pmode
);
6635 emit_move_insn (tmpreg
, new);
6639 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
6640 tmpreg
, 1, OPTAB_DIRECT
);
6643 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
6645 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
6647 /* This symbol may be referenced via a displacement from the PIC
6648 base address (@GOTOFF). */
6650 if (reload_in_progress
)
6651 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6652 if (GET_CODE (addr
) == CONST
)
6653 addr
= XEXP (addr
, 0);
6654 if (GET_CODE (addr
) == PLUS
)
6656 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
6657 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
6660 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
6661 new = gen_rtx_CONST (Pmode
, new);
6662 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6666 emit_move_insn (reg
, new);
6670 else if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
6674 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
6675 new = gen_rtx_CONST (Pmode
, new);
6676 new = gen_const_mem (Pmode
, new);
6677 set_mem_alias_set (new, ix86_GOT_alias_set ());
6680 reg
= gen_reg_rtx (Pmode
);
6681 /* Use directly gen_movsi, otherwise the address is loaded
6682 into register for CSE. We don't want to CSE this addresses,
6683 instead we CSE addresses from the GOT table, so skip this. */
6684 emit_insn (gen_movsi (reg
, new));
6689 /* This symbol must be referenced via a load from the
6690 Global Offset Table (@GOT). */
6692 if (reload_in_progress
)
6693 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6694 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
6695 new = gen_rtx_CONST (Pmode
, new);
6696 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6697 new = gen_const_mem (Pmode
, new);
6698 set_mem_alias_set (new, ix86_GOT_alias_set ());
6701 reg
= gen_reg_rtx (Pmode
);
6702 emit_move_insn (reg
, new);
6708 if (GET_CODE (addr
) == CONST_INT
6709 && !x86_64_immediate_operand (addr
, VOIDmode
))
6713 emit_move_insn (reg
, addr
);
6717 new = force_reg (Pmode
, addr
);
6719 else if (GET_CODE (addr
) == CONST
)
6721 addr
= XEXP (addr
, 0);
6723 /* We must match stuff we generate before. Assume the only
6724 unspecs that can get here are ours. Not that we could do
6725 anything with them anyway.... */
6726 if (GET_CODE (addr
) == UNSPEC
6727 || (GET_CODE (addr
) == PLUS
6728 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
6730 gcc_assert (GET_CODE (addr
) == PLUS
);
6732 if (GET_CODE (addr
) == PLUS
)
6734 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
6736 /* Check first to see if this is a constant offset from a @GOTOFF
6737 symbol reference. */
6738 if (local_symbolic_operand (op0
, Pmode
)
6739 && GET_CODE (op1
) == CONST_INT
)
6743 if (reload_in_progress
)
6744 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6745 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
6747 new = gen_rtx_PLUS (Pmode
, new, op1
);
6748 new = gen_rtx_CONST (Pmode
, new);
6749 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
6753 emit_move_insn (reg
, new);
6759 if (INTVAL (op1
) < -16*1024*1024
6760 || INTVAL (op1
) >= 16*1024*1024)
6762 if (!x86_64_immediate_operand (op1
, Pmode
))
6763 op1
= force_reg (Pmode
, op1
);
6764 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
6770 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6771 new = legitimize_pic_address (XEXP (addr
, 1),
6772 base
== reg
? NULL_RTX
: reg
);
6774 if (GET_CODE (new) == CONST_INT
)
6775 new = plus_constant (base
, INTVAL (new));
6778 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6780 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6781 new = XEXP (new, 1);
6783 new = gen_rtx_PLUS (Pmode
, base
, new);
6791 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6794 get_thread_pointer (int to_reg
)
6798 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
6802 reg
= gen_reg_rtx (Pmode
);
6803 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
6804 insn
= emit_insn (insn
);
6809 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6810 false if we expect this to be used for a memory address and true if
6811 we expect to load the address into a register. */
6814 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
6816 rtx dest
, base
, off
, pic
, tp
;
6821 case TLS_MODEL_GLOBAL_DYNAMIC
:
6822 dest
= gen_reg_rtx (Pmode
);
6823 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6825 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6827 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6830 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6831 insns
= get_insns ();
6834 emit_libcall_block (insns
, dest
, rax
, x
);
6836 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6837 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
6839 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6841 if (TARGET_GNU2_TLS
)
6843 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
6845 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6849 case TLS_MODEL_LOCAL_DYNAMIC
:
6850 base
= gen_reg_rtx (Pmode
);
6851 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
6853 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
6855 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6858 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6859 insns
= get_insns ();
6862 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6863 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6864 emit_libcall_block (insns
, base
, rax
, note
);
6866 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
6867 emit_insn (gen_tls_local_dynamic_base_64 (base
));
6869 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6871 if (TARGET_GNU2_TLS
)
6873 rtx x
= ix86_tls_module_base ();
6875 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
6876 gen_rtx_MINUS (Pmode
, x
, tp
));
6879 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6880 off
= gen_rtx_CONST (Pmode
, off
);
6882 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
6884 if (TARGET_GNU2_TLS
)
6886 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
6888 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
6893 case TLS_MODEL_INITIAL_EXEC
:
6897 type
= UNSPEC_GOTNTPOFF
;
6901 if (reload_in_progress
)
6902 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6903 pic
= pic_offset_table_rtx
;
6904 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6906 else if (!TARGET_ANY_GNU_TLS
)
6908 pic
= gen_reg_rtx (Pmode
);
6909 emit_insn (gen_set_got (pic
));
6910 type
= UNSPEC_GOTTPOFF
;
6915 type
= UNSPEC_INDNTPOFF
;
6918 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6919 off
= gen_rtx_CONST (Pmode
, off
);
6921 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6922 off
= gen_const_mem (Pmode
, off
);
6923 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6925 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6927 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6928 off
= force_reg (Pmode
, off
);
6929 return gen_rtx_PLUS (Pmode
, base
, off
);
6933 base
= get_thread_pointer (true);
6934 dest
= gen_reg_rtx (Pmode
);
6935 emit_insn (gen_subsi3 (dest
, base
, off
));
6939 case TLS_MODEL_LOCAL_EXEC
:
6940 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6941 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6942 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6943 off
= gen_rtx_CONST (Pmode
, off
);
6945 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
6947 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
6948 return gen_rtx_PLUS (Pmode
, base
, off
);
6952 base
= get_thread_pointer (true);
6953 dest
= gen_reg_rtx (Pmode
);
6954 emit_insn (gen_subsi3 (dest
, base
, off
));
6965 /* Try machine-dependent ways of modifying an illegitimate address
6966 to be legitimate. If we find one, return the new, valid address.
6967 This macro is used in only one place: `memory_address' in explow.c.
6969 OLDX is the address as it was before break_out_memory_refs was called.
6970 In some cases it is useful to look at this to decide what needs to be done.
6972 MODE and WIN are passed so that this macro can use
6973 GO_IF_LEGITIMATE_ADDRESS.
6975 It is always safe for this macro to do nothing. It exists to recognize
6976 opportunities to optimize the output.
6978 For the 80386, we handle X+REG by loading X into a register R and
6979 using R+REG. R will go in a general reg and indexing will be used.
6980 However, if REG is a broken-out memory address or multiplication,
6981 nothing needs to be done because REG can certainly go in a general reg.
6983 When -fpic is used, special handling is needed for symbolic references.
6984 See comments by legitimize_pic_address in i386.c for details. */
6987 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
6992 if (TARGET_DEBUG_ADDR
)
6994 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6995 GET_MODE_NAME (mode
));
6999 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7001 return legitimize_tls_address (x
, log
, false);
7002 if (GET_CODE (x
) == CONST
7003 && GET_CODE (XEXP (x
, 0)) == PLUS
7004 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7005 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7007 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7008 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7011 if (flag_pic
&& SYMBOLIC_CONST (x
))
7012 return legitimize_pic_address (x
, 0);
7014 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7015 if (GET_CODE (x
) == ASHIFT
7016 && GET_CODE (XEXP (x
, 1)) == CONST_INT
7017 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7020 log
= INTVAL (XEXP (x
, 1));
7021 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7022 GEN_INT (1 << log
));
7025 if (GET_CODE (x
) == PLUS
)
7027 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7029 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7030 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
7031 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7034 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7035 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7036 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7037 GEN_INT (1 << log
));
7040 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7041 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
7042 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7045 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7046 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7047 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7048 GEN_INT (1 << log
));
7051 /* Put multiply first if it isn't already. */
7052 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7054 rtx tmp
= XEXP (x
, 0);
7055 XEXP (x
, 0) = XEXP (x
, 1);
7060 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7061 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7062 created by virtual register instantiation, register elimination, and
7063 similar optimizations. */
7064 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7067 x
= gen_rtx_PLUS (Pmode
,
7068 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7069 XEXP (XEXP (x
, 1), 0)),
7070 XEXP (XEXP (x
, 1), 1));
7074 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7075 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7076 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7077 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7078 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7079 && CONSTANT_P (XEXP (x
, 1)))
7082 rtx other
= NULL_RTX
;
7084 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7086 constant
= XEXP (x
, 1);
7087 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7089 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
7091 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7092 other
= XEXP (x
, 1);
7100 x
= gen_rtx_PLUS (Pmode
,
7101 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7102 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7103 plus_constant (other
, INTVAL (constant
)));
7107 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7110 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7113 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7116 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7119 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7123 && GET_CODE (XEXP (x
, 1)) == REG
7124 && GET_CODE (XEXP (x
, 0)) == REG
)
7127 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7130 x
= legitimize_pic_address (x
, 0);
7133 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7136 if (GET_CODE (XEXP (x
, 0)) == REG
)
7138 rtx temp
= gen_reg_rtx (Pmode
);
7139 rtx val
= force_operand (XEXP (x
, 1), temp
);
7141 emit_move_insn (temp
, val
);
7147 else if (GET_CODE (XEXP (x
, 1)) == REG
)
7149 rtx temp
= gen_reg_rtx (Pmode
);
7150 rtx val
= force_operand (XEXP (x
, 0), temp
);
7152 emit_move_insn (temp
, val
);
7162 /* Print an integer constant expression in assembler syntax. Addition
7163 and subtraction are the only arithmetic that may appear in these
7164 expressions. FILE is the stdio stream to write to, X is the rtx, and
7165 CODE is the operand print code from the output string. */
7168 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7172 switch (GET_CODE (x
))
7175 gcc_assert (flag_pic
);
7180 output_addr_const (file
, x
);
7181 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7182 fputs ("@PLT", file
);
7189 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7190 assemble_name (asm_out_file
, buf
);
7194 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7198 /* This used to output parentheses around the expression,
7199 but that does not work on the 386 (either ATT or BSD assembler). */
7200 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7204 if (GET_MODE (x
) == VOIDmode
)
7206 /* We can use %d if the number is <32 bits and positive. */
7207 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7208 fprintf (file
, "0x%lx%08lx",
7209 (unsigned long) CONST_DOUBLE_HIGH (x
),
7210 (unsigned long) CONST_DOUBLE_LOW (x
));
7212 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7215 /* We can't handle floating point constants;
7216 PRINT_OPERAND must handle them. */
7217 output_operand_lossage ("floating constant misused");
7221 /* Some assemblers need integer constants to appear first. */
7222 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
7224 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7226 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7230 gcc_assert (GET_CODE (XEXP (x
, 1)) == CONST_INT
);
7231 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7233 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7239 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7240 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7242 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7244 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7248 gcc_assert (XVECLEN (x
, 0) == 1);
7249 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7250 switch (XINT (x
, 1))
7253 fputs ("@GOT", file
);
7256 fputs ("@GOTOFF", file
);
7258 case UNSPEC_GOTPCREL
:
7259 fputs ("@GOTPCREL(%rip)", file
);
7261 case UNSPEC_GOTTPOFF
:
7262 /* FIXME: This might be @TPOFF in Sun ld too. */
7263 fputs ("@GOTTPOFF", file
);
7266 fputs ("@TPOFF", file
);
7270 fputs ("@TPOFF", file
);
7272 fputs ("@NTPOFF", file
);
7275 fputs ("@DTPOFF", file
);
7277 case UNSPEC_GOTNTPOFF
:
7279 fputs ("@GOTTPOFF(%rip)", file
);
7281 fputs ("@GOTNTPOFF", file
);
7283 case UNSPEC_INDNTPOFF
:
7284 fputs ("@INDNTPOFF", file
);
7287 output_operand_lossage ("invalid UNSPEC as operand");
7293 output_operand_lossage ("invalid expression as operand");
7297 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7298 We need to emit DTP-relative relocations. */
7301 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7303 fputs (ASM_LONG
, file
);
7304 output_addr_const (file
, x
);
7305 fputs ("@DTPOFF", file
);
7311 fputs (", 0", file
);
7318 /* In the name of slightly smaller debug output, and to cater to
7319 general assembler lossage, recognize PIC+GOTOFF and turn it back
7320 into a direct symbol reference.
7322 On Darwin, this is necessary to avoid a crash, because Darwin
7323 has a different PIC label for each routine but the DWARF debugging
7324 information is not associated with any particular routine, so it's
7325 necessary to remove references to the PIC label from RTL stored by
7326 the DWARF output code. */
7329 ix86_delegitimize_address (rtx orig_x
)
7332 /* reg_addend is NULL or a multiple of some register. */
7333 rtx reg_addend
= NULL_RTX
;
7334 /* const_addend is NULL or a const_int. */
7335 rtx const_addend
= NULL_RTX
;
7336 /* This is the result, or NULL. */
7337 rtx result
= NULL_RTX
;
7339 if (GET_CODE (x
) == MEM
)
7344 if (GET_CODE (x
) != CONST
7345 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7346 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7347 || GET_CODE (orig_x
) != MEM
)
7349 return XVECEXP (XEXP (x
, 0), 0, 0);
7352 if (GET_CODE (x
) != PLUS
7353 || GET_CODE (XEXP (x
, 1)) != CONST
)
7356 if (GET_CODE (XEXP (x
, 0)) == REG
7357 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7358 /* %ebx + GOT/GOTOFF */
7360 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7362 /* %ebx + %reg * scale + GOT/GOTOFF */
7363 reg_addend
= XEXP (x
, 0);
7364 if (GET_CODE (XEXP (reg_addend
, 0)) == REG
7365 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7366 reg_addend
= XEXP (reg_addend
, 1);
7367 else if (GET_CODE (XEXP (reg_addend
, 1)) == REG
7368 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7369 reg_addend
= XEXP (reg_addend
, 0);
7372 if (GET_CODE (reg_addend
) != REG
7373 && GET_CODE (reg_addend
) != MULT
7374 && GET_CODE (reg_addend
) != ASHIFT
)
7380 x
= XEXP (XEXP (x
, 1), 0);
7381 if (GET_CODE (x
) == PLUS
7382 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7384 const_addend
= XEXP (x
, 1);
7388 if (GET_CODE (x
) == UNSPEC
7389 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
7390 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
7391 result
= XVECEXP (x
, 0, 0);
7393 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7394 && GET_CODE (orig_x
) != MEM
)
7395 result
= XEXP (x
, 0);
7401 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7403 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7408 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7413 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7415 enum rtx_code second_code
, bypass_code
;
7416 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7417 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7418 code
= ix86_fp_compare_code_to_integer (code
);
7422 code
= reverse_condition (code
);
7433 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7437 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7438 Those same assemblers have the same but opposite lossage on cmov. */
7439 gcc_assert (mode
== CCmode
);
7440 suffix
= fp
? "nbe" : "a";
7460 gcc_assert (mode
== CCmode
);
7482 gcc_assert (mode
== CCmode
);
7483 suffix
= fp
? "nb" : "ae";
7486 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
7490 gcc_assert (mode
== CCmode
);
7494 suffix
= fp
? "u" : "p";
7497 suffix
= fp
? "nu" : "np";
7502 fputs (suffix
, file
);
7505 /* Print the name of register X to FILE based on its machine mode and number.
7506 If CODE is 'w', pretend the mode is HImode.
7507 If CODE is 'b', pretend the mode is QImode.
7508 If CODE is 'k', pretend the mode is SImode.
7509 If CODE is 'q', pretend the mode is DImode.
7510 If CODE is 'h', pretend the reg is the 'high' byte register.
7511 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7514 print_reg (rtx x
, int code
, FILE *file
)
7516 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
7517 && REGNO (x
) != FRAME_POINTER_REGNUM
7518 && REGNO (x
) != FLAGS_REG
7519 && REGNO (x
) != FPSR_REG
7520 && REGNO (x
) != FPCR_REG
);
7522 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
7525 if (code
== 'w' || MMX_REG_P (x
))
7527 else if (code
== 'b')
7529 else if (code
== 'k')
7531 else if (code
== 'q')
7533 else if (code
== 'y')
7535 else if (code
== 'h')
7538 code
= GET_MODE_SIZE (GET_MODE (x
));
7540 /* Irritatingly, AMD extended registers use different naming convention
7541 from the normal registers. */
7542 if (REX_INT_REG_P (x
))
7544 gcc_assert (TARGET_64BIT
);
7548 error ("extended registers have no high halves");
7551 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7554 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7557 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7560 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
7563 error ("unsupported operand size for extended register");
7571 if (STACK_TOP_P (x
))
7573 fputs ("st(0)", file
);
7580 if (! ANY_FP_REG_P (x
))
7581 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
7586 fputs (hi_reg_name
[REGNO (x
)], file
);
7589 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
7591 fputs (qi_reg_name
[REGNO (x
)], file
);
7594 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
7596 fputs (qi_high_reg_name
[REGNO (x
)], file
);
7603 /* Locate some local-dynamic symbol still in use by this function
7604 so that we can print its name in some tls_local_dynamic_base
7608 get_some_local_dynamic_name (void)
7612 if (cfun
->machine
->some_ld_name
)
7613 return cfun
->machine
->some_ld_name
;
7615 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
7617 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
7618 return cfun
->machine
->some_ld_name
;
7624 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
7628 if (GET_CODE (x
) == SYMBOL_REF
7629 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
7631 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
7639 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7640 C -- print opcode suffix for set/cmov insn.
7641 c -- like C, but print reversed condition
7642 F,f -- likewise, but for floating-point.
7643 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7645 R -- print the prefix for register names.
7646 z -- print the opcode suffix for the size of the current operand.
7647 * -- print a star (in certain assembler syntax)
7648 A -- print an absolute memory reference.
7649 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7650 s -- print a shift double count, followed by the assemblers argument
7652 b -- print the QImode name of the register for the indicated operand.
7653 %b0 would print %al if operands[0] is reg 0.
7654 w -- likewise, print the HImode name of the register.
7655 k -- likewise, print the SImode name of the register.
7656 q -- likewise, print the DImode name of the register.
7657 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7658 y -- print "st(0)" instead of "st" as a register.
7659 D -- print condition for SSE cmp instruction.
7660 P -- if PIC, print an @PLT suffix.
7661 X -- don't print any sort of PIC '@' suffix for a symbol.
7662 & -- print some in-use local-dynamic symbol name.
7663 H -- print a memory address offset by 8; used for sse high-parts
7667 print_operand (FILE *file
, rtx x
, int code
)
7674 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7679 assemble_name (file
, get_some_local_dynamic_name ());
7683 switch (ASSEMBLER_DIALECT
)
7690 /* Intel syntax. For absolute addresses, registers should not
7691 be surrounded by braces. */
7692 if (GET_CODE (x
) != REG
)
7695 PRINT_OPERAND (file
, x
, 0);
7705 PRINT_OPERAND (file
, x
, 0);
7710 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7715 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7720 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7725 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7730 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7735 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7740 /* 387 opcodes don't get size suffixes if the operands are
7742 if (STACK_REG_P (x
))
7745 /* Likewise if using Intel opcodes. */
7746 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7749 /* This is the size of op from size of operand. */
7750 switch (GET_MODE_SIZE (GET_MODE (x
)))
7753 #ifdef HAVE_GAS_FILDS_FISTS
7759 if (GET_MODE (x
) == SFmode
)
7774 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7776 #ifdef GAS_MNEMONICS
7802 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7804 PRINT_OPERAND (file
, x
, 0);
7810 /* Little bit of braindamage here. The SSE compare instructions
7811 does use completely different names for the comparisons that the
7812 fp conditional moves. */
7813 switch (GET_CODE (x
))
7828 fputs ("unord", file
);
7832 fputs ("neq", file
);
7836 fputs ("nlt", file
);
7840 fputs ("nle", file
);
7843 fputs ("ord", file
);
7850 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7851 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7853 switch (GET_MODE (x
))
7855 case HImode
: putc ('w', file
); break;
7857 case SFmode
: putc ('l', file
); break;
7859 case DFmode
: putc ('q', file
); break;
7860 default: gcc_unreachable ();
7867 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7870 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7871 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7874 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7877 /* Like above, but reverse condition */
7879 /* Check to see if argument to %c is really a constant
7880 and not a condition code which needs to be reversed. */
7881 if (!COMPARISON_P (x
))
7883 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7886 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7889 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7890 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7893 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
7897 /* It doesn't actually matter what mode we use here, as we're
7898 only going to use this for printing. */
7899 x
= adjust_address_nv (x
, DImode
, 8);
7906 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
7909 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
7912 int pred_val
= INTVAL (XEXP (x
, 0));
7914 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
7915 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
7917 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
7918 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
7920 /* Emit hints only in the case default branch prediction
7921 heuristics would fail. */
7922 if (taken
!= cputaken
)
7924 /* We use 3e (DS) prefix for taken branches and
7925 2e (CS) prefix for not taken branches. */
7927 fputs ("ds ; ", file
);
7929 fputs ("cs ; ", file
);
7936 output_operand_lossage ("invalid operand code '%c'", code
);
7940 if (GET_CODE (x
) == REG
)
7941 print_reg (x
, code
, file
);
7943 else if (GET_CODE (x
) == MEM
)
7945 /* No `byte ptr' prefix for call instructions. */
7946 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
7949 switch (GET_MODE_SIZE (GET_MODE (x
)))
7951 case 1: size
= "BYTE"; break;
7952 case 2: size
= "WORD"; break;
7953 case 4: size
= "DWORD"; break;
7954 case 8: size
= "QWORD"; break;
7955 case 12: size
= "XWORD"; break;
7956 case 16: size
= "XMMWORD"; break;
7961 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7964 else if (code
== 'w')
7966 else if (code
== 'k')
7970 fputs (" PTR ", file
);
7974 /* Avoid (%rip) for call operands. */
7975 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7976 && GET_CODE (x
) != CONST_INT
)
7977 output_addr_const (file
, x
);
7978 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7979 output_operand_lossage ("invalid constraints for operand");
7984 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7989 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7990 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7992 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7994 fprintf (file
, "0x%08lx", l
);
7997 /* These float cases don't actually occur as immediate operands. */
7998 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8002 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8003 fprintf (file
, "%s", dstr
);
8006 else if (GET_CODE (x
) == CONST_DOUBLE
8007 && GET_MODE (x
) == XFmode
)
8011 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8012 fprintf (file
, "%s", dstr
);
8017 /* We have patterns that allow zero sets of memory, for instance.
8018 In 64-bit mode, we should probably support all 8-byte vectors,
8019 since we can in fact encode that into an immediate. */
8020 if (GET_CODE (x
) == CONST_VECTOR
)
8022 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8028 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8030 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8033 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8034 || GET_CODE (x
) == LABEL_REF
)
8036 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8039 fputs ("OFFSET FLAT:", file
);
8042 if (GET_CODE (x
) == CONST_INT
)
8043 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8045 output_pic_addr_const (file
, x
, code
);
8047 output_addr_const (file
, x
);
8051 /* Print a memory operand whose address is ADDR. */
8054 print_operand_address (FILE *file
, rtx addr
)
8056 struct ix86_address parts
;
8057 rtx base
, index
, disp
;
8059 int ok
= ix86_decompose_address (addr
, &parts
);
8064 index
= parts
.index
;
8066 scale
= parts
.scale
;
8074 if (USER_LABEL_PREFIX
[0] == 0)
8076 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8082 if (!base
&& !index
)
8084 /* Displacement only requires special attention. */
8086 if (GET_CODE (disp
) == CONST_INT
)
8088 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8090 if (USER_LABEL_PREFIX
[0] == 0)
8092 fputs ("ds:", file
);
8094 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8097 output_pic_addr_const (file
, disp
, 0);
8099 output_addr_const (file
, disp
);
8101 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8104 if (GET_CODE (disp
) == CONST
8105 && GET_CODE (XEXP (disp
, 0)) == PLUS
8106 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8107 disp
= XEXP (XEXP (disp
, 0), 0);
8108 if (GET_CODE (disp
) == LABEL_REF
8109 || (GET_CODE (disp
) == SYMBOL_REF
8110 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8111 fputs ("(%rip)", file
);
8116 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8121 output_pic_addr_const (file
, disp
, 0);
8122 else if (GET_CODE (disp
) == LABEL_REF
)
8123 output_asm_label (disp
);
8125 output_addr_const (file
, disp
);
8130 print_reg (base
, 0, file
);
8134 print_reg (index
, 0, file
);
8136 fprintf (file
, ",%d", scale
);
8142 rtx offset
= NULL_RTX
;
8146 /* Pull out the offset of a symbol; print any symbol itself. */
8147 if (GET_CODE (disp
) == CONST
8148 && GET_CODE (XEXP (disp
, 0)) == PLUS
8149 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
8151 offset
= XEXP (XEXP (disp
, 0), 1);
8152 disp
= gen_rtx_CONST (VOIDmode
,
8153 XEXP (XEXP (disp
, 0), 0));
8157 output_pic_addr_const (file
, disp
, 0);
8158 else if (GET_CODE (disp
) == LABEL_REF
)
8159 output_asm_label (disp
);
8160 else if (GET_CODE (disp
) == CONST_INT
)
8163 output_addr_const (file
, disp
);
8169 print_reg (base
, 0, file
);
8172 if (INTVAL (offset
) >= 0)
8174 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8178 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8185 print_reg (index
, 0, file
);
8187 fprintf (file
, "*%d", scale
);
8195 output_addr_const_extra (FILE *file
, rtx x
)
8199 if (GET_CODE (x
) != UNSPEC
)
8202 op
= XVECEXP (x
, 0, 0);
8203 switch (XINT (x
, 1))
8205 case UNSPEC_GOTTPOFF
:
8206 output_addr_const (file
, op
);
8207 /* FIXME: This might be @TPOFF in Sun ld. */
8208 fputs ("@GOTTPOFF", file
);
8211 output_addr_const (file
, op
);
8212 fputs ("@TPOFF", file
);
8215 output_addr_const (file
, op
);
8217 fputs ("@TPOFF", file
);
8219 fputs ("@NTPOFF", file
);
8222 output_addr_const (file
, op
);
8223 fputs ("@DTPOFF", file
);
8225 case UNSPEC_GOTNTPOFF
:
8226 output_addr_const (file
, op
);
8228 fputs ("@GOTTPOFF(%rip)", file
);
8230 fputs ("@GOTNTPOFF", file
);
8232 case UNSPEC_INDNTPOFF
:
8233 output_addr_const (file
, op
);
8234 fputs ("@INDNTPOFF", file
);
8244 /* Split one or more DImode RTL references into pairs of SImode
8245 references. The RTL can be REG, offsettable MEM, integer constant, or
8246 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8247 split and "num" is its length. lo_half and hi_half are output arrays
8248 that parallel "operands". */
8251 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8255 rtx op
= operands
[num
];
8257 /* simplify_subreg refuse to split volatile memory addresses,
8258 but we still have to handle it. */
8259 if (GET_CODE (op
) == MEM
)
8261 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8262 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8266 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8267 GET_MODE (op
) == VOIDmode
8268 ? DImode
: GET_MODE (op
), 0);
8269 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8270 GET_MODE (op
) == VOIDmode
8271 ? DImode
: GET_MODE (op
), 4);
8275 /* Split one or more TImode RTL references into pairs of DImode
8276 references. The RTL can be REG, offsettable MEM, integer constant, or
8277 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8278 split and "num" is its length. lo_half and hi_half are output arrays
8279 that parallel "operands". */
8282 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8286 rtx op
= operands
[num
];
8288 /* simplify_subreg refuse to split volatile memory addresses, but we
8289 still have to handle it. */
8290 if (GET_CODE (op
) == MEM
)
8292 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8293 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8297 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8298 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8303 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8304 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8305 is the expression of the binary operation. The output may either be
8306 emitted here, or returned to the caller, like all output_* functions.
8308 There is no guarantee that the operands are the same mode, as they
8309 might be within FLOAT or FLOAT_EXTEND expressions. */
8311 #ifndef SYSV386_COMPAT
8312 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8313 wants to fix the assemblers because that causes incompatibility
8314 with gcc. No-one wants to fix gcc because that causes
8315 incompatibility with assemblers... You can use the option of
8316 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8317 #define SYSV386_COMPAT 1
8321 output_387_binary_op (rtx insn
, rtx
*operands
)
8323 static char buf
[30];
8326 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8328 #ifdef ENABLE_CHECKING
8329 /* Even if we do not want to check the inputs, this documents input
8330 constraints. Which helps in understanding the following code. */
8331 if (STACK_REG_P (operands
[0])
8332 && ((REG_P (operands
[1])
8333 && REGNO (operands
[0]) == REGNO (operands
[1])
8334 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
8335 || (REG_P (operands
[2])
8336 && REGNO (operands
[0]) == REGNO (operands
[2])
8337 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
8338 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8341 gcc_assert (is_sse
);
8344 switch (GET_CODE (operands
[3]))
8347 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8348 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8356 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8357 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8365 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8366 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8374 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8375 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8389 if (GET_MODE (operands
[0]) == SFmode
)
8390 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8392 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8397 switch (GET_CODE (operands
[3]))
8401 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8403 rtx temp
= operands
[2];
8404 operands
[2] = operands
[1];
8408 /* know operands[0] == operands[1]. */
8410 if (GET_CODE (operands
[2]) == MEM
)
8416 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8418 if (STACK_TOP_P (operands
[0]))
8419 /* How is it that we are storing to a dead operand[2]?
8420 Well, presumably operands[1] is dead too. We can't
8421 store the result to st(0) as st(0) gets popped on this
8422 instruction. Instead store to operands[2] (which I
8423 think has to be st(1)). st(1) will be popped later.
8424 gcc <= 2.8.1 didn't have this check and generated
8425 assembly code that the Unixware assembler rejected. */
8426 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8428 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8432 if (STACK_TOP_P (operands
[0]))
8433 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8435 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8440 if (GET_CODE (operands
[1]) == MEM
)
8446 if (GET_CODE (operands
[2]) == MEM
)
8452 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8455 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8456 derived assemblers, confusingly reverse the direction of
8457 the operation for fsub{r} and fdiv{r} when the
8458 destination register is not st(0). The Intel assembler
8459 doesn't have this brain damage. Read !SYSV386_COMPAT to
8460 figure out what the hardware really does. */
8461 if (STACK_TOP_P (operands
[0]))
8462 p
= "{p\t%0, %2|rp\t%2, %0}";
8464 p
= "{rp\t%2, %0|p\t%0, %2}";
8466 if (STACK_TOP_P (operands
[0]))
8467 /* As above for fmul/fadd, we can't store to st(0). */
8468 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8470 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8475 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
8478 if (STACK_TOP_P (operands
[0]))
8479 p
= "{rp\t%0, %1|p\t%1, %0}";
8481 p
= "{p\t%1, %0|rp\t%0, %1}";
8483 if (STACK_TOP_P (operands
[0]))
8484 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8486 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8491 if (STACK_TOP_P (operands
[0]))
8493 if (STACK_TOP_P (operands
[1]))
8494 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8496 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8499 else if (STACK_TOP_P (operands
[1]))
8502 p
= "{\t%1, %0|r\t%0, %1}";
8504 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8510 p
= "{r\t%2, %0|\t%0, %2}";
8512 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8525 /* Return needed mode for entity in optimize_mode_switching pass. */
8528 ix86_mode_needed (int entity
, rtx insn
)
8530 enum attr_i387_cw mode
;
8532 /* The mode UNINITIALIZED is used to store control word after a
8533 function call or ASM pattern. The mode ANY specify that function
8534 has no requirements on the control word and make no changes in the
8535 bits we are interested in. */
8538 || (NONJUMP_INSN_P (insn
)
8539 && (asm_noperands (PATTERN (insn
)) >= 0
8540 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
8541 return I387_CW_UNINITIALIZED
;
8543 if (recog_memoized (insn
) < 0)
8546 mode
= get_attr_i387_cw (insn
);
8551 if (mode
== I387_CW_TRUNC
)
8556 if (mode
== I387_CW_FLOOR
)
8561 if (mode
== I387_CW_CEIL
)
8566 if (mode
== I387_CW_MASK_PM
)
8577 /* Output code to initialize control word copies used by trunc?f?i and
8578 rounding patterns. CURRENT_MODE is set to current control word,
8579 while NEW_MODE is set to new control word. */
8582 emit_i387_cw_initialization (int mode
)
8584 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
8589 rtx reg
= gen_reg_rtx (HImode
);
8591 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
8592 emit_move_insn (reg
, copy_rtx (stored_mode
));
8594 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
8599 /* round toward zero (truncate) */
8600 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
8601 slot
= SLOT_CW_TRUNC
;
8605 /* round down toward -oo */
8606 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8607 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
8608 slot
= SLOT_CW_FLOOR
;
8612 /* round up toward +oo */
8613 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
8614 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
8615 slot
= SLOT_CW_CEIL
;
8618 case I387_CW_MASK_PM
:
8619 /* mask precision exception for nearbyint() */
8620 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8621 slot
= SLOT_CW_MASK_PM
;
8633 /* round toward zero (truncate) */
8634 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
8635 slot
= SLOT_CW_TRUNC
;
8639 /* round down toward -oo */
8640 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
8641 slot
= SLOT_CW_FLOOR
;
8645 /* round up toward +oo */
8646 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
8647 slot
= SLOT_CW_CEIL
;
8650 case I387_CW_MASK_PM
:
8651 /* mask precision exception for nearbyint() */
8652 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
8653 slot
= SLOT_CW_MASK_PM
;
8661 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
8663 new_mode
= assign_386_stack_local (HImode
, slot
);
8664 emit_move_insn (new_mode
, reg
);
8667 /* Output code for INSN to convert a float to a signed int. OPERANDS
8668 are the insn operands. The output may be [HSD]Imode and the input
8669 operand may be [SDX]Fmode. */
8672 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
8674 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8675 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
8676 int round_mode
= get_attr_i387_cw (insn
);
8678 /* Jump through a hoop or two for DImode, since the hardware has no
8679 non-popping instruction. We used to do this a different way, but
8680 that was somewhat fragile and broke with post-reload splitters. */
8681 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
8682 output_asm_insn ("fld\t%y1", operands
);
8684 gcc_assert (STACK_TOP_P (operands
[1]));
8685 gcc_assert (GET_CODE (operands
[0]) == MEM
);
8688 output_asm_insn ("fisttp%z0\t%0", operands
);
8691 if (round_mode
!= I387_CW_ANY
)
8692 output_asm_insn ("fldcw\t%3", operands
);
8693 if (stack_top_dies
|| dimode_p
)
8694 output_asm_insn ("fistp%z0\t%0", operands
);
8696 output_asm_insn ("fist%z0\t%0", operands
);
8697 if (round_mode
!= I387_CW_ANY
)
8698 output_asm_insn ("fldcw\t%2", operands
);
8704 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8705 have the values zero or one, indicates the ffreep insn's operand
8706 from the OPERANDS array. */
8709 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
8711 if (TARGET_USE_FFREEP
)
8712 #if HAVE_AS_IX86_FFREEP
8713 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
8716 static char retval
[] = ".word\t0xc_df";
8717 int regno
= REGNO (operands
[opno
]);
8719 gcc_assert (FP_REGNO_P (regno
));
8721 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
8726 return opno
? "fstp\t%y1" : "fstp\t%y0";
8730 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8731 should be used. UNORDERED_P is true when fucom should be used. */
8734 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
8737 rtx cmp_op0
, cmp_op1
;
8738 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
8742 cmp_op0
= operands
[0];
8743 cmp_op1
= operands
[1];
8747 cmp_op0
= operands
[1];
8748 cmp_op1
= operands
[2];
8753 if (GET_MODE (operands
[0]) == SFmode
)
8755 return "ucomiss\t{%1, %0|%0, %1}";
8757 return "comiss\t{%1, %0|%0, %1}";
8760 return "ucomisd\t{%1, %0|%0, %1}";
8762 return "comisd\t{%1, %0|%0, %1}";
8765 gcc_assert (STACK_TOP_P (cmp_op0
));
8767 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
8769 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
8773 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
8774 return output_387_ffreep (operands
, 1);
8777 return "ftst\n\tfnstsw\t%0";
8780 if (STACK_REG_P (cmp_op1
)
8782 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
8783 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
8785 /* If both the top of the 387 stack dies, and the other operand
8786 is also a stack register that dies, then this must be a
8787 `fcompp' float compare */
8791 /* There is no double popping fcomi variant. Fortunately,
8792 eflags is immune from the fstp's cc clobbering. */
8794 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
8796 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
8797 return output_387_ffreep (operands
, 0);
8802 return "fucompp\n\tfnstsw\t%0";
8804 return "fcompp\n\tfnstsw\t%0";
8809 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8811 static const char * const alt
[16] =
8813 "fcom%z2\t%y2\n\tfnstsw\t%0",
8814 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8815 "fucom%z2\t%y2\n\tfnstsw\t%0",
8816 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8818 "ficom%z2\t%y2\n\tfnstsw\t%0",
8819 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8823 "fcomi\t{%y1, %0|%0, %y1}",
8824 "fcomip\t{%y1, %0|%0, %y1}",
8825 "fucomi\t{%y1, %0|%0, %y1}",
8826 "fucomip\t{%y1, %0|%0, %y1}",
8837 mask
= eflags_p
<< 3;
8838 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
8839 mask
|= unordered_p
<< 1;
8840 mask
|= stack_top_dies
;
8842 gcc_assert (mask
< 16);
8851 ix86_output_addr_vec_elt (FILE *file
, int value
)
8853 const char *directive
= ASM_LONG
;
8857 directive
= ASM_QUAD
;
8859 gcc_assert (!TARGET_64BIT
);
8862 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
8866 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
8869 fprintf (file
, "%s%s%d-%s%d\n",
8870 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8871 else if (HAVE_AS_GOTOFF_IN_DATA
)
8872 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8874 else if (TARGET_MACHO
)
8876 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
8877 machopic_output_function_base_name (file
);
8878 fprintf(file
, "\n");
8882 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8883 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
8886 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8890 ix86_expand_clear (rtx dest
)
8894 /* We play register width games, which are only valid after reload. */
8895 gcc_assert (reload_completed
);
8897 /* Avoid HImode and its attendant prefix byte. */
8898 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
8899 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
8901 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
8903 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8904 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
8906 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
8907 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8913 /* X is an unchanging MEM. If it is a constant pool reference, return
8914 the constant pool rtx, else NULL. */
8917 maybe_get_pool_constant (rtx x
)
8919 x
= ix86_delegitimize_address (XEXP (x
, 0));
8921 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8922 return get_pool_constant (x
);
8928 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
8930 int strict
= (reload_in_progress
|| reload_completed
);
8932 enum tls_model model
;
8937 if (GET_CODE (op1
) == SYMBOL_REF
)
8939 model
= SYMBOL_REF_TLS_MODEL (op1
);
8942 op1
= legitimize_tls_address (op1
, model
, true);
8943 op1
= force_operand (op1
, op0
);
8948 else if (GET_CODE (op1
) == CONST
8949 && GET_CODE (XEXP (op1
, 0)) == PLUS
8950 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
8952 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
8955 rtx addend
= XEXP (XEXP (op1
, 0), 1);
8956 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
8957 op1
= force_operand (op1
, NULL
);
8958 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
8959 op0
, 1, OPTAB_DIRECT
);
8965 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8967 if (TARGET_MACHO
&& !TARGET_64BIT
)
8972 rtx temp
= ((reload_in_progress
8973 || ((op0
&& GET_CODE (op0
) == REG
)
8975 ? op0
: gen_reg_rtx (Pmode
));
8976 op1
= machopic_indirect_data_reference (op1
, temp
);
8977 op1
= machopic_legitimize_pic_address (op1
, mode
,
8978 temp
== op1
? 0 : temp
);
8980 else if (MACHOPIC_INDIRECT
)
8981 op1
= machopic_indirect_data_reference (op1
, 0);
8988 if (GET_CODE (op0
) == MEM
)
8989 op1
= force_reg (Pmode
, op1
);
8991 op1
= legitimize_address (op1
, op1
, Pmode
);
8996 if (GET_CODE (op0
) == MEM
8997 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8998 || !push_operand (op0
, mode
))
8999 && GET_CODE (op1
) == MEM
)
9000 op1
= force_reg (mode
, op1
);
9002 if (push_operand (op0
, mode
)
9003 && ! general_no_elim_operand (op1
, mode
))
9004 op1
= copy_to_mode_reg (mode
, op1
);
9006 /* Force large constants in 64bit compilation into register
9007 to get them CSEed. */
9008 if (TARGET_64BIT
&& mode
== DImode
9009 && immediate_operand (op1
, mode
)
9010 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9011 && !register_operand (op0
, mode
)
9012 && optimize
&& !reload_completed
&& !reload_in_progress
)
9013 op1
= copy_to_mode_reg (mode
, op1
);
9015 if (FLOAT_MODE_P (mode
))
9017 /* If we are loading a floating point constant to a register,
9018 force the value to memory now, since we'll get better code
9019 out the back end. */
9023 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9025 op1
= validize_mem (force_const_mem (mode
, op1
));
9026 if (!register_operand (op0
, mode
))
9028 rtx temp
= gen_reg_rtx (mode
);
9029 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9030 emit_move_insn (op0
, temp
);
9037 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9041 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9043 rtx op0
= operands
[0], op1
= operands
[1];
9045 /* Force constants other than zero into memory. We do not know how
9046 the instructions used to build constants modify the upper 64 bits
9047 of the register, once we have that information we may be able
9048 to handle some of them more efficiently. */
9049 if ((reload_in_progress
| reload_completed
) == 0
9050 && register_operand (op0
, mode
)
9052 && standard_sse_constant_p (op1
) <= 0)
9053 op1
= validize_mem (force_const_mem (mode
, op1
));
9055 /* Make operand1 a register if it isn't already. */
9057 && !register_operand (op0
, mode
)
9058 && !register_operand (op1
, mode
))
9060 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9064 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9067 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9068 straight to ix86_expand_vector_move. */
9071 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9080 /* If we're optimizing for size, movups is the smallest. */
9083 op0
= gen_lowpart (V4SFmode
, op0
);
9084 op1
= gen_lowpart (V4SFmode
, op1
);
9085 emit_insn (gen_sse_movups (op0
, op1
));
9089 /* ??? If we have typed data, then it would appear that using
9090 movdqu is the only way to get unaligned data loaded with
9092 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9094 op0
= gen_lowpart (V16QImode
, op0
);
9095 op1
= gen_lowpart (V16QImode
, op1
);
9096 emit_insn (gen_sse2_movdqu (op0
, op1
));
9100 if (TARGET_SSE2
&& mode
== V2DFmode
)
9104 /* When SSE registers are split into halves, we can avoid
9105 writing to the top half twice. */
9106 if (TARGET_SSE_SPLIT_REGS
)
9108 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9113 /* ??? Not sure about the best option for the Intel chips.
9114 The following would seem to satisfy; the register is
9115 entirely cleared, breaking the dependency chain. We
9116 then store to the upper half, with a dependency depth
9117 of one. A rumor has it that Intel recommends two movsd
9118 followed by an unpacklpd, but this is unconfirmed. And
9119 given that the dependency depth of the unpacklpd would
9120 still be one, I'm not sure why this would be better. */
9121 zero
= CONST0_RTX (V2DFmode
);
9124 m
= adjust_address (op1
, DFmode
, 0);
9125 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9126 m
= adjust_address (op1
, DFmode
, 8);
9127 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9131 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9132 emit_move_insn (op0
, CONST0_RTX (mode
));
9134 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9136 if (mode
!= V4SFmode
)
9137 op0
= gen_lowpart (V4SFmode
, op0
);
9138 m
= adjust_address (op1
, V2SFmode
, 0);
9139 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9140 m
= adjust_address (op1
, V2SFmode
, 8);
9141 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9144 else if (MEM_P (op0
))
9146 /* If we're optimizing for size, movups is the smallest. */
9149 op0
= gen_lowpart (V4SFmode
, op0
);
9150 op1
= gen_lowpart (V4SFmode
, op1
);
9151 emit_insn (gen_sse_movups (op0
, op1
));
9155 /* ??? Similar to above, only less clear because of quote
9156 typeless stores unquote. */
9157 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9158 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9160 op0
= gen_lowpart (V16QImode
, op0
);
9161 op1
= gen_lowpart (V16QImode
, op1
);
9162 emit_insn (gen_sse2_movdqu (op0
, op1
));
9166 if (TARGET_SSE2
&& mode
== V2DFmode
)
9168 m
= adjust_address (op0
, DFmode
, 0);
9169 emit_insn (gen_sse2_storelpd (m
, op1
));
9170 m
= adjust_address (op0
, DFmode
, 8);
9171 emit_insn (gen_sse2_storehpd (m
, op1
));
9175 if (mode
!= V4SFmode
)
9176 op1
= gen_lowpart (V4SFmode
, op1
);
9177 m
= adjust_address (op0
, V2SFmode
, 0);
9178 emit_insn (gen_sse_storelps (m
, op1
));
9179 m
= adjust_address (op0
, V2SFmode
, 8);
9180 emit_insn (gen_sse_storehps (m
, op1
));
9187 /* Expand a push in MODE. This is some mode for which we do not support
9188 proper push instructions, at least from the registers that we expect
9189 the value to live in. */
9192 ix86_expand_push (enum machine_mode mode
, rtx x
)
9196 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9197 GEN_INT (-GET_MODE_SIZE (mode
)),
9198 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9199 if (tmp
!= stack_pointer_rtx
)
9200 emit_move_insn (stack_pointer_rtx
, tmp
);
9202 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9203 emit_move_insn (tmp
, x
);
9206 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9207 destination to use for the operation. If different from the true
9208 destination in operands[0], a copy operation will be required. */
9211 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9214 int matching_memory
;
9215 rtx src1
, src2
, dst
;
9221 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9222 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9223 && (rtx_equal_p (dst
, src2
)
9224 || immediate_operand (src1
, mode
)))
9231 /* If the destination is memory, and we do not have matching source
9232 operands, do things in registers. */
9233 matching_memory
= 0;
9234 if (GET_CODE (dst
) == MEM
)
9236 if (rtx_equal_p (dst
, src1
))
9237 matching_memory
= 1;
9238 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9239 && rtx_equal_p (dst
, src2
))
9240 matching_memory
= 2;
9242 dst
= gen_reg_rtx (mode
);
9245 /* Both source operands cannot be in memory. */
9246 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
9248 if (matching_memory
!= 2)
9249 src2
= force_reg (mode
, src2
);
9251 src1
= force_reg (mode
, src1
);
9254 /* If the operation is not commutable, source 1 cannot be a constant
9255 or non-matching memory. */
9256 if ((CONSTANT_P (src1
)
9257 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
9258 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9259 src1
= force_reg (mode
, src1
);
9261 src1
= operands
[1] = src1
;
9262 src2
= operands
[2] = src2
;
9266 /* Similarly, but assume that the destination has already been
9270 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9271 enum machine_mode mode
, rtx operands
[])
9273 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9274 gcc_assert (dst
== operands
[0]);
9277 /* Attempt to expand a binary operator. Make the expansion closer to the
9278 actual machine, then just general_operand, which will allow 3 separate
9279 memory references (one output, two input) in a single insn. */
9282 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9285 rtx src1
, src2
, dst
, op
, clob
;
9287 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9291 /* Emit the instruction. */
9293 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9294 if (reload_in_progress
)
9296 /* Reload doesn't know about the flags register, and doesn't know that
9297 it doesn't want to clobber it. We can only do this with PLUS. */
9298 gcc_assert (code
== PLUS
);
9303 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9304 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9307 /* Fix up the destination if needed. */
9308 if (dst
!= operands
[0])
9309 emit_move_insn (operands
[0], dst
);
9312 /* Return TRUE or FALSE depending on whether the binary operator meets the
9313 appropriate constraints. */
9316 ix86_binary_operator_ok (enum rtx_code code
,
9317 enum machine_mode mode ATTRIBUTE_UNUSED
,
9320 /* Both source operands cannot be in memory. */
9321 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
9323 /* If the operation is not commutable, source 1 cannot be a constant. */
9324 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9326 /* If the destination is memory, we must have a matching source operand. */
9327 if (GET_CODE (operands
[0]) == MEM
9328 && ! (rtx_equal_p (operands
[0], operands
[1])
9329 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
9330 && rtx_equal_p (operands
[0], operands
[2]))))
9332 /* If the operation is not commutable and the source 1 is memory, we must
9333 have a matching destination. */
9334 if (GET_CODE (operands
[1]) == MEM
9335 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
9336 && ! rtx_equal_p (operands
[0], operands
[1]))
9341 /* Attempt to expand a unary operator. Make the expansion closer to the
9342 actual machine, then just general_operand, which will allow 2 separate
9343 memory references (one output, one input) in a single insn. */
9346 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
9349 int matching_memory
;
9350 rtx src
, dst
, op
, clob
;
9355 /* If the destination is memory, and we do not have matching source
9356 operands, do things in registers. */
9357 matching_memory
= 0;
9360 if (rtx_equal_p (dst
, src
))
9361 matching_memory
= 1;
9363 dst
= gen_reg_rtx (mode
);
9366 /* When source operand is memory, destination must match. */
9367 if (MEM_P (src
) && !matching_memory
)
9368 src
= force_reg (mode
, src
);
9370 /* Emit the instruction. */
9372 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
9373 if (reload_in_progress
|| code
== NOT
)
9375 /* Reload doesn't know about the flags register, and doesn't know that
9376 it doesn't want to clobber it. */
9377 gcc_assert (code
== NOT
);
9382 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9383 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9386 /* Fix up the destination if needed. */
9387 if (dst
!= operands
[0])
9388 emit_move_insn (operands
[0], dst
);
9391 /* Return TRUE or FALSE depending on whether the unary operator meets the
9392 appropriate constraints. */
9395 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
9396 enum machine_mode mode ATTRIBUTE_UNUSED
,
9397 rtx operands
[2] ATTRIBUTE_UNUSED
)
9399 /* If one of operands is memory, source and destination must match. */
9400 if ((GET_CODE (operands
[0]) == MEM
9401 || GET_CODE (operands
[1]) == MEM
)
9402 && ! rtx_equal_p (operands
[0], operands
[1]))
9407 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9408 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9409 true, then replicate the mask for all elements of the vector register.
9410 If INVERT is true, then create a mask excluding the sign bit. */
9413 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
9415 enum machine_mode vec_mode
;
9416 HOST_WIDE_INT hi
, lo
;
9421 /* Find the sign bit, sign extended to 2*HWI. */
9423 lo
= 0x80000000, hi
= lo
< 0;
9424 else if (HOST_BITS_PER_WIDE_INT
>= 64)
9425 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
9427 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
9432 /* Force this value into the low part of a fp vector constant. */
9433 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
9434 mask
= gen_lowpart (mode
, mask
);
9439 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
9441 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
9442 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9443 vec_mode
= V4SFmode
;
9448 v
= gen_rtvec (2, mask
, mask
);
9450 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
9451 vec_mode
= V2DFmode
;
9454 return force_reg (vec_mode
, gen_rtx_CONST_VECTOR (vec_mode
, v
));
9457 /* Generate code for floating point ABS or NEG. */
9460 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
9463 rtx mask
, set
, use
, clob
, dst
, src
;
9464 bool matching_memory
;
9465 bool use_sse
= false;
9466 bool vector_mode
= VECTOR_MODE_P (mode
);
9467 enum machine_mode elt_mode
= mode
;
9471 elt_mode
= GET_MODE_INNER (mode
);
9474 else if (TARGET_SSE_MATH
)
9475 use_sse
= SSE_FLOAT_MODE_P (mode
);
9477 /* NEG and ABS performed with SSE use bitwise mask operations.
9478 Create the appropriate mask now. */
9480 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
9487 /* If the destination is memory, and we don't have matching source
9488 operands or we're using the x87, do things in registers. */
9489 matching_memory
= false;
9492 if (use_sse
&& rtx_equal_p (dst
, src
))
9493 matching_memory
= true;
9495 dst
= gen_reg_rtx (mode
);
9497 if (MEM_P (src
) && !matching_memory
)
9498 src
= force_reg (mode
, src
);
9502 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
9503 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9508 set
= gen_rtx_fmt_e (code
, mode
, src
);
9509 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
9512 use
= gen_rtx_USE (VOIDmode
, mask
);
9513 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9514 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
9515 gen_rtvec (3, set
, use
, clob
)));
9521 if (dst
!= operands
[0])
9522 emit_move_insn (operands
[0], dst
);
9525 /* Expand a copysign operation. Special case operand 0 being a constant. */
9528 ix86_expand_copysign (rtx operands
[])
9530 enum machine_mode mode
, vmode
;
9531 rtx dest
, op0
, op1
, mask
, nmask
;
9537 mode
= GET_MODE (dest
);
9538 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
9540 if (GET_CODE (op0
) == CONST_DOUBLE
)
9544 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
9545 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
9547 if (op0
== CONST0_RTX (mode
))
9548 op0
= CONST0_RTX (vmode
);
9552 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
9553 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
9555 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
9556 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
9559 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9562 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
9564 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
9568 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
9569 mask
= ix86_build_signbit_mask (mode
, 0, 0);
9572 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9574 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
9578 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9579 be a constant, and so has already been expanded into a vector constant. */
9582 ix86_split_copysign_const (rtx operands
[])
9584 enum machine_mode mode
, vmode
;
9585 rtx dest
, op0
, op1
, mask
, x
;
9592 mode
= GET_MODE (dest
);
9593 vmode
= GET_MODE (mask
);
9595 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
9596 x
= gen_rtx_AND (vmode
, dest
, mask
);
9597 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9599 if (op0
!= CONST0_RTX (vmode
))
9601 x
= gen_rtx_IOR (vmode
, dest
, op0
);
9602 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9606 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9607 so we have to do two masks. */
9610 ix86_split_copysign_var (rtx operands
[])
9612 enum machine_mode mode
, vmode
;
9613 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
9616 scratch
= operands
[1];
9619 nmask
= operands
[4];
9622 mode
= GET_MODE (dest
);
9623 vmode
= GET_MODE (mask
);
9625 if (rtx_equal_p (op0
, op1
))
9627 /* Shouldn't happen often (it's useless, obviously), but when it does
9628 we'd generate incorrect code if we continue below. */
9629 emit_move_insn (dest
, op0
);
9633 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
9635 gcc_assert (REGNO (op1
) == REGNO (scratch
));
9637 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9638 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9641 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9642 x
= gen_rtx_NOT (vmode
, dest
);
9643 x
= gen_rtx_AND (vmode
, x
, op0
);
9644 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9648 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
9650 x
= gen_rtx_AND (vmode
, scratch
, mask
);
9652 else /* alternative 2,4 */
9654 gcc_assert (REGNO (mask
) == REGNO (scratch
));
9655 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
9656 x
= gen_rtx_AND (vmode
, scratch
, op1
);
9658 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
9660 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
9662 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9663 x
= gen_rtx_AND (vmode
, dest
, nmask
);
9665 else /* alternative 3,4 */
9667 gcc_assert (REGNO (nmask
) == REGNO (dest
));
9669 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
9670 x
= gen_rtx_AND (vmode
, dest
, op0
);
9672 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9675 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
9676 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
9679 /* Return TRUE or FALSE depending on whether the first SET in INSN
9680 has source and destination with matching CC modes, and that the
9681 CC mode is at least as constrained as REQ_MODE. */
9684 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
9687 enum machine_mode set_mode
;
9689 set
= PATTERN (insn
);
9690 if (GET_CODE (set
) == PARALLEL
)
9691 set
= XVECEXP (set
, 0, 0);
9692 gcc_assert (GET_CODE (set
) == SET
);
9693 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
9695 set_mode
= GET_MODE (SET_DEST (set
));
9699 if (req_mode
!= CCNOmode
9700 && (req_mode
!= CCmode
9701 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
9705 if (req_mode
== CCGCmode
)
9709 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
9713 if (req_mode
== CCZmode
)
9723 return (GET_MODE (SET_SRC (set
)) == set_mode
);
9726 /* Generate insn patterns to do an integer compare of OPERANDS. */
9729 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
9731 enum machine_mode cmpmode
;
9734 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
9735 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
9737 /* This is very simple, but making the interface the same as in the
9738 FP case makes the rest of the code easier. */
9739 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
9740 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
9742 /* Return the test that should be put into the flags user, i.e.
9743 the bcc, scc, or cmov instruction. */
9744 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
9747 /* Figure out whether to use ordered or unordered fp comparisons.
9748 Return the appropriate mode to use. */
9751 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
9753 /* ??? In order to make all comparisons reversible, we do all comparisons
9754 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9755 all forms trapping and nontrapping comparisons, we can make inequality
9756 comparisons trapping again, since it results in better code when using
9757 FCOM based compares. */
9758 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
9762 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
9764 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
9765 return ix86_fp_compare_mode (code
);
9768 /* Only zero flag is needed. */
9770 case NE
: /* ZF!=0 */
9772 /* Codes needing carry flag. */
9773 case GEU
: /* CF=0 */
9774 case GTU
: /* CF=0 & ZF=0 */
9775 case LTU
: /* CF=1 */
9776 case LEU
: /* CF=1 | ZF=1 */
9778 /* Codes possibly doable only with sign flag when
9779 comparing against zero. */
9780 case GE
: /* SF=OF or SF=0 */
9781 case LT
: /* SF<>OF or SF=1 */
9782 if (op1
== const0_rtx
)
9785 /* For other cases Carry flag is not required. */
9787 /* Codes doable only with sign flag when comparing
9788 against zero, but we miss jump instruction for it
9789 so we need to use relational tests against overflow
9790 that thus needs to be zero. */
9791 case GT
: /* ZF=0 & SF=OF */
9792 case LE
: /* ZF=1 | SF<>OF */
9793 if (op1
== const0_rtx
)
9797 /* strcmp pattern do (use flags) and combine may ask us for proper
9806 /* Return the fixed registers used for condition codes. */
9809 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
9816 /* If two condition code modes are compatible, return a condition code
9817 mode which is compatible with both. Otherwise, return
9820 static enum machine_mode
9821 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
9826 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
9829 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
9830 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
9858 /* These are only compatible with themselves, which we already
9864 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9867 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
9869 enum rtx_code swapped_code
= swap_condition (code
);
9870 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
9871 || (ix86_fp_comparison_cost (swapped_code
)
9872 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
9875 /* Swap, force into registers, or otherwise massage the two operands
9876 to a fp comparison. The operands are updated in place; the new
9877 comparison code is returned. */
9879 static enum rtx_code
9880 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
9882 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
9883 rtx op0
= *pop0
, op1
= *pop1
;
9884 enum machine_mode op_mode
= GET_MODE (op0
);
9885 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
9887 /* All of the unordered compare instructions only work on registers.
9888 The same is true of the fcomi compare instructions. The XFmode
9889 compare instructions require registers except when comparing
9890 against zero or when converting operand 1 from fixed point to
9894 && (fpcmp_mode
== CCFPUmode
9895 || (op_mode
== XFmode
9896 && ! (standard_80387_constant_p (op0
) == 1
9897 || standard_80387_constant_p (op1
) == 1)
9898 && GET_CODE (op1
) != FLOAT
)
9899 || ix86_use_fcomi_compare (code
)))
9901 op0
= force_reg (op_mode
, op0
);
9902 op1
= force_reg (op_mode
, op1
);
9906 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9907 things around if they appear profitable, otherwise force op0
9910 if (standard_80387_constant_p (op0
) == 0
9911 || (GET_CODE (op0
) == MEM
9912 && ! (standard_80387_constant_p (op1
) == 0
9913 || GET_CODE (op1
) == MEM
)))
9916 tmp
= op0
, op0
= op1
, op1
= tmp
;
9917 code
= swap_condition (code
);
9920 if (GET_CODE (op0
) != REG
)
9921 op0
= force_reg (op_mode
, op0
);
9923 if (CONSTANT_P (op1
))
9925 int tmp
= standard_80387_constant_p (op1
);
9927 op1
= validize_mem (force_const_mem (op_mode
, op1
));
9931 op1
= force_reg (op_mode
, op1
);
9934 op1
= force_reg (op_mode
, op1
);
9938 /* Try to rearrange the comparison to make it cheaper. */
9939 if (ix86_fp_comparison_cost (code
)
9940 > ix86_fp_comparison_cost (swap_condition (code
))
9941 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
9944 tmp
= op0
, op0
= op1
, op1
= tmp
;
9945 code
= swap_condition (code
);
9946 if (GET_CODE (op0
) != REG
)
9947 op0
= force_reg (op_mode
, op0
);
9955 /* Convert comparison codes we use to represent FP comparison to integer
9956 code that will result in proper branch. Return UNKNOWN if no such code
9960 ix86_fp_compare_code_to_integer (enum rtx_code code
)
9989 /* Split comparison code CODE into comparisons we can do using branch
9990 instructions. BYPASS_CODE is comparison code for branch that will
9991 branch around FIRST_CODE and SECOND_CODE. If some of branches
9992 is not required, set value to UNKNOWN.
9993 We never require more than two branches. */
9996 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
9997 enum rtx_code
*first_code
,
9998 enum rtx_code
*second_code
)
10000 *first_code
= code
;
10001 *bypass_code
= UNKNOWN
;
10002 *second_code
= UNKNOWN
;
10004 /* The fcomi comparison sets flags as follows:
10014 case GT
: /* GTU - CF=0 & ZF=0 */
10015 case GE
: /* GEU - CF=0 */
10016 case ORDERED
: /* PF=0 */
10017 case UNORDERED
: /* PF=1 */
10018 case UNEQ
: /* EQ - ZF=1 */
10019 case UNLT
: /* LTU - CF=1 */
10020 case UNLE
: /* LEU - CF=1 | ZF=1 */
10021 case LTGT
: /* EQ - ZF=0 */
10023 case LT
: /* LTU - CF=1 - fails on unordered */
10024 *first_code
= UNLT
;
10025 *bypass_code
= UNORDERED
;
10027 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10028 *first_code
= UNLE
;
10029 *bypass_code
= UNORDERED
;
10031 case EQ
: /* EQ - ZF=1 - fails on unordered */
10032 *first_code
= UNEQ
;
10033 *bypass_code
= UNORDERED
;
10035 case NE
: /* NE - ZF=0 - fails on unordered */
10036 *first_code
= LTGT
;
10037 *second_code
= UNORDERED
;
10039 case UNGE
: /* GEU - CF=0 - fails on unordered */
10041 *second_code
= UNORDERED
;
10043 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10045 *second_code
= UNORDERED
;
10048 gcc_unreachable ();
10050 if (!TARGET_IEEE_FP
)
10052 *second_code
= UNKNOWN
;
10053 *bypass_code
= UNKNOWN
;
10057 /* Return cost of comparison done fcom + arithmetics operations on AX.
10058 All following functions do use number of instructions as a cost metrics.
10059 In future this should be tweaked to compute bytes for optimize_size and
10060 take into account performance of various instructions on various CPUs. */
10062 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10064 if (!TARGET_IEEE_FP
)
10066 /* The cost of code output by ix86_expand_fp_compare. */
10090 gcc_unreachable ();
10094 /* Return cost of comparison done using fcomi operation.
10095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10097 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10099 enum rtx_code bypass_code
, first_code
, second_code
;
10100 /* Return arbitrarily high cost when instruction is not supported - this
10101 prevents gcc from using it. */
10104 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10105 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10108 /* Return cost of comparison done using sahf operation.
10109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10111 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10113 enum rtx_code bypass_code
, first_code
, second_code
;
10114 /* Return arbitrarily high cost when instruction is not preferred - this
10115 avoids gcc from using it. */
10116 if (!TARGET_USE_SAHF
&& !optimize_size
)
10118 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10119 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10122 /* Compute cost of the comparison done using any method.
10123 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10125 ix86_fp_comparison_cost (enum rtx_code code
)
10127 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
10130 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
10131 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
10133 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
10134 if (min
> sahf_cost
)
10136 if (min
> fcomi_cost
)
10141 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10144 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
10145 rtx
*second_test
, rtx
*bypass_test
)
10147 enum machine_mode fpcmp_mode
, intcmp_mode
;
10149 int cost
= ix86_fp_comparison_cost (code
);
10150 enum rtx_code bypass_code
, first_code
, second_code
;
10152 fpcmp_mode
= ix86_fp_compare_mode (code
);
10153 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
10156 *second_test
= NULL_RTX
;
10158 *bypass_test
= NULL_RTX
;
10160 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10162 /* Do fcomi/sahf based test when profitable. */
10163 if ((bypass_code
== UNKNOWN
|| bypass_test
)
10164 && (second_code
== UNKNOWN
|| second_test
)
10165 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
10169 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10170 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
10176 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10177 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10179 scratch
= gen_reg_rtx (HImode
);
10180 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10181 emit_insn (gen_x86_sahf_1 (scratch
));
10184 /* The FP codes work out to act like unsigned. */
10185 intcmp_mode
= fpcmp_mode
;
10187 if (bypass_code
!= UNKNOWN
)
10188 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
10189 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10191 if (second_code
!= UNKNOWN
)
10192 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
10193 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10199 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
10200 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
10202 scratch
= gen_reg_rtx (HImode
);
10203 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
10205 /* In the unordered case, we have to check C2 for NaN's, which
10206 doesn't happen to work out to anything nice combination-wise.
10207 So do some bit twiddling on the value we've got in AH to come
10208 up with an appropriate set of condition codes. */
10210 intcmp_mode
= CCNOmode
;
10215 if (code
== GT
|| !TARGET_IEEE_FP
)
10217 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10222 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10223 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10224 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
10225 intcmp_mode
= CCmode
;
10231 if (code
== LT
&& TARGET_IEEE_FP
)
10233 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10234 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
10235 intcmp_mode
= CCmode
;
10240 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
10246 if (code
== GE
|| !TARGET_IEEE_FP
)
10248 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
10253 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10254 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10261 if (code
== LE
&& TARGET_IEEE_FP
)
10263 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10264 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
10265 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10266 intcmp_mode
= CCmode
;
10271 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
10277 if (code
== EQ
&& TARGET_IEEE_FP
)
10279 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10280 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
10281 intcmp_mode
= CCmode
;
10286 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10293 if (code
== NE
&& TARGET_IEEE_FP
)
10295 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
10296 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
10302 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
10308 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10312 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
10317 gcc_unreachable ();
10321 /* Return the test that should be put into the flags user, i.e.
10322 the bcc, scc, or cmov instruction. */
10323 return gen_rtx_fmt_ee (code
, VOIDmode
,
10324 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
10329 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
10332 op0
= ix86_compare_op0
;
10333 op1
= ix86_compare_op1
;
10336 *second_test
= NULL_RTX
;
10338 *bypass_test
= NULL_RTX
;
10340 if (ix86_compare_emitted
)
10342 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
10343 ix86_compare_emitted
= NULL_RTX
;
10345 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10346 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10347 second_test
, bypass_test
);
10349 ret
= ix86_expand_int_compare (code
, op0
, op1
);
10354 /* Return true if the CODE will result in nontrivial jump sequence. */
10356 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
10358 enum rtx_code bypass_code
, first_code
, second_code
;
10361 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10362 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
10366 ix86_expand_branch (enum rtx_code code
, rtx label
)
10370 /* If we have emitted a compare insn, go straight to simple.
10371 ix86_expand_compare won't emit anything if ix86_compare_emitted
10373 if (ix86_compare_emitted
)
10376 switch (GET_MODE (ix86_compare_op0
))
10382 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
10383 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10384 gen_rtx_LABEL_REF (VOIDmode
, label
),
10386 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10395 enum rtx_code bypass_code
, first_code
, second_code
;
10397 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
10398 &ix86_compare_op1
);
10400 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10402 /* Check whether we will use the natural sequence with one jump. If
10403 so, we can expand jump early. Otherwise delay expansion by
10404 creating compound insn to not confuse optimizers. */
10405 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
10408 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
10409 gen_rtx_LABEL_REF (VOIDmode
, label
),
10410 pc_rtx
, NULL_RTX
, NULL_RTX
);
10414 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
10415 ix86_compare_op0
, ix86_compare_op1
);
10416 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10417 gen_rtx_LABEL_REF (VOIDmode
, label
),
10419 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
10421 use_fcomi
= ix86_use_fcomi_compare (code
);
10422 vec
= rtvec_alloc (3 + !use_fcomi
);
10423 RTVEC_ELT (vec
, 0) = tmp
;
10425 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
10427 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
10430 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
10432 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
10441 /* Expand DImode branch into multiple compare+branch. */
10443 rtx lo
[2], hi
[2], label2
;
10444 enum rtx_code code1
, code2
, code3
;
10445 enum machine_mode submode
;
10447 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
10449 tmp
= ix86_compare_op0
;
10450 ix86_compare_op0
= ix86_compare_op1
;
10451 ix86_compare_op1
= tmp
;
10452 code
= swap_condition (code
);
10454 if (GET_MODE (ix86_compare_op0
) == DImode
)
10456 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10457 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10462 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
10463 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
10467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10468 avoid two branches. This costs one extra insn, so disable when
10469 optimizing for size. */
10471 if ((code
== EQ
|| code
== NE
)
10473 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
10478 if (hi
[1] != const0_rtx
)
10479 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
10480 NULL_RTX
, 0, OPTAB_WIDEN
);
10483 if (lo
[1] != const0_rtx
)
10484 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
10485 NULL_RTX
, 0, OPTAB_WIDEN
);
10487 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
10488 NULL_RTX
, 0, OPTAB_WIDEN
);
10490 ix86_compare_op0
= tmp
;
10491 ix86_compare_op1
= const0_rtx
;
10492 ix86_expand_branch (code
, label
);
10496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10497 op1 is a constant and the low word is zero, then we can just
10498 examine the high word. */
10500 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
10503 case LT
: case LTU
: case GE
: case GEU
:
10504 ix86_compare_op0
= hi
[0];
10505 ix86_compare_op1
= hi
[1];
10506 ix86_expand_branch (code
, label
);
10512 /* Otherwise, we need two or three jumps. */
10514 label2
= gen_label_rtx ();
10517 code2
= swap_condition (code
);
10518 code3
= unsigned_condition (code
);
10522 case LT
: case GT
: case LTU
: case GTU
:
10525 case LE
: code1
= LT
; code2
= GT
; break;
10526 case GE
: code1
= GT
; code2
= LT
; break;
10527 case LEU
: code1
= LTU
; code2
= GTU
; break;
10528 case GEU
: code1
= GTU
; code2
= LTU
; break;
10530 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
10531 case NE
: code2
= UNKNOWN
; break;
10534 gcc_unreachable ();
10539 * if (hi(a) < hi(b)) goto true;
10540 * if (hi(a) > hi(b)) goto false;
10541 * if (lo(a) < lo(b)) goto true;
10545 ix86_compare_op0
= hi
[0];
10546 ix86_compare_op1
= hi
[1];
10548 if (code1
!= UNKNOWN
)
10549 ix86_expand_branch (code1
, label
);
10550 if (code2
!= UNKNOWN
)
10551 ix86_expand_branch (code2
, label2
);
10553 ix86_compare_op0
= lo
[0];
10554 ix86_compare_op1
= lo
[1];
10555 ix86_expand_branch (code3
, label
);
10557 if (code2
!= UNKNOWN
)
10558 emit_label (label2
);
10563 gcc_unreachable ();
10567 /* Split branch based on floating point condition. */
10569 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
10570 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
10572 rtx second
, bypass
;
10573 rtx label
= NULL_RTX
;
10575 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
10578 if (target2
!= pc_rtx
)
10581 code
= reverse_condition_maybe_unordered (code
);
10586 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
10587 tmp
, &second
, &bypass
);
10589 /* Remove pushed operand from stack. */
10591 ix86_free_from_memory (GET_MODE (pushed
));
10593 if (split_branch_probability
>= 0)
10595 /* Distribute the probabilities across the jumps.
10596 Assume the BYPASS and SECOND to be always test
10598 probability
= split_branch_probability
;
10600 /* Value of 1 is low enough to make no need for probability
10601 to be updated. Later we may run some experiments and see
10602 if unordered values are more frequent in practice. */
10604 bypass_probability
= 1;
10606 second_probability
= 1;
10608 if (bypass
!= NULL_RTX
)
10610 label
= gen_label_rtx ();
10611 i
= emit_jump_insn (gen_rtx_SET
10613 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10615 gen_rtx_LABEL_REF (VOIDmode
,
10618 if (bypass_probability
>= 0)
10620 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10621 GEN_INT (bypass_probability
),
10624 i
= emit_jump_insn (gen_rtx_SET
10626 gen_rtx_IF_THEN_ELSE (VOIDmode
,
10627 condition
, target1
, target2
)));
10628 if (probability
>= 0)
10630 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10631 GEN_INT (probability
),
10633 if (second
!= NULL_RTX
)
10635 i
= emit_jump_insn (gen_rtx_SET
10637 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
10639 if (second_probability
>= 0)
10641 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
10642 GEN_INT (second_probability
),
10645 if (label
!= NULL_RTX
)
10646 emit_label (label
);
10650 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
10652 rtx ret
, tmp
, tmpreg
, equiv
;
10653 rtx second_test
, bypass_test
;
10655 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
10656 return 0; /* FAIL */
10658 gcc_assert (GET_MODE (dest
) == QImode
);
10660 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10661 PUT_MODE (ret
, QImode
);
10666 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
10667 if (bypass_test
|| second_test
)
10669 rtx test
= second_test
;
10671 rtx tmp2
= gen_reg_rtx (QImode
);
10674 gcc_assert (!second_test
);
10675 test
= bypass_test
;
10677 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
10679 PUT_MODE (test
, QImode
);
10680 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
10683 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
10685 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
10688 /* Attach a REG_EQUAL note describing the comparison result. */
10689 if (ix86_compare_op0
&& ix86_compare_op1
)
10691 equiv
= simplify_gen_relational (code
, QImode
,
10692 GET_MODE (ix86_compare_op0
),
10693 ix86_compare_op0
, ix86_compare_op1
);
10694 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
10697 return 1; /* DONE */
10700 /* Expand comparison setting or clearing carry flag. Return true when
10701 successful and set pop for the operation. */
10703 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
10705 enum machine_mode mode
=
10706 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
10708 /* Do not handle DImode compares that go through special path. Also we can't
10709 deal with FP compares yet. This is possible to add. */
10710 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
10712 if (FLOAT_MODE_P (mode
))
10714 rtx second_test
= NULL
, bypass_test
= NULL
;
10715 rtx compare_op
, compare_seq
;
10717 /* Shortcut: following common codes never translate into carry flag compares. */
10718 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
10719 || code
== ORDERED
|| code
== UNORDERED
)
10722 /* These comparisons require zero flag; swap operands so they won't. */
10723 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
10724 && !TARGET_IEEE_FP
)
10729 code
= swap_condition (code
);
10732 /* Try to expand the comparison and verify that we end up with carry flag
10733 based comparison. This is fails to be true only when we decide to expand
10734 comparison using arithmetic that is not too common scenario. */
10736 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
10737 &second_test
, &bypass_test
);
10738 compare_seq
= get_insns ();
10741 if (second_test
|| bypass_test
)
10743 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10744 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10745 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
10747 code
= GET_CODE (compare_op
);
10748 if (code
!= LTU
&& code
!= GEU
)
10750 emit_insn (compare_seq
);
10754 if (!INTEGRAL_MODE_P (mode
))
10762 /* Convert a==0 into (unsigned)a<1. */
10765 if (op1
!= const0_rtx
)
10768 code
= (code
== EQ
? LTU
: GEU
);
10771 /* Convert a>b into b<a or a>=b-1. */
10774 if (GET_CODE (op1
) == CONST_INT
)
10776 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
10777 /* Bail out on overflow. We still can swap operands but that
10778 would force loading of the constant into register. */
10779 if (op1
== const0_rtx
10780 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
10782 code
= (code
== GTU
? GEU
: LTU
);
10789 code
= (code
== GTU
? LTU
: GEU
);
10793 /* Convert a>=0 into (unsigned)a<0x80000000. */
10796 if (mode
== DImode
|| op1
!= const0_rtx
)
10798 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10799 code
= (code
== LT
? GEU
: LTU
);
10803 if (mode
== DImode
|| op1
!= constm1_rtx
)
10805 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
10806 code
= (code
== LE
? GEU
: LTU
);
10812 /* Swapping operands may cause constant to appear as first operand. */
10813 if (!nonimmediate_operand (op0
, VOIDmode
))
10815 if (no_new_pseudos
)
10817 op0
= force_reg (mode
, op0
);
10819 ix86_compare_op0
= op0
;
10820 ix86_compare_op1
= op1
;
10821 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
10822 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
10827 ix86_expand_int_movcc (rtx operands
[])
10829 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
10830 rtx compare_seq
, compare_op
;
10831 rtx second_test
, bypass_test
;
10832 enum machine_mode mode
= GET_MODE (operands
[0]);
10833 bool sign_bit_compare_p
= false;;
10836 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10837 compare_seq
= get_insns ();
10840 compare_code
= GET_CODE (compare_op
);
10842 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
10843 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
10844 sign_bit_compare_p
= true;
10846 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10847 HImode insns, we'd be swallowed in word prefix ops. */
10849 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
10850 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
10851 && GET_CODE (operands
[2]) == CONST_INT
10852 && GET_CODE (operands
[3]) == CONST_INT
)
10854 rtx out
= operands
[0];
10855 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
10856 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
10857 HOST_WIDE_INT diff
;
10860 /* Sign bit compares are better done using shifts than we do by using
10862 if (sign_bit_compare_p
10863 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10864 ix86_compare_op1
, &compare_op
))
10866 /* Detect overlap between destination and compare sources. */
10869 if (!sign_bit_compare_p
)
10871 bool fpcmp
= false;
10873 compare_code
= GET_CODE (compare_op
);
10875 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10876 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10879 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
10882 /* To simplify rest of code, restrict to the GEU case. */
10883 if (compare_code
== LTU
)
10885 HOST_WIDE_INT tmp
= ct
;
10888 compare_code
= reverse_condition (compare_code
);
10889 code
= reverse_condition (code
);
10894 PUT_CODE (compare_op
,
10895 reverse_condition_maybe_unordered
10896 (GET_CODE (compare_op
)));
10898 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10902 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
10903 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
10904 tmp
= gen_reg_rtx (mode
);
10906 if (mode
== DImode
)
10907 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
10909 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
10913 if (code
== GT
|| code
== GE
)
10914 code
= reverse_condition (code
);
10917 HOST_WIDE_INT tmp
= ct
;
10922 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
10923 ix86_compare_op1
, VOIDmode
, 0, -1);
10936 tmp
= expand_simple_binop (mode
, PLUS
,
10938 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10949 tmp
= expand_simple_binop (mode
, IOR
,
10951 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10953 else if (diff
== -1 && ct
)
10963 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10965 tmp
= expand_simple_binop (mode
, PLUS
,
10966 copy_rtx (tmp
), GEN_INT (cf
),
10967 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10975 * andl cf - ct, dest
10985 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
10988 tmp
= expand_simple_binop (mode
, AND
,
10990 gen_int_mode (cf
- ct
, mode
),
10991 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10993 tmp
= expand_simple_binop (mode
, PLUS
,
10994 copy_rtx (tmp
), GEN_INT (ct
),
10995 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
10998 if (!rtx_equal_p (tmp
, out
))
10999 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11001 return 1; /* DONE */
11007 tmp
= ct
, ct
= cf
, cf
= tmp
;
11009 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11011 /* We may be reversing unordered compare to normal compare, that
11012 is not valid in general (we may convert non-trapping condition
11013 to trapping one), however on i386 we currently emit all
11014 comparisons unordered. */
11015 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11016 code
= reverse_condition_maybe_unordered (code
);
11020 compare_code
= reverse_condition (compare_code
);
11021 code
= reverse_condition (code
);
11025 compare_code
= UNKNOWN
;
11026 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11027 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
11029 if (ix86_compare_op1
== const0_rtx
11030 && (code
== LT
|| code
== GE
))
11031 compare_code
= code
;
11032 else if (ix86_compare_op1
== constm1_rtx
)
11036 else if (code
== GT
)
11041 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11042 if (compare_code
!= UNKNOWN
11043 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11044 && (cf
== -1 || ct
== -1))
11046 /* If lea code below could be used, only optimize
11047 if it results in a 2 insn sequence. */
11049 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11050 || diff
== 3 || diff
== 5 || diff
== 9)
11051 || (compare_code
== LT
&& ct
== -1)
11052 || (compare_code
== GE
&& cf
== -1))
11055 * notl op1 (if necessary)
11063 code
= reverse_condition (code
);
11066 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11067 ix86_compare_op1
, VOIDmode
, 0, -1);
11069 out
= expand_simple_binop (mode
, IOR
,
11071 out
, 1, OPTAB_DIRECT
);
11072 if (out
!= operands
[0])
11073 emit_move_insn (operands
[0], out
);
11075 return 1; /* DONE */
11080 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11081 || diff
== 3 || diff
== 5 || diff
== 9)
11082 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11084 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11090 * lea cf(dest*(ct-cf)),dest
11094 * This also catches the degenerate setcc-only case.
11100 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11101 ix86_compare_op1
, VOIDmode
, 0, 1);
11104 /* On x86_64 the lea instruction operates on Pmode, so we need
11105 to get arithmetics done in proper mode to match. */
11107 tmp
= copy_rtx (out
);
11111 out1
= copy_rtx (out
);
11112 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11116 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11122 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11125 if (!rtx_equal_p (tmp
, out
))
11128 out
= force_operand (tmp
, copy_rtx (out
));
11130 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
11132 if (!rtx_equal_p (out
, operands
[0]))
11133 emit_move_insn (operands
[0], copy_rtx (out
));
11135 return 1; /* DONE */
11139 * General case: Jumpful:
11140 * xorl dest,dest cmpl op1, op2
11141 * cmpl op1, op2 movl ct, dest
11142 * setcc dest jcc 1f
11143 * decl dest movl cf, dest
11144 * andl (cf-ct),dest 1:
11147 * Size 20. Size 14.
11149 * This is reasonably steep, but branch mispredict costs are
11150 * high on modern cpus, so consider failing only if optimizing
11154 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11155 && BRANCH_COST
>= 2)
11161 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11162 /* We may be reversing unordered compare to normal compare,
11163 that is not valid in general (we may convert non-trapping
11164 condition to trapping one), however on i386 we currently
11165 emit all comparisons unordered. */
11166 code
= reverse_condition_maybe_unordered (code
);
11169 code
= reverse_condition (code
);
11170 if (compare_code
!= UNKNOWN
)
11171 compare_code
= reverse_condition (compare_code
);
11175 if (compare_code
!= UNKNOWN
)
11177 /* notl op1 (if needed)
11182 For x < 0 (resp. x <= -1) there will be no notl,
11183 so if possible swap the constants to get rid of the
11185 True/false will be -1/0 while code below (store flag
11186 followed by decrement) is 0/-1, so the constants need
11187 to be exchanged once more. */
11189 if (compare_code
== GE
|| !cf
)
11191 code
= reverse_condition (code
);
11196 HOST_WIDE_INT tmp
= cf
;
11201 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11202 ix86_compare_op1
, VOIDmode
, 0, -1);
11206 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11207 ix86_compare_op1
, VOIDmode
, 0, 1);
11209 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
11210 copy_rtx (out
), 1, OPTAB_DIRECT
);
11213 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
11214 gen_int_mode (cf
- ct
, mode
),
11215 copy_rtx (out
), 1, OPTAB_DIRECT
);
11217 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
11218 copy_rtx (out
), 1, OPTAB_DIRECT
);
11219 if (!rtx_equal_p (out
, operands
[0]))
11220 emit_move_insn (operands
[0], copy_rtx (out
));
11222 return 1; /* DONE */
11226 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
11228 /* Try a few things more with specific constants and a variable. */
11231 rtx var
, orig_out
, out
, tmp
;
11233 if (BRANCH_COST
<= 2)
11234 return 0; /* FAIL */
11236 /* If one of the two operands is an interesting constant, load a
11237 constant with the above and mask it in with a logical operation. */
11239 if (GET_CODE (operands
[2]) == CONST_INT
)
11242 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
11243 operands
[3] = constm1_rtx
, op
= and_optab
;
11244 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
11245 operands
[3] = const0_rtx
, op
= ior_optab
;
11247 return 0; /* FAIL */
11249 else if (GET_CODE (operands
[3]) == CONST_INT
)
11252 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
11253 operands
[2] = constm1_rtx
, op
= and_optab
;
11254 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
11255 operands
[2] = const0_rtx
, op
= ior_optab
;
11257 return 0; /* FAIL */
11260 return 0; /* FAIL */
11262 orig_out
= operands
[0];
11263 tmp
= gen_reg_rtx (mode
);
11266 /* Recurse to get the constant loaded. */
11267 if (ix86_expand_int_movcc (operands
) == 0)
11268 return 0; /* FAIL */
11270 /* Mask in the interesting variable. */
11271 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
11273 if (!rtx_equal_p (out
, orig_out
))
11274 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
11276 return 1; /* DONE */
11280 * For comparison with above,
11290 if (! nonimmediate_operand (operands
[2], mode
))
11291 operands
[2] = force_reg (mode
, operands
[2]);
11292 if (! nonimmediate_operand (operands
[3], mode
))
11293 operands
[3] = force_reg (mode
, operands
[3]);
11295 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11297 rtx tmp
= gen_reg_rtx (mode
);
11298 emit_move_insn (tmp
, operands
[3]);
11301 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11303 rtx tmp
= gen_reg_rtx (mode
);
11304 emit_move_insn (tmp
, operands
[2]);
11308 if (! register_operand (operands
[2], VOIDmode
)
11310 || ! register_operand (operands
[3], VOIDmode
)))
11311 operands
[2] = force_reg (mode
, operands
[2]);
11314 && ! register_operand (operands
[3], VOIDmode
))
11315 operands
[3] = force_reg (mode
, operands
[3]);
11317 emit_insn (compare_seq
);
11318 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11319 gen_rtx_IF_THEN_ELSE (mode
,
11320 compare_op
, operands
[2],
11323 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11324 gen_rtx_IF_THEN_ELSE (mode
,
11326 copy_rtx (operands
[3]),
11327 copy_rtx (operands
[0]))));
11329 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
11330 gen_rtx_IF_THEN_ELSE (mode
,
11332 copy_rtx (operands
[2]),
11333 copy_rtx (operands
[0]))));
11335 return 1; /* DONE */
11338 /* Swap, force into registers, or otherwise massage the two operands
11339 to an sse comparison with a mask result. Thus we differ a bit from
11340 ix86_prepare_fp_compare_args which expects to produce a flags result.
11342 The DEST operand exists to help determine whether to commute commutative
11343 operators. The POP0/POP1 operands are updated in place. The new
11344 comparison code is returned, or UNKNOWN if not implementable. */
11346 static enum rtx_code
11347 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
11348 rtx
*pop0
, rtx
*pop1
)
11356 /* We have no LTGT as an operator. We could implement it with
11357 NE & ORDERED, but this requires an extra temporary. It's
11358 not clear that it's worth it. */
11365 /* These are supported directly. */
11372 /* For commutative operators, try to canonicalize the destination
11373 operand to be first in the comparison - this helps reload to
11374 avoid extra moves. */
11375 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
11383 /* These are not supported directly. Swap the comparison operands
11384 to transform into something that is supported. */
11388 code
= swap_condition (code
);
11392 gcc_unreachable ();
11398 /* Detect conditional moves that exactly match min/max operational
11399 semantics. Note that this is IEEE safe, as long as we don't
11400 interchange the operands.
11402 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11403 and TRUE if the operation is successful and instructions are emitted. */
11406 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
11407 rtx cmp_op1
, rtx if_true
, rtx if_false
)
11409 enum machine_mode mode
;
11415 else if (code
== UNGE
)
11418 if_true
= if_false
;
11424 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
11426 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
11431 mode
= GET_MODE (dest
);
11433 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11434 but MODE may be a vector mode and thus not appropriate. */
11435 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
11437 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
11440 if_true
= force_reg (mode
, if_true
);
11441 v
= gen_rtvec (2, if_true
, if_false
);
11442 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
11446 code
= is_min
? SMIN
: SMAX
;
11447 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
11450 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
11454 /* Expand an sse vector comparison. Return the register with the result. */
11457 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
11458 rtx op_true
, rtx op_false
)
11460 enum machine_mode mode
= GET_MODE (dest
);
11463 cmp_op0
= force_reg (mode
, cmp_op0
);
11464 if (!nonimmediate_operand (cmp_op1
, mode
))
11465 cmp_op1
= force_reg (mode
, cmp_op1
);
11468 || reg_overlap_mentioned_p (dest
, op_true
)
11469 || reg_overlap_mentioned_p (dest
, op_false
))
11470 dest
= gen_reg_rtx (mode
);
11472 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
11473 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11478 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11479 operations. This is used for both scalar and vector conditional moves. */
11482 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
11484 enum machine_mode mode
= GET_MODE (dest
);
11487 if (op_false
== CONST0_RTX (mode
))
11489 op_true
= force_reg (mode
, op_true
);
11490 x
= gen_rtx_AND (mode
, cmp
, op_true
);
11491 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11493 else if (op_true
== CONST0_RTX (mode
))
11495 op_false
= force_reg (mode
, op_false
);
11496 x
= gen_rtx_NOT (mode
, cmp
);
11497 x
= gen_rtx_AND (mode
, x
, op_false
);
11498 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11502 op_true
= force_reg (mode
, op_true
);
11503 op_false
= force_reg (mode
, op_false
);
11505 t2
= gen_reg_rtx (mode
);
11507 t3
= gen_reg_rtx (mode
);
11511 x
= gen_rtx_AND (mode
, op_true
, cmp
);
11512 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
11514 x
= gen_rtx_NOT (mode
, cmp
);
11515 x
= gen_rtx_AND (mode
, x
, op_false
);
11516 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
11518 x
= gen_rtx_IOR (mode
, t3
, t2
);
11519 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
11523 /* Expand a floating-point conditional move. Return true if successful. */
11526 ix86_expand_fp_movcc (rtx operands
[])
11528 enum machine_mode mode
= GET_MODE (operands
[0]);
11529 enum rtx_code code
= GET_CODE (operands
[1]);
11530 rtx tmp
, compare_op
, second_test
, bypass_test
;
11532 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
11534 enum machine_mode cmode
;
11536 /* Since we've no cmove for sse registers, don't force bad register
11537 allocation just to gain access to it. Deny movcc when the
11538 comparison mode doesn't match the move mode. */
11539 cmode
= GET_MODE (ix86_compare_op0
);
11540 if (cmode
== VOIDmode
)
11541 cmode
= GET_MODE (ix86_compare_op1
);
11545 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11547 &ix86_compare_op1
);
11548 if (code
== UNKNOWN
)
11551 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
11552 ix86_compare_op1
, operands
[2],
11556 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
11557 ix86_compare_op1
, operands
[2], operands
[3]);
11558 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
11562 /* The floating point conditional move instructions don't directly
11563 support conditions resulting from a signed integer comparison. */
11565 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11567 /* The floating point conditional move instructions don't directly
11568 support signed integer comparisons. */
11570 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
11572 gcc_assert (!second_test
&& !bypass_test
);
11573 tmp
= gen_reg_rtx (QImode
);
11574 ix86_expand_setcc (code
, tmp
);
11576 ix86_compare_op0
= tmp
;
11577 ix86_compare_op1
= const0_rtx
;
11578 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11580 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
11582 tmp
= gen_reg_rtx (mode
);
11583 emit_move_insn (tmp
, operands
[3]);
11586 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
11588 tmp
= gen_reg_rtx (mode
);
11589 emit_move_insn (tmp
, operands
[2]);
11593 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11594 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
11595 operands
[2], operands
[3])));
11597 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11598 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
11599 operands
[3], operands
[0])));
11601 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
11602 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
11603 operands
[2], operands
[0])));
11608 /* Expand a floating-point vector conditional move; a vcond operation
11609 rather than a movcc operation. */
11612 ix86_expand_fp_vcond (rtx operands
[])
11614 enum rtx_code code
= GET_CODE (operands
[3]);
11617 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
11618 &operands
[4], &operands
[5]);
11619 if (code
== UNKNOWN
)
11622 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
11623 operands
[5], operands
[1], operands
[2]))
11626 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
11627 operands
[1], operands
[2]);
11628 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
11632 /* Expand a signed integral vector conditional move. */
11635 ix86_expand_int_vcond (rtx operands
[])
11637 enum machine_mode mode
= GET_MODE (operands
[0]);
11638 enum rtx_code code
= GET_CODE (operands
[3]);
11639 bool negate
= false;
11642 cop0
= operands
[4];
11643 cop1
= operands
[5];
11645 /* Canonicalize the comparison to EQ, GT, GTU. */
11656 code
= reverse_condition (code
);
11662 code
= reverse_condition (code
);
11668 code
= swap_condition (code
);
11669 x
= cop0
, cop0
= cop1
, cop1
= x
;
11673 gcc_unreachable ();
11676 /* Unsigned parallel compare is not supported by the hardware. Play some
11677 tricks to turn this into a signed comparison against 0. */
11680 cop0
= force_reg (mode
, cop0
);
11688 /* Perform a parallel modulo subtraction. */
11689 t1
= gen_reg_rtx (mode
);
11690 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
11692 /* Extract the original sign bit of op0. */
11693 mask
= GEN_INT (-0x80000000);
11694 mask
= gen_rtx_CONST_VECTOR (mode
,
11695 gen_rtvec (4, mask
, mask
, mask
, mask
));
11696 mask
= force_reg (mode
, mask
);
11697 t2
= gen_reg_rtx (mode
);
11698 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
11700 /* XOR it back into the result of the subtraction. This results
11701 in the sign bit set iff we saw unsigned underflow. */
11702 x
= gen_reg_rtx (mode
);
11703 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
11711 /* Perform a parallel unsigned saturating subtraction. */
11712 x
= gen_reg_rtx (mode
);
11713 emit_insn (gen_rtx_SET (VOIDmode
, x
,
11714 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
11721 gcc_unreachable ();
11725 cop1
= CONST0_RTX (mode
);
11728 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
11729 operands
[1+negate
], operands
[2-negate
]);
11731 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
11732 operands
[2-negate
]);
11736 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11737 true if we should do zero extension, else sign extension. HIGH_P is
11738 true if we want the N/2 high elements, else the low elements. */
11741 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
11743 enum machine_mode imode
= GET_MODE (operands
[1]);
11744 rtx (*unpack
)(rtx
, rtx
, rtx
);
11751 unpack
= gen_vec_interleave_highv16qi
;
11753 unpack
= gen_vec_interleave_lowv16qi
;
11757 unpack
= gen_vec_interleave_highv8hi
;
11759 unpack
= gen_vec_interleave_lowv8hi
;
11763 unpack
= gen_vec_interleave_highv4si
;
11765 unpack
= gen_vec_interleave_lowv4si
;
11768 gcc_unreachable ();
11771 dest
= gen_lowpart (imode
, operands
[0]);
11774 se
= force_reg (imode
, CONST0_RTX (imode
));
11776 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
11777 operands
[1], pc_rtx
, pc_rtx
);
11779 emit_insn (unpack (dest
, operands
[1], se
));
11782 /* Expand conditional increment or decrement using adb/sbb instructions.
11783 The default case using setcc followed by the conditional move can be
11784 done by generic code. */
11786 ix86_expand_int_addcc (rtx operands
[])
11788 enum rtx_code code
= GET_CODE (operands
[1]);
11790 rtx val
= const0_rtx
;
11791 bool fpcmp
= false;
11792 enum machine_mode mode
= GET_MODE (operands
[0]);
11794 if (operands
[3] != const1_rtx
11795 && operands
[3] != constm1_rtx
)
11797 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11798 ix86_compare_op1
, &compare_op
))
11800 code
= GET_CODE (compare_op
);
11802 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11803 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11806 code
= ix86_fp_compare_code_to_integer (code
);
11813 PUT_CODE (compare_op
,
11814 reverse_condition_maybe_unordered
11815 (GET_CODE (compare_op
)));
11817 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11819 PUT_MODE (compare_op
, mode
);
11821 /* Construct either adc or sbb insn. */
11822 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
11824 switch (GET_MODE (operands
[0]))
11827 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11830 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11833 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11836 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11839 gcc_unreachable ();
11844 switch (GET_MODE (operands
[0]))
11847 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
11850 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
11853 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
11856 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
11859 gcc_unreachable ();
11862 return 1; /* DONE */
11866 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11867 works for floating pointer parameters and nonoffsetable memories.
11868 For pushes, it returns just stack offsets; the values will be saved
11869 in the right order. Maximally three parts are generated. */
11872 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
11877 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
11879 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
11881 gcc_assert (GET_CODE (operand
) != REG
|| !MMX_REGNO_P (REGNO (operand
)));
11882 gcc_assert (size
>= 2 && size
<= 3);
11884 /* Optimize constant pool reference to immediates. This is used by fp
11885 moves, that force all constants to memory to allow combining. */
11886 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
11888 rtx tmp
= maybe_get_pool_constant (operand
);
11893 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
11895 /* The only non-offsetable memories we handle are pushes. */
11896 int ok
= push_operand (operand
, VOIDmode
);
11900 operand
= copy_rtx (operand
);
11901 PUT_MODE (operand
, Pmode
);
11902 parts
[0] = parts
[1] = parts
[2] = operand
;
11906 if (GET_CODE (operand
) == CONST_VECTOR
)
11908 enum machine_mode imode
= int_mode_for_mode (mode
);
11909 /* Caution: if we looked through a constant pool memory above,
11910 the operand may actually have a different mode now. That's
11911 ok, since we want to pun this all the way back to an integer. */
11912 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
11913 gcc_assert (operand
!= NULL
);
11919 if (mode
== DImode
)
11920 split_di (&operand
, 1, &parts
[0], &parts
[1]);
11923 if (REG_P (operand
))
11925 gcc_assert (reload_completed
);
11926 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
11927 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
11929 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
11931 else if (offsettable_memref_p (operand
))
11933 operand
= adjust_address (operand
, SImode
, 0);
11934 parts
[0] = operand
;
11935 parts
[1] = adjust_address (operand
, SImode
, 4);
11937 parts
[2] = adjust_address (operand
, SImode
, 8);
11939 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11944 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11948 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
11949 parts
[2] = gen_int_mode (l
[2], SImode
);
11952 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
11955 gcc_unreachable ();
11957 parts
[1] = gen_int_mode (l
[1], SImode
);
11958 parts
[0] = gen_int_mode (l
[0], SImode
);
11961 gcc_unreachable ();
11966 if (mode
== TImode
)
11967 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
11968 if (mode
== XFmode
|| mode
== TFmode
)
11970 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
11971 if (REG_P (operand
))
11973 gcc_assert (reload_completed
);
11974 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
11975 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
11977 else if (offsettable_memref_p (operand
))
11979 operand
= adjust_address (operand
, DImode
, 0);
11980 parts
[0] = operand
;
11981 parts
[1] = adjust_address (operand
, upper_mode
, 8);
11983 else if (GET_CODE (operand
) == CONST_DOUBLE
)
11988 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
11989 real_to_target (l
, &r
, mode
);
11991 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11992 if (HOST_BITS_PER_WIDE_INT
>= 64)
11995 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
11996 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
11999 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12001 if (upper_mode
== SImode
)
12002 parts
[1] = gen_int_mode (l
[2], SImode
);
12003 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12006 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12007 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12010 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12013 gcc_unreachable ();
12020 /* Emit insns to perform a move or push of DI, DF, and XF values.
12021 Return false when normal moves are needed; true when all required
12022 insns have been emitted. Operands 2-4 contain the input values
12023 int the correct order; operands 5-7 contain the output values. */
12026 ix86_split_long_move (rtx operands
[])
12031 int collisions
= 0;
12032 enum machine_mode mode
= GET_MODE (operands
[0]);
12034 /* The DFmode expanders may ask us to move double.
12035 For 64bit target this is single move. By hiding the fact
12036 here we simplify i386.md splitters. */
12037 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12039 /* Optimize constant pool reference to immediates. This is used by
12040 fp moves, that force all constants to memory to allow combining. */
12042 if (GET_CODE (operands
[1]) == MEM
12043 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12044 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12045 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12046 if (push_operand (operands
[0], VOIDmode
))
12048 operands
[0] = copy_rtx (operands
[0]);
12049 PUT_MODE (operands
[0], Pmode
);
12052 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12053 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12054 emit_move_insn (operands
[0], operands
[1]);
12058 /* The only non-offsettable memory we handle is push. */
12059 if (push_operand (operands
[0], VOIDmode
))
12062 gcc_assert (GET_CODE (operands
[0]) != MEM
12063 || offsettable_memref_p (operands
[0]));
12065 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12066 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12068 /* When emitting push, take care for source operands on the stack. */
12069 if (push
&& GET_CODE (operands
[1]) == MEM
12070 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12073 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12074 XEXP (part
[1][2], 0));
12075 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12076 XEXP (part
[1][1], 0));
12079 /* We need to do copy in the right order in case an address register
12080 of the source overlaps the destination. */
12081 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
12083 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12085 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12088 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12091 /* Collision in the middle part can be handled by reordering. */
12092 if (collisions
== 1 && nparts
== 3
12093 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12096 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12097 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12100 /* If there are more collisions, we can't handle it by reordering.
12101 Do an lea to the last part and use only one colliding move. */
12102 else if (collisions
> 1)
12108 base
= part
[0][nparts
- 1];
12110 /* Handle the case when the last part isn't valid for lea.
12111 Happens in 64-bit mode storing the 12-byte XFmode. */
12112 if (GET_MODE (base
) != Pmode
)
12113 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12115 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12116 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12117 part
[1][1] = replace_equiv_address (part
[1][1],
12118 plus_constant (base
, UNITS_PER_WORD
));
12120 part
[1][2] = replace_equiv_address (part
[1][2],
12121 plus_constant (base
, 8));
12131 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
12132 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
12133 emit_move_insn (part
[0][2], part
[1][2]);
12138 /* In 64bit mode we don't have 32bit push available. In case this is
12139 register, it is OK - we will just use larger counterpart. We also
12140 retype memory - these comes from attempt to avoid REX prefix on
12141 moving of second half of TFmode value. */
12142 if (GET_MODE (part
[1][1]) == SImode
)
12144 switch (GET_CODE (part
[1][1]))
12147 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
12151 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
12155 gcc_unreachable ();
12158 if (GET_MODE (part
[1][0]) == SImode
)
12159 part
[1][0] = part
[1][1];
12162 emit_move_insn (part
[0][1], part
[1][1]);
12163 emit_move_insn (part
[0][0], part
[1][0]);
12167 /* Choose correct order to not overwrite the source before it is copied. */
12168 if ((REG_P (part
[0][0])
12169 && REG_P (part
[1][1])
12170 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
12172 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
12174 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
12178 operands
[2] = part
[0][2];
12179 operands
[3] = part
[0][1];
12180 operands
[4] = part
[0][0];
12181 operands
[5] = part
[1][2];
12182 operands
[6] = part
[1][1];
12183 operands
[7] = part
[1][0];
12187 operands
[2] = part
[0][1];
12188 operands
[3] = part
[0][0];
12189 operands
[5] = part
[1][1];
12190 operands
[6] = part
[1][0];
12197 operands
[2] = part
[0][0];
12198 operands
[3] = part
[0][1];
12199 operands
[4] = part
[0][2];
12200 operands
[5] = part
[1][0];
12201 operands
[6] = part
[1][1];
12202 operands
[7] = part
[1][2];
12206 operands
[2] = part
[0][0];
12207 operands
[3] = part
[0][1];
12208 operands
[5] = part
[1][0];
12209 operands
[6] = part
[1][1];
12213 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12216 if (GET_CODE (operands
[5]) == CONST_INT
12217 && operands
[5] != const0_rtx
12218 && REG_P (operands
[2]))
12220 if (GET_CODE (operands
[6]) == CONST_INT
12221 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
12222 operands
[6] = operands
[2];
12225 && GET_CODE (operands
[7]) == CONST_INT
12226 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
12227 operands
[7] = operands
[2];
12231 && GET_CODE (operands
[6]) == CONST_INT
12232 && operands
[6] != const0_rtx
12233 && REG_P (operands
[3])
12234 && GET_CODE (operands
[7]) == CONST_INT
12235 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
12236 operands
[7] = operands
[3];
12239 emit_move_insn (operands
[2], operands
[5]);
12240 emit_move_insn (operands
[3], operands
[6]);
12242 emit_move_insn (operands
[4], operands
[7]);
12247 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12248 left shift by a constant, either using a single shift or
12249 a sequence of add instructions. */
12252 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
12256 emit_insn ((mode
== DImode
12258 : gen_adddi3
) (operand
, operand
, operand
));
12260 else if (!optimize_size
12261 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
12264 for (i
=0; i
<count
; i
++)
12266 emit_insn ((mode
== DImode
12268 : gen_adddi3
) (operand
, operand
, operand
));
12272 emit_insn ((mode
== DImode
12274 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
12278 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12280 rtx low
[2], high
[2];
12282 const int single_width
= mode
== DImode
? 32 : 64;
12284 if (GET_CODE (operands
[2]) == CONST_INT
)
12286 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12287 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12289 if (count
>= single_width
)
12291 emit_move_insn (high
[0], low
[1]);
12292 emit_move_insn (low
[0], const0_rtx
);
12294 if (count
> single_width
)
12295 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
12299 if (!rtx_equal_p (operands
[0], operands
[1]))
12300 emit_move_insn (operands
[0], operands
[1]);
12301 emit_insn ((mode
== DImode
12303 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
12304 ix86_expand_ashl_const (low
[0], count
, mode
);
12309 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12311 if (operands
[1] == const1_rtx
)
12313 /* Assuming we've chosen a QImode capable registers, then 1 << N
12314 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12315 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
12317 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
12319 ix86_expand_clear (low
[0]);
12320 ix86_expand_clear (high
[0]);
12321 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
12323 d
= gen_lowpart (QImode
, low
[0]);
12324 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12325 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
12326 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12328 d
= gen_lowpart (QImode
, high
[0]);
12329 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
12330 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
12331 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
12334 /* Otherwise, we can get the same results by manually performing
12335 a bit extract operation on bit 5/6, and then performing the two
12336 shifts. The two methods of getting 0/1 into low/high are exactly
12337 the same size. Avoiding the shift in the bit extract case helps
12338 pentium4 a bit; no one else seems to care much either way. */
12343 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
12344 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
12346 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
12347 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
12349 emit_insn ((mode
== DImode
12351 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
12352 emit_insn ((mode
== DImode
12354 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
12355 emit_move_insn (low
[0], high
[0]);
12356 emit_insn ((mode
== DImode
12358 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
12361 emit_insn ((mode
== DImode
12363 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12364 emit_insn ((mode
== DImode
12366 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
12370 if (operands
[1] == constm1_rtx
)
12372 /* For -1 << N, we can avoid the shld instruction, because we
12373 know that we're shifting 0...31/63 ones into a -1. */
12374 emit_move_insn (low
[0], constm1_rtx
);
12376 emit_move_insn (high
[0], low
[0]);
12378 emit_move_insn (high
[0], constm1_rtx
);
12382 if (!rtx_equal_p (operands
[0], operands
[1]))
12383 emit_move_insn (operands
[0], operands
[1]);
12385 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12386 emit_insn ((mode
== DImode
12388 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
12391 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
12393 if (TARGET_CMOVE
&& scratch
)
12395 ix86_expand_clear (scratch
);
12396 emit_insn ((mode
== DImode
12397 ? gen_x86_shift_adj_1
12398 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
12401 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
12405 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12407 rtx low
[2], high
[2];
12409 const int single_width
= mode
== DImode
? 32 : 64;
12411 if (GET_CODE (operands
[2]) == CONST_INT
)
12413 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12414 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12416 if (count
== single_width
* 2 - 1)
12418 emit_move_insn (high
[0], high
[1]);
12419 emit_insn ((mode
== DImode
12421 : gen_ashrdi3
) (high
[0], high
[0],
12422 GEN_INT (single_width
- 1)));
12423 emit_move_insn (low
[0], high
[0]);
12426 else if (count
>= single_width
)
12428 emit_move_insn (low
[0], high
[1]);
12429 emit_move_insn (high
[0], low
[0]);
12430 emit_insn ((mode
== DImode
12432 : gen_ashrdi3
) (high
[0], high
[0],
12433 GEN_INT (single_width
- 1)));
12434 if (count
> single_width
)
12435 emit_insn ((mode
== DImode
12437 : gen_ashrdi3
) (low
[0], low
[0],
12438 GEN_INT (count
- single_width
)));
12442 if (!rtx_equal_p (operands
[0], operands
[1]))
12443 emit_move_insn (operands
[0], operands
[1]);
12444 emit_insn ((mode
== DImode
12446 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12447 emit_insn ((mode
== DImode
12449 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12454 if (!rtx_equal_p (operands
[0], operands
[1]))
12455 emit_move_insn (operands
[0], operands
[1]);
12457 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12459 emit_insn ((mode
== DImode
12461 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12462 emit_insn ((mode
== DImode
12464 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
12466 if (TARGET_CMOVE
&& scratch
)
12468 emit_move_insn (scratch
, high
[0]);
12469 emit_insn ((mode
== DImode
12471 : gen_ashrdi3
) (scratch
, scratch
,
12472 GEN_INT (single_width
- 1)));
12473 emit_insn ((mode
== DImode
12474 ? gen_x86_shift_adj_1
12475 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12479 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
12484 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
12486 rtx low
[2], high
[2];
12488 const int single_width
= mode
== DImode
? 32 : 64;
12490 if (GET_CODE (operands
[2]) == CONST_INT
)
12492 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
12493 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
12495 if (count
>= single_width
)
12497 emit_move_insn (low
[0], high
[1]);
12498 ix86_expand_clear (high
[0]);
12500 if (count
> single_width
)
12501 emit_insn ((mode
== DImode
12503 : gen_lshrdi3
) (low
[0], low
[0],
12504 GEN_INT (count
- single_width
)));
12508 if (!rtx_equal_p (operands
[0], operands
[1]))
12509 emit_move_insn (operands
[0], operands
[1]);
12510 emit_insn ((mode
== DImode
12512 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
12513 emit_insn ((mode
== DImode
12515 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
12520 if (!rtx_equal_p (operands
[0], operands
[1]))
12521 emit_move_insn (operands
[0], operands
[1]);
12523 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
12525 emit_insn ((mode
== DImode
12527 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
12528 emit_insn ((mode
== DImode
12530 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
12532 /* Heh. By reversing the arguments, we can reuse this pattern. */
12533 if (TARGET_CMOVE
&& scratch
)
12535 ix86_expand_clear (scratch
);
12536 emit_insn ((mode
== DImode
12537 ? gen_x86_shift_adj_1
12538 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
12542 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
12546 /* Helper function for the string operations below. Dest VARIABLE whether
12547 it is aligned to VALUE bytes. If true, jump to the label. */
12549 ix86_expand_aligntest (rtx variable
, int value
)
12551 rtx label
= gen_label_rtx ();
12552 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
12553 if (GET_MODE (variable
) == DImode
)
12554 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
12556 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
12557 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
12562 /* Adjust COUNTER by the VALUE. */
12564 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
12566 if (GET_MODE (countreg
) == DImode
)
12567 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
12569 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
12572 /* Zero extend possibly SImode EXP to Pmode register. */
12574 ix86_zero_extend_to_Pmode (rtx exp
)
12577 if (GET_MODE (exp
) == VOIDmode
)
12578 return force_reg (Pmode
, exp
);
12579 if (GET_MODE (exp
) == Pmode
)
12580 return copy_to_mode_reg (Pmode
, exp
);
12581 r
= gen_reg_rtx (Pmode
);
12582 emit_insn (gen_zero_extendsidi2 (r
, exp
));
12586 /* Expand string move (memcpy) operation. Use i386 string operations when
12587 profitable. expand_clrmem contains similar code. */
12589 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
12591 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
12592 enum machine_mode counter_mode
;
12593 HOST_WIDE_INT align
= 0;
12594 unsigned HOST_WIDE_INT count
= 0;
12596 if (GET_CODE (align_exp
) == CONST_INT
)
12597 align
= INTVAL (align_exp
);
12599 /* Can't use any of this if the user has appropriated esi or edi. */
12600 if (global_regs
[4] || global_regs
[5])
12603 /* This simple hack avoids all inlining code and simplifies code below. */
12604 if (!TARGET_ALIGN_STRINGOPS
)
12607 if (GET_CODE (count_exp
) == CONST_INT
)
12609 count
= INTVAL (count_exp
);
12610 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12614 /* Figure out proper mode for counter. For 32bits it is always SImode,
12615 for 64bits use SImode when possible, otherwise DImode.
12616 Set count to number of bytes copied when known at compile time. */
12618 || GET_MODE (count_exp
) == SImode
12619 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12620 counter_mode
= SImode
;
12622 counter_mode
= DImode
;
12624 gcc_assert (counter_mode
== SImode
|| counter_mode
== DImode
);
12626 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12627 if (destreg
!= XEXP (dst
, 0))
12628 dst
= replace_equiv_address_nv (dst
, destreg
);
12629 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
12630 if (srcreg
!= XEXP (src
, 0))
12631 src
= replace_equiv_address_nv (src
, srcreg
);
12633 /* When optimizing for size emit simple rep ; movsb instruction for
12634 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12635 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12636 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12637 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12638 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12639 known to be zero or not. The rep; movsb sequence causes higher
12640 register pressure though, so take that into account. */
12642 if ((!optimize
|| optimize_size
)
12647 || (count
& 3) + count
/ 4 > 6))))
12649 emit_insn (gen_cld ());
12650 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12651 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12652 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
12653 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
12657 /* For constant aligned (or small unaligned) copies use rep movsl
12658 followed by code copying the rest. For PentiumPro ensure 8 byte
12659 alignment to allow rep movsl acceleration. */
12661 else if (count
!= 0
12663 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12664 || optimize_size
|| count
< (unsigned int) 64))
12666 unsigned HOST_WIDE_INT offset
= 0;
12667 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12668 rtx srcmem
, dstmem
;
12670 emit_insn (gen_cld ());
12671 if (count
& ~(size
- 1))
12673 if ((TARGET_SINGLE_STRINGOP
|| optimize_size
) && count
< 5 * 4)
12675 enum machine_mode movs_mode
= size
== 4 ? SImode
: DImode
;
12677 while (offset
< (count
& ~(size
- 1)))
12679 srcmem
= adjust_automodify_address_nv (src
, movs_mode
,
12681 dstmem
= adjust_automodify_address_nv (dst
, movs_mode
,
12683 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12689 countreg
= GEN_INT ((count
>> (size
== 4 ? 2 : 3))
12690 & (TARGET_64BIT
? -1 : 0x3fffffff));
12691 countreg
= copy_to_mode_reg (counter_mode
, countreg
);
12692 countreg
= ix86_zero_extend_to_Pmode (countreg
);
12694 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
12695 GEN_INT (size
== 4 ? 2 : 3));
12696 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12697 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12699 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12700 countreg
, destexp
, srcexp
));
12701 offset
= count
& ~(size
- 1);
12704 if (size
== 8 && (count
& 0x04))
12706 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
12708 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
12710 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12715 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
12717 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
12719 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12724 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
12726 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
12728 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12731 /* The generic code based on the glibc implementation:
12732 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12733 allowing accelerated copying there)
12734 - copy the data using rep movsl
12735 - copy the rest. */
12740 rtx srcmem
, dstmem
;
12741 int desired_alignment
= (TARGET_PENTIUMPRO
12742 && (count
== 0 || count
>= (unsigned int) 260)
12743 ? 8 : UNITS_PER_WORD
);
12744 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12745 dst
= change_address (dst
, BLKmode
, destreg
);
12746 src
= change_address (src
, BLKmode
, srcreg
);
12748 /* In case we don't know anything about the alignment, default to
12749 library version, since it is usually equally fast and result in
12752 Also emit call when we know that the count is large and call overhead
12753 will not be important. */
12754 if (!TARGET_INLINE_ALL_STRINGOPS
12755 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
12758 if (TARGET_SINGLE_STRINGOP
)
12759 emit_insn (gen_cld ());
12761 countreg2
= gen_reg_rtx (Pmode
);
12762 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
12764 /* We don't use loops to align destination and to copy parts smaller
12765 than 4 bytes, because gcc is able to optimize such code better (in
12766 the case the destination or the count really is aligned, gcc is often
12767 able to predict the branches) and also it is friendlier to the
12768 hardware branch prediction.
12770 Using loops is beneficial for generic case, because we can
12771 handle small counts using the loops. Many CPUs (such as Athlon)
12772 have large REP prefix setup costs.
12774 This is quite costly. Maybe we can revisit this decision later or
12775 add some customizability to this code. */
12777 if (count
== 0 && align
< desired_alignment
)
12779 label
= gen_label_rtx ();
12780 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
12781 LEU
, 0, counter_mode
, 1, label
);
12785 rtx label
= ix86_expand_aligntest (destreg
, 1);
12786 srcmem
= change_address (src
, QImode
, srcreg
);
12787 dstmem
= change_address (dst
, QImode
, destreg
);
12788 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12789 ix86_adjust_counter (countreg
, 1);
12790 emit_label (label
);
12791 LABEL_NUSES (label
) = 1;
12795 rtx label
= ix86_expand_aligntest (destreg
, 2);
12796 srcmem
= change_address (src
, HImode
, srcreg
);
12797 dstmem
= change_address (dst
, HImode
, destreg
);
12798 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12799 ix86_adjust_counter (countreg
, 2);
12800 emit_label (label
);
12801 LABEL_NUSES (label
) = 1;
12803 if (align
<= 4 && desired_alignment
> 4)
12805 rtx label
= ix86_expand_aligntest (destreg
, 4);
12806 srcmem
= change_address (src
, SImode
, srcreg
);
12807 dstmem
= change_address (dst
, SImode
, destreg
);
12808 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12809 ix86_adjust_counter (countreg
, 4);
12810 emit_label (label
);
12811 LABEL_NUSES (label
) = 1;
12814 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
12816 emit_label (label
);
12817 LABEL_NUSES (label
) = 1;
12820 if (!TARGET_SINGLE_STRINGOP
)
12821 emit_insn (gen_cld ());
12824 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
12826 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
12830 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
12831 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
12833 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
12834 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
12835 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
12836 countreg2
, destexp
, srcexp
));
12840 emit_label (label
);
12841 LABEL_NUSES (label
) = 1;
12843 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
12845 srcmem
= change_address (src
, SImode
, srcreg
);
12846 dstmem
= change_address (dst
, SImode
, destreg
);
12847 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12849 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
12851 rtx label
= ix86_expand_aligntest (countreg
, 4);
12852 srcmem
= change_address (src
, SImode
, srcreg
);
12853 dstmem
= change_address (dst
, SImode
, destreg
);
12854 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12855 emit_label (label
);
12856 LABEL_NUSES (label
) = 1;
12858 if (align
> 2 && count
!= 0 && (count
& 2))
12860 srcmem
= change_address (src
, HImode
, srcreg
);
12861 dstmem
= change_address (dst
, HImode
, destreg
);
12862 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12864 if (align
<= 2 || count
== 0)
12866 rtx label
= ix86_expand_aligntest (countreg
, 2);
12867 srcmem
= change_address (src
, HImode
, srcreg
);
12868 dstmem
= change_address (dst
, HImode
, destreg
);
12869 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12870 emit_label (label
);
12871 LABEL_NUSES (label
) = 1;
12873 if (align
> 1 && count
!= 0 && (count
& 1))
12875 srcmem
= change_address (src
, QImode
, srcreg
);
12876 dstmem
= change_address (dst
, QImode
, destreg
);
12877 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12879 if (align
<= 1 || count
== 0)
12881 rtx label
= ix86_expand_aligntest (countreg
, 1);
12882 srcmem
= change_address (src
, QImode
, srcreg
);
12883 dstmem
= change_address (dst
, QImode
, destreg
);
12884 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
12885 emit_label (label
);
12886 LABEL_NUSES (label
) = 1;
12893 /* Expand string clear operation (bzero). Use i386 string operations when
12894 profitable. expand_movmem contains similar code. */
12896 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
12898 rtx destreg
, zeroreg
, countreg
, destexp
;
12899 enum machine_mode counter_mode
;
12900 HOST_WIDE_INT align
= 0;
12901 unsigned HOST_WIDE_INT count
= 0;
12903 if (GET_CODE (align_exp
) == CONST_INT
)
12904 align
= INTVAL (align_exp
);
12906 /* Can't use any of this if the user has appropriated esi. */
12907 if (global_regs
[4])
12910 /* This simple hack avoids all inlining code and simplifies code below. */
12911 if (!TARGET_ALIGN_STRINGOPS
)
12914 if (GET_CODE (count_exp
) == CONST_INT
)
12916 count
= INTVAL (count_exp
);
12917 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
12920 /* Figure out proper mode for counter. For 32bits it is always SImode,
12921 for 64bits use SImode when possible, otherwise DImode.
12922 Set count to number of bytes copied when known at compile time. */
12924 || GET_MODE (count_exp
) == SImode
12925 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
12926 counter_mode
= SImode
;
12928 counter_mode
= DImode
;
12930 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
12931 if (destreg
!= XEXP (dst
, 0))
12932 dst
= replace_equiv_address_nv (dst
, destreg
);
12935 /* When optimizing for size emit simple rep ; movsb instruction for
12936 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12937 sequence is 7 bytes long, so if optimizing for size and count is
12938 small enough that some stosl, stosw and stosb instructions without
12939 rep are shorter, fall back into the next if. */
12941 if ((!optimize
|| optimize_size
)
12944 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
12946 emit_insn (gen_cld ());
12948 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
12949 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
12950 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
12951 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
12953 else if (count
!= 0
12955 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
12956 || optimize_size
|| count
< (unsigned int) 64))
12958 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
12959 unsigned HOST_WIDE_INT offset
= 0;
12961 emit_insn (gen_cld ());
12963 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
12964 if (count
& ~(size
- 1))
12966 unsigned HOST_WIDE_INT repcount
;
12967 unsigned int max_nonrep
;
12969 repcount
= count
>> (size
== 4 ? 2 : 3);
12971 repcount
&= 0x3fffffff;
12973 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12974 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12975 bytes. In both cases the latter seems to be faster for small
12977 max_nonrep
= size
== 4 ? 7 : 4;
12978 if (!optimize_size
)
12981 case PROCESSOR_PENTIUM4
:
12982 case PROCESSOR_NOCONA
:
12989 if (repcount
<= max_nonrep
)
12990 while (repcount
-- > 0)
12992 rtx mem
= adjust_automodify_address_nv (dst
,
12993 GET_MODE (zeroreg
),
12995 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
13000 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
13001 countreg
= ix86_zero_extend_to_Pmode (countreg
);
13002 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13003 GEN_INT (size
== 4 ? 2 : 3));
13004 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13005 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
13007 offset
= count
& ~(size
- 1);
13010 if (size
== 8 && (count
& 0x04))
13012 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
13014 emit_insn (gen_strset (destreg
, mem
,
13015 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13020 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
13022 emit_insn (gen_strset (destreg
, mem
,
13023 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13028 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
13030 emit_insn (gen_strset (destreg
, mem
,
13031 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13038 /* Compute desired alignment of the string operation. */
13039 int desired_alignment
= (TARGET_PENTIUMPRO
13040 && (count
== 0 || count
>= (unsigned int) 260)
13041 ? 8 : UNITS_PER_WORD
);
13043 /* In case we don't know anything about the alignment, default to
13044 library version, since it is usually equally fast and result in
13047 Also emit call when we know that the count is large and call overhead
13048 will not be important. */
13049 if (!TARGET_INLINE_ALL_STRINGOPS
13050 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
13053 if (TARGET_SINGLE_STRINGOP
)
13054 emit_insn (gen_cld ());
13056 countreg2
= gen_reg_rtx (Pmode
);
13057 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
13058 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
13059 /* Get rid of MEM_OFFSET, it won't be accurate. */
13060 dst
= change_address (dst
, BLKmode
, destreg
);
13062 if (count
== 0 && align
< desired_alignment
)
13064 label
= gen_label_rtx ();
13065 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
13066 LEU
, 0, counter_mode
, 1, label
);
13070 rtx label
= ix86_expand_aligntest (destreg
, 1);
13071 emit_insn (gen_strset (destreg
, dst
,
13072 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13073 ix86_adjust_counter (countreg
, 1);
13074 emit_label (label
);
13075 LABEL_NUSES (label
) = 1;
13079 rtx label
= ix86_expand_aligntest (destreg
, 2);
13080 emit_insn (gen_strset (destreg
, dst
,
13081 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13082 ix86_adjust_counter (countreg
, 2);
13083 emit_label (label
);
13084 LABEL_NUSES (label
) = 1;
13086 if (align
<= 4 && desired_alignment
> 4)
13088 rtx label
= ix86_expand_aligntest (destreg
, 4);
13089 emit_insn (gen_strset (destreg
, dst
,
13091 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
13093 ix86_adjust_counter (countreg
, 4);
13094 emit_label (label
);
13095 LABEL_NUSES (label
) = 1;
13098 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
13100 emit_label (label
);
13101 LABEL_NUSES (label
) = 1;
13105 if (!TARGET_SINGLE_STRINGOP
)
13106 emit_insn (gen_cld ());
13109 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
13111 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
13115 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
13116 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
13118 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
13119 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
13123 emit_label (label
);
13124 LABEL_NUSES (label
) = 1;
13127 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
13128 emit_insn (gen_strset (destreg
, dst
,
13129 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13130 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
13132 rtx label
= ix86_expand_aligntest (countreg
, 4);
13133 emit_insn (gen_strset (destreg
, dst
,
13134 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
13135 emit_label (label
);
13136 LABEL_NUSES (label
) = 1;
13138 if (align
> 2 && count
!= 0 && (count
& 2))
13139 emit_insn (gen_strset (destreg
, dst
,
13140 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13141 if (align
<= 2 || count
== 0)
13143 rtx label
= ix86_expand_aligntest (countreg
, 2);
13144 emit_insn (gen_strset (destreg
, dst
,
13145 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
13146 emit_label (label
);
13147 LABEL_NUSES (label
) = 1;
13149 if (align
> 1 && count
!= 0 && (count
& 1))
13150 emit_insn (gen_strset (destreg
, dst
,
13151 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13152 if (align
<= 1 || count
== 0)
13154 rtx label
= ix86_expand_aligntest (countreg
, 1);
13155 emit_insn (gen_strset (destreg
, dst
,
13156 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
13157 emit_label (label
);
13158 LABEL_NUSES (label
) = 1;
13164 /* Expand strlen. */
13166 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
13168 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
13170 /* The generic case of strlen expander is long. Avoid it's
13171 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13173 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13174 && !TARGET_INLINE_ALL_STRINGOPS
13176 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
13179 addr
= force_reg (Pmode
, XEXP (src
, 0));
13180 scratch1
= gen_reg_rtx (Pmode
);
13182 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
13185 /* Well it seems that some optimizer does not combine a call like
13186 foo(strlen(bar), strlen(bar));
13187 when the move and the subtraction is done here. It does calculate
13188 the length just once when these instructions are done inside of
13189 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13190 often used and I use one fewer register for the lifetime of
13191 output_strlen_unroll() this is better. */
13193 emit_move_insn (out
, addr
);
13195 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
13197 /* strlensi_unroll_1 returns the address of the zero at the end of
13198 the string, like memchr(), so compute the length by subtracting
13199 the start address. */
13201 emit_insn (gen_subdi3 (out
, out
, addr
));
13203 emit_insn (gen_subsi3 (out
, out
, addr
));
13208 scratch2
= gen_reg_rtx (Pmode
);
13209 scratch3
= gen_reg_rtx (Pmode
);
13210 scratch4
= force_reg (Pmode
, constm1_rtx
);
13212 emit_move_insn (scratch3
, addr
);
13213 eoschar
= force_reg (QImode
, eoschar
);
13215 emit_insn (gen_cld ());
13216 src
= replace_equiv_address_nv (src
, scratch3
);
13218 /* If .md starts supporting :P, this can be done in .md. */
13219 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
13220 scratch4
), UNSPEC_SCAS
);
13221 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
13224 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
13225 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
13229 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
13230 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
13236 /* Expand the appropriate insns for doing strlen if not just doing
13239 out = result, initialized with the start address
13240 align_rtx = alignment of the address.
13241 scratch = scratch register, initialized with the startaddress when
13242 not aligned, otherwise undefined
13244 This is just the body. It needs the initializations mentioned above and
13245 some address computing at the end. These things are done in i386.md. */
13248 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
13252 rtx align_2_label
= NULL_RTX
;
13253 rtx align_3_label
= NULL_RTX
;
13254 rtx align_4_label
= gen_label_rtx ();
13255 rtx end_0_label
= gen_label_rtx ();
13257 rtx tmpreg
= gen_reg_rtx (SImode
);
13258 rtx scratch
= gen_reg_rtx (SImode
);
13262 if (GET_CODE (align_rtx
) == CONST_INT
)
13263 align
= INTVAL (align_rtx
);
13265 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13267 /* Is there a known alignment and is it less than 4? */
13270 rtx scratch1
= gen_reg_rtx (Pmode
);
13271 emit_move_insn (scratch1
, out
);
13272 /* Is there a known alignment and is it not 2? */
13275 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
13276 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
13278 /* Leave just the 3 lower bits. */
13279 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
13280 NULL_RTX
, 0, OPTAB_WIDEN
);
13282 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13283 Pmode
, 1, align_4_label
);
13284 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
13285 Pmode
, 1, align_2_label
);
13286 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
13287 Pmode
, 1, align_3_label
);
13291 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13292 check if is aligned to 4 - byte. */
13294 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
13295 NULL_RTX
, 0, OPTAB_WIDEN
);
13297 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
13298 Pmode
, 1, align_4_label
);
13301 mem
= change_address (src
, QImode
, out
);
13303 /* Now compare the bytes. */
13305 /* Compare the first n unaligned byte on a byte per byte basis. */
13306 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
13307 QImode
, 1, end_0_label
);
13309 /* Increment the address. */
13311 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13313 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13315 /* Not needed with an alignment of 2 */
13318 emit_label (align_2_label
);
13320 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13324 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13326 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13328 emit_label (align_3_label
);
13331 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
13335 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
13337 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
13340 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13341 align this loop. It gives only huge programs, but does not help to
13343 emit_label (align_4_label
);
13345 mem
= change_address (src
, SImode
, out
);
13346 emit_move_insn (scratch
, mem
);
13348 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
13350 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
13352 /* This formula yields a nonzero result iff one of the bytes is zero.
13353 This saves three branches inside loop and many cycles. */
13355 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
13356 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
13357 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
13358 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
13359 gen_int_mode (0x80808080, SImode
)));
13360 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
13365 rtx reg
= gen_reg_rtx (SImode
);
13366 rtx reg2
= gen_reg_rtx (Pmode
);
13367 emit_move_insn (reg
, tmpreg
);
13368 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
13370 /* If zero is not in the first two bytes, move two bytes forward. */
13371 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13372 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13373 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13374 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
13375 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
13378 /* Emit lea manually to avoid clobbering of flags. */
13379 emit_insn (gen_rtx_SET (SImode
, reg2
,
13380 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
13382 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13383 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
13384 emit_insn (gen_rtx_SET (VOIDmode
, out
,
13385 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
13392 rtx end_2_label
= gen_label_rtx ();
13393 /* Is zero in the first two bytes? */
13395 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
13396 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
13397 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
13398 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
13399 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
13401 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
13402 JUMP_LABEL (tmp
) = end_2_label
;
13404 /* Not in the first two. Move two bytes forward. */
13405 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
13407 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
13409 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
13411 emit_label (end_2_label
);
13415 /* Avoid branch in fixing the byte. */
13416 tmpreg
= gen_lowpart (QImode
, tmpreg
);
13417 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
13418 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
13420 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
13422 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
13424 emit_label (end_0_label
);
13428 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
13429 rtx callarg2 ATTRIBUTE_UNUSED
,
13430 rtx pop
, int sibcall
)
13432 rtx use
= NULL
, call
;
13434 if (pop
== const0_rtx
)
13436 gcc_assert (!TARGET_64BIT
|| !pop
);
13438 if (TARGET_MACHO
&& !TARGET_64BIT
)
13441 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
13442 fnaddr
= machopic_indirect_call_target (fnaddr
);
13447 /* Static functions and indirect calls don't need the pic register. */
13448 if (! TARGET_64BIT
&& flag_pic
13449 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
13450 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
13451 use_reg (&use
, pic_offset_table_rtx
);
13454 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
13456 rtx al
= gen_rtx_REG (QImode
, 0);
13457 emit_move_insn (al
, callarg2
);
13458 use_reg (&use
, al
);
13461 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
13463 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13464 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13466 if (sibcall
&& TARGET_64BIT
13467 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
13470 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
13471 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
13472 emit_move_insn (fnaddr
, addr
);
13473 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
13476 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
13478 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
13481 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
13482 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
13483 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
13486 call
= emit_call_insn (call
);
13488 CALL_INSN_FUNCTION_USAGE (call
) = use
;
13492 /* Clear stack slot assignments remembered from previous functions.
13493 This is called from INIT_EXPANDERS once before RTL is emitted for each
13496 static struct machine_function
*
13497 ix86_init_machine_status (void)
13499 struct machine_function
*f
;
13501 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
13502 f
->use_fast_prologue_epilogue_nregs
= -1;
13503 f
->tls_descriptor_call_expanded_p
= 0;
13508 /* Return a MEM corresponding to a stack slot with mode MODE.
13509 Allocate a new slot if necessary.
13511 The RTL for a function can have several slots available: N is
13512 which slot to use. */
13515 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
13517 struct stack_local_entry
*s
;
13519 gcc_assert (n
< MAX_386_STACK_LOCALS
);
13521 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
13522 if (s
->mode
== mode
&& s
->n
== n
)
13523 return copy_rtx (s
->rtl
);
13525 s
= (struct stack_local_entry
*)
13526 ggc_alloc (sizeof (struct stack_local_entry
));
13529 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
13531 s
->next
= ix86_stack_locals
;
13532 ix86_stack_locals
= s
;
13536 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13538 static GTY(()) rtx ix86_tls_symbol
;
13540 ix86_tls_get_addr (void)
13543 if (!ix86_tls_symbol
)
13545 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13546 (TARGET_ANY_GNU_TLS
13548 ? "___tls_get_addr"
13549 : "__tls_get_addr");
13552 return ix86_tls_symbol
;
13555 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13557 static GTY(()) rtx ix86_tls_module_base_symbol
;
13559 ix86_tls_module_base (void)
13562 if (!ix86_tls_module_base_symbol
)
13564 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
13565 "_TLS_MODULE_BASE_");
13566 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13567 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13570 return ix86_tls_module_base_symbol
;
13573 /* Calculate the length of the memory address in the instruction
13574 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13577 memory_address_length (rtx addr
)
13579 struct ix86_address parts
;
13580 rtx base
, index
, disp
;
13584 if (GET_CODE (addr
) == PRE_DEC
13585 || GET_CODE (addr
) == POST_INC
13586 || GET_CODE (addr
) == PRE_MODIFY
13587 || GET_CODE (addr
) == POST_MODIFY
)
13590 ok
= ix86_decompose_address (addr
, &parts
);
13593 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
13594 parts
.base
= SUBREG_REG (parts
.base
);
13595 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
13596 parts
.index
= SUBREG_REG (parts
.index
);
13599 index
= parts
.index
;
13604 - esp as the base always wants an index,
13605 - ebp as the base always wants a displacement. */
13607 /* Register Indirect. */
13608 if (base
&& !index
&& !disp
)
13610 /* esp (for its index) and ebp (for its displacement) need
13611 the two-byte modrm form. */
13612 if (addr
== stack_pointer_rtx
13613 || addr
== arg_pointer_rtx
13614 || addr
== frame_pointer_rtx
13615 || addr
== hard_frame_pointer_rtx
)
13619 /* Direct Addressing. */
13620 else if (disp
&& !base
&& !index
)
13625 /* Find the length of the displacement constant. */
13628 if (base
&& satisfies_constraint_K (disp
))
13633 /* ebp always wants a displacement. */
13634 else if (base
== hard_frame_pointer_rtx
)
13637 /* An index requires the two-byte modrm form.... */
13639 /* ...like esp, which always wants an index. */
13640 || base
== stack_pointer_rtx
13641 || base
== arg_pointer_rtx
13642 || base
== frame_pointer_rtx
)
13649 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13650 is set, expect that insn have 8bit immediate alternative. */
13652 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
13656 extract_insn_cached (insn
);
13657 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13658 if (CONSTANT_P (recog_data
.operand
[i
]))
13661 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
13665 switch (get_attr_mode (insn
))
13676 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13681 fatal_insn ("unknown insn mode", insn
);
13687 /* Compute default value for "length_address" attribute. */
13689 ix86_attr_length_address_default (rtx insn
)
13693 if (get_attr_type (insn
) == TYPE_LEA
)
13695 rtx set
= PATTERN (insn
);
13697 if (GET_CODE (set
) == PARALLEL
)
13698 set
= XVECEXP (set
, 0, 0);
13700 gcc_assert (GET_CODE (set
) == SET
);
13702 return memory_address_length (SET_SRC (set
));
13705 extract_insn_cached (insn
);
13706 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13707 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13709 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
13715 /* Return the maximum number of instructions a cpu can issue. */
13718 ix86_issue_rate (void)
13722 case PROCESSOR_PENTIUM
:
13726 case PROCESSOR_PENTIUMPRO
:
13727 case PROCESSOR_PENTIUM4
:
13728 case PROCESSOR_ATHLON
:
13730 case PROCESSOR_NOCONA
:
13731 case PROCESSOR_GENERIC32
:
13732 case PROCESSOR_GENERIC64
:
13740 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13741 by DEP_INSN and nothing set by DEP_INSN. */
13744 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13748 /* Simplify the test for uninteresting insns. */
13749 if (insn_type
!= TYPE_SETCC
13750 && insn_type
!= TYPE_ICMOV
13751 && insn_type
!= TYPE_FCMOV
13752 && insn_type
!= TYPE_IBR
)
13755 if ((set
= single_set (dep_insn
)) != 0)
13757 set
= SET_DEST (set
);
13760 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
13761 && XVECLEN (PATTERN (dep_insn
), 0) == 2
13762 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
13763 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
13765 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13766 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
13771 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
13774 /* This test is true if the dependent insn reads the flags but
13775 not any other potentially set register. */
13776 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
13779 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
13785 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13786 address with operands set by DEP_INSN. */
13789 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
13793 if (insn_type
== TYPE_LEA
13796 addr
= PATTERN (insn
);
13798 if (GET_CODE (addr
) == PARALLEL
)
13799 addr
= XVECEXP (addr
, 0, 0);
13801 gcc_assert (GET_CODE (addr
) == SET
);
13803 addr
= SET_SRC (addr
);
13808 extract_insn_cached (insn
);
13809 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
13810 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
13812 addr
= XEXP (recog_data
.operand
[i
], 0);
13819 return modified_in_p (addr
, dep_insn
);
13823 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
13825 enum attr_type insn_type
, dep_insn_type
;
13826 enum attr_memory memory
;
13828 int dep_insn_code_number
;
13830 /* Anti and output dependencies have zero cost on all CPUs. */
13831 if (REG_NOTE_KIND (link
) != 0)
13834 dep_insn_code_number
= recog_memoized (dep_insn
);
13836 /* If we can't recognize the insns, we can't really do anything. */
13837 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
13840 insn_type
= get_attr_type (insn
);
13841 dep_insn_type
= get_attr_type (dep_insn
);
13845 case PROCESSOR_PENTIUM
:
13846 /* Address Generation Interlock adds a cycle of latency. */
13847 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13850 /* ??? Compares pair with jump/setcc. */
13851 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
13854 /* Floating point stores require value to be ready one cycle earlier. */
13855 if (insn_type
== TYPE_FMOV
13856 && get_attr_memory (insn
) == MEMORY_STORE
13857 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13861 case PROCESSOR_PENTIUMPRO
:
13862 memory
= get_attr_memory (insn
);
13864 /* INT->FP conversion is expensive. */
13865 if (get_attr_fp_int_src (dep_insn
))
13868 /* There is one cycle extra latency between an FP op and a store. */
13869 if (insn_type
== TYPE_FMOV
13870 && (set
= single_set (dep_insn
)) != NULL_RTX
13871 && (set2
= single_set (insn
)) != NULL_RTX
13872 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
13873 && GET_CODE (SET_DEST (set2
)) == MEM
)
13876 /* Show ability of reorder buffer to hide latency of load by executing
13877 in parallel with previous instruction in case
13878 previous instruction is not needed to compute the address. */
13879 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13880 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13882 /* Claim moves to take one cycle, as core can issue one load
13883 at time and the next load can start cycle later. */
13884 if (dep_insn_type
== TYPE_IMOV
13885 || dep_insn_type
== TYPE_FMOV
)
13893 memory
= get_attr_memory (insn
);
13895 /* The esp dependency is resolved before the instruction is really
13897 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
13898 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
13901 /* INT->FP conversion is expensive. */
13902 if (get_attr_fp_int_src (dep_insn
))
13905 /* Show ability of reorder buffer to hide latency of load by executing
13906 in parallel with previous instruction in case
13907 previous instruction is not needed to compute the address. */
13908 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13909 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13911 /* Claim moves to take one cycle, as core can issue one load
13912 at time and the next load can start cycle later. */
13913 if (dep_insn_type
== TYPE_IMOV
13914 || dep_insn_type
== TYPE_FMOV
)
13923 case PROCESSOR_ATHLON
:
13925 case PROCESSOR_GENERIC32
:
13926 case PROCESSOR_GENERIC64
:
13927 memory
= get_attr_memory (insn
);
13929 /* Show ability of reorder buffer to hide latency of load by executing
13930 in parallel with previous instruction in case
13931 previous instruction is not needed to compute the address. */
13932 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
13933 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
13935 enum attr_unit unit
= get_attr_unit (insn
);
13938 /* Because of the difference between the length of integer and
13939 floating unit pipeline preparation stages, the memory operands
13940 for floating point are cheaper.
13942 ??? For Athlon it the difference is most probably 2. */
13943 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
13946 loadcost
= TARGET_ATHLON
? 2 : 0;
13948 if (cost
>= loadcost
)
13961 /* How many alternative schedules to try. This should be as wide as the
13962 scheduling freedom in the DFA, but no wider. Making this value too
13963 large results extra work for the scheduler. */
13966 ia32_multipass_dfa_lookahead (void)
13968 if (ix86_tune
== PROCESSOR_PENTIUM
)
13971 if (ix86_tune
== PROCESSOR_PENTIUMPRO
13972 || ix86_tune
== PROCESSOR_K6
)
13980 /* Compute the alignment given to a constant that is being placed in memory.
13981 EXP is the constant and ALIGN is the alignment that the object would
13983 The value of this function is used instead of that alignment to align
13987 ix86_constant_alignment (tree exp
, int align
)
13989 if (TREE_CODE (exp
) == REAL_CST
)
13991 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
13993 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
13996 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
13997 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
13998 return BITS_PER_WORD
;
14003 /* Compute the alignment for a static variable.
14004 TYPE is the data type, and ALIGN is the alignment that
14005 the object would ordinarily have. The value of this function is used
14006 instead of that alignment to align the object. */
14009 ix86_data_alignment (tree type
, int align
)
14011 int max_align
= optimize_size
? BITS_PER_WORD
: 256;
14013 if (AGGREGATE_TYPE_P (type
)
14014 && TYPE_SIZE (type
)
14015 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14016 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
14017 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
14018 && align
< max_align
)
14021 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14022 to 16byte boundary. */
14025 if (AGGREGATE_TYPE_P (type
)
14026 && TYPE_SIZE (type
)
14027 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14028 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
14029 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14033 if (TREE_CODE (type
) == ARRAY_TYPE
)
14035 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14037 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14040 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14043 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14045 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14048 else if ((TREE_CODE (type
) == RECORD_TYPE
14049 || TREE_CODE (type
) == UNION_TYPE
14050 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14051 && TYPE_FIELDS (type
))
14053 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14055 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14058 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14059 || TREE_CODE (type
) == INTEGER_TYPE
)
14061 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14063 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14070 /* Compute the alignment for a local variable.
14071 TYPE is the data type, and ALIGN is the alignment that
14072 the object would ordinarily have. The value of this macro is used
14073 instead of that alignment to align the object. */
14076 ix86_local_alignment (tree type
, int align
)
14078 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14079 to 16byte boundary. */
14082 if (AGGREGATE_TYPE_P (type
)
14083 && TYPE_SIZE (type
)
14084 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
14085 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
14086 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
14089 if (TREE_CODE (type
) == ARRAY_TYPE
)
14091 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
14093 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
14096 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
14098 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
14100 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
14103 else if ((TREE_CODE (type
) == RECORD_TYPE
14104 || TREE_CODE (type
) == UNION_TYPE
14105 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
14106 && TYPE_FIELDS (type
))
14108 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
14110 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
14113 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
14114 || TREE_CODE (type
) == INTEGER_TYPE
)
14117 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
14119 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
14125 /* Emit RTL insns to initialize the variable parts of a trampoline.
14126 FNADDR is an RTX for the address of the function's pure code.
14127 CXT is an RTX for the static chain value for the function. */
14129 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
14133 /* Compute offset from the end of the jmp to the target function. */
14134 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
14135 plus_constant (tramp
, 10),
14136 NULL_RTX
, 1, OPTAB_DIRECT
);
14137 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
14138 gen_int_mode (0xb9, QImode
));
14139 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
14140 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
14141 gen_int_mode (0xe9, QImode
));
14142 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
14147 /* Try to load address using shorter movl instead of movabs.
14148 We may want to support movq for kernel mode, but kernel does not use
14149 trampolines at the moment. */
14150 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
14152 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
14153 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14154 gen_int_mode (0xbb41, HImode
));
14155 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
14156 gen_lowpart (SImode
, fnaddr
));
14161 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14162 gen_int_mode (0xbb49, HImode
));
14163 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14167 /* Load static chain using movabs to r10. */
14168 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14169 gen_int_mode (0xba49, HImode
));
14170 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
14173 /* Jump to the r11 */
14174 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
14175 gen_int_mode (0xff49, HImode
));
14176 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
14177 gen_int_mode (0xe3, QImode
));
14179 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
14182 #ifdef ENABLE_EXECUTE_STACK
14183 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
14184 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
14188 /* Codes for all the SSE/MMX builtins. */
14191 IX86_BUILTIN_ADDPS
,
14192 IX86_BUILTIN_ADDSS
,
14193 IX86_BUILTIN_DIVPS
,
14194 IX86_BUILTIN_DIVSS
,
14195 IX86_BUILTIN_MULPS
,
14196 IX86_BUILTIN_MULSS
,
14197 IX86_BUILTIN_SUBPS
,
14198 IX86_BUILTIN_SUBSS
,
14200 IX86_BUILTIN_CMPEQPS
,
14201 IX86_BUILTIN_CMPLTPS
,
14202 IX86_BUILTIN_CMPLEPS
,
14203 IX86_BUILTIN_CMPGTPS
,
14204 IX86_BUILTIN_CMPGEPS
,
14205 IX86_BUILTIN_CMPNEQPS
,
14206 IX86_BUILTIN_CMPNLTPS
,
14207 IX86_BUILTIN_CMPNLEPS
,
14208 IX86_BUILTIN_CMPNGTPS
,
14209 IX86_BUILTIN_CMPNGEPS
,
14210 IX86_BUILTIN_CMPORDPS
,
14211 IX86_BUILTIN_CMPUNORDPS
,
14212 IX86_BUILTIN_CMPEQSS
,
14213 IX86_BUILTIN_CMPLTSS
,
14214 IX86_BUILTIN_CMPLESS
,
14215 IX86_BUILTIN_CMPNEQSS
,
14216 IX86_BUILTIN_CMPNLTSS
,
14217 IX86_BUILTIN_CMPNLESS
,
14218 IX86_BUILTIN_CMPNGTSS
,
14219 IX86_BUILTIN_CMPNGESS
,
14220 IX86_BUILTIN_CMPORDSS
,
14221 IX86_BUILTIN_CMPUNORDSS
,
14223 IX86_BUILTIN_COMIEQSS
,
14224 IX86_BUILTIN_COMILTSS
,
14225 IX86_BUILTIN_COMILESS
,
14226 IX86_BUILTIN_COMIGTSS
,
14227 IX86_BUILTIN_COMIGESS
,
14228 IX86_BUILTIN_COMINEQSS
,
14229 IX86_BUILTIN_UCOMIEQSS
,
14230 IX86_BUILTIN_UCOMILTSS
,
14231 IX86_BUILTIN_UCOMILESS
,
14232 IX86_BUILTIN_UCOMIGTSS
,
14233 IX86_BUILTIN_UCOMIGESS
,
14234 IX86_BUILTIN_UCOMINEQSS
,
14236 IX86_BUILTIN_CVTPI2PS
,
14237 IX86_BUILTIN_CVTPS2PI
,
14238 IX86_BUILTIN_CVTSI2SS
,
14239 IX86_BUILTIN_CVTSI642SS
,
14240 IX86_BUILTIN_CVTSS2SI
,
14241 IX86_BUILTIN_CVTSS2SI64
,
14242 IX86_BUILTIN_CVTTPS2PI
,
14243 IX86_BUILTIN_CVTTSS2SI
,
14244 IX86_BUILTIN_CVTTSS2SI64
,
14246 IX86_BUILTIN_MAXPS
,
14247 IX86_BUILTIN_MAXSS
,
14248 IX86_BUILTIN_MINPS
,
14249 IX86_BUILTIN_MINSS
,
14251 IX86_BUILTIN_LOADUPS
,
14252 IX86_BUILTIN_STOREUPS
,
14253 IX86_BUILTIN_MOVSS
,
14255 IX86_BUILTIN_MOVHLPS
,
14256 IX86_BUILTIN_MOVLHPS
,
14257 IX86_BUILTIN_LOADHPS
,
14258 IX86_BUILTIN_LOADLPS
,
14259 IX86_BUILTIN_STOREHPS
,
14260 IX86_BUILTIN_STORELPS
,
14262 IX86_BUILTIN_MASKMOVQ
,
14263 IX86_BUILTIN_MOVMSKPS
,
14264 IX86_BUILTIN_PMOVMSKB
,
14266 IX86_BUILTIN_MOVNTPS
,
14267 IX86_BUILTIN_MOVNTQ
,
14269 IX86_BUILTIN_LOADDQU
,
14270 IX86_BUILTIN_STOREDQU
,
14272 IX86_BUILTIN_PACKSSWB
,
14273 IX86_BUILTIN_PACKSSDW
,
14274 IX86_BUILTIN_PACKUSWB
,
14276 IX86_BUILTIN_PADDB
,
14277 IX86_BUILTIN_PADDW
,
14278 IX86_BUILTIN_PADDD
,
14279 IX86_BUILTIN_PADDQ
,
14280 IX86_BUILTIN_PADDSB
,
14281 IX86_BUILTIN_PADDSW
,
14282 IX86_BUILTIN_PADDUSB
,
14283 IX86_BUILTIN_PADDUSW
,
14284 IX86_BUILTIN_PSUBB
,
14285 IX86_BUILTIN_PSUBW
,
14286 IX86_BUILTIN_PSUBD
,
14287 IX86_BUILTIN_PSUBQ
,
14288 IX86_BUILTIN_PSUBSB
,
14289 IX86_BUILTIN_PSUBSW
,
14290 IX86_BUILTIN_PSUBUSB
,
14291 IX86_BUILTIN_PSUBUSW
,
14294 IX86_BUILTIN_PANDN
,
14298 IX86_BUILTIN_PAVGB
,
14299 IX86_BUILTIN_PAVGW
,
14301 IX86_BUILTIN_PCMPEQB
,
14302 IX86_BUILTIN_PCMPEQW
,
14303 IX86_BUILTIN_PCMPEQD
,
14304 IX86_BUILTIN_PCMPGTB
,
14305 IX86_BUILTIN_PCMPGTW
,
14306 IX86_BUILTIN_PCMPGTD
,
14308 IX86_BUILTIN_PMADDWD
,
14310 IX86_BUILTIN_PMAXSW
,
14311 IX86_BUILTIN_PMAXUB
,
14312 IX86_BUILTIN_PMINSW
,
14313 IX86_BUILTIN_PMINUB
,
14315 IX86_BUILTIN_PMULHUW
,
14316 IX86_BUILTIN_PMULHW
,
14317 IX86_BUILTIN_PMULLW
,
14319 IX86_BUILTIN_PSADBW
,
14320 IX86_BUILTIN_PSHUFW
,
14322 IX86_BUILTIN_PSLLW
,
14323 IX86_BUILTIN_PSLLD
,
14324 IX86_BUILTIN_PSLLQ
,
14325 IX86_BUILTIN_PSRAW
,
14326 IX86_BUILTIN_PSRAD
,
14327 IX86_BUILTIN_PSRLW
,
14328 IX86_BUILTIN_PSRLD
,
14329 IX86_BUILTIN_PSRLQ
,
14330 IX86_BUILTIN_PSLLWI
,
14331 IX86_BUILTIN_PSLLDI
,
14332 IX86_BUILTIN_PSLLQI
,
14333 IX86_BUILTIN_PSRAWI
,
14334 IX86_BUILTIN_PSRADI
,
14335 IX86_BUILTIN_PSRLWI
,
14336 IX86_BUILTIN_PSRLDI
,
14337 IX86_BUILTIN_PSRLQI
,
14339 IX86_BUILTIN_PUNPCKHBW
,
14340 IX86_BUILTIN_PUNPCKHWD
,
14341 IX86_BUILTIN_PUNPCKHDQ
,
14342 IX86_BUILTIN_PUNPCKLBW
,
14343 IX86_BUILTIN_PUNPCKLWD
,
14344 IX86_BUILTIN_PUNPCKLDQ
,
14346 IX86_BUILTIN_SHUFPS
,
14348 IX86_BUILTIN_RCPPS
,
14349 IX86_BUILTIN_RCPSS
,
14350 IX86_BUILTIN_RSQRTPS
,
14351 IX86_BUILTIN_RSQRTSS
,
14352 IX86_BUILTIN_SQRTPS
,
14353 IX86_BUILTIN_SQRTSS
,
14355 IX86_BUILTIN_UNPCKHPS
,
14356 IX86_BUILTIN_UNPCKLPS
,
14358 IX86_BUILTIN_ANDPS
,
14359 IX86_BUILTIN_ANDNPS
,
14361 IX86_BUILTIN_XORPS
,
14364 IX86_BUILTIN_LDMXCSR
,
14365 IX86_BUILTIN_STMXCSR
,
14366 IX86_BUILTIN_SFENCE
,
14368 /* 3DNow! Original */
14369 IX86_BUILTIN_FEMMS
,
14370 IX86_BUILTIN_PAVGUSB
,
14371 IX86_BUILTIN_PF2ID
,
14372 IX86_BUILTIN_PFACC
,
14373 IX86_BUILTIN_PFADD
,
14374 IX86_BUILTIN_PFCMPEQ
,
14375 IX86_BUILTIN_PFCMPGE
,
14376 IX86_BUILTIN_PFCMPGT
,
14377 IX86_BUILTIN_PFMAX
,
14378 IX86_BUILTIN_PFMIN
,
14379 IX86_BUILTIN_PFMUL
,
14380 IX86_BUILTIN_PFRCP
,
14381 IX86_BUILTIN_PFRCPIT1
,
14382 IX86_BUILTIN_PFRCPIT2
,
14383 IX86_BUILTIN_PFRSQIT1
,
14384 IX86_BUILTIN_PFRSQRT
,
14385 IX86_BUILTIN_PFSUB
,
14386 IX86_BUILTIN_PFSUBR
,
14387 IX86_BUILTIN_PI2FD
,
14388 IX86_BUILTIN_PMULHRW
,
14390 /* 3DNow! Athlon Extensions */
14391 IX86_BUILTIN_PF2IW
,
14392 IX86_BUILTIN_PFNACC
,
14393 IX86_BUILTIN_PFPNACC
,
14394 IX86_BUILTIN_PI2FW
,
14395 IX86_BUILTIN_PSWAPDSI
,
14396 IX86_BUILTIN_PSWAPDSF
,
14399 IX86_BUILTIN_ADDPD
,
14400 IX86_BUILTIN_ADDSD
,
14401 IX86_BUILTIN_DIVPD
,
14402 IX86_BUILTIN_DIVSD
,
14403 IX86_BUILTIN_MULPD
,
14404 IX86_BUILTIN_MULSD
,
14405 IX86_BUILTIN_SUBPD
,
14406 IX86_BUILTIN_SUBSD
,
14408 IX86_BUILTIN_CMPEQPD
,
14409 IX86_BUILTIN_CMPLTPD
,
14410 IX86_BUILTIN_CMPLEPD
,
14411 IX86_BUILTIN_CMPGTPD
,
14412 IX86_BUILTIN_CMPGEPD
,
14413 IX86_BUILTIN_CMPNEQPD
,
14414 IX86_BUILTIN_CMPNLTPD
,
14415 IX86_BUILTIN_CMPNLEPD
,
14416 IX86_BUILTIN_CMPNGTPD
,
14417 IX86_BUILTIN_CMPNGEPD
,
14418 IX86_BUILTIN_CMPORDPD
,
14419 IX86_BUILTIN_CMPUNORDPD
,
14420 IX86_BUILTIN_CMPNEPD
,
14421 IX86_BUILTIN_CMPEQSD
,
14422 IX86_BUILTIN_CMPLTSD
,
14423 IX86_BUILTIN_CMPLESD
,
14424 IX86_BUILTIN_CMPNEQSD
,
14425 IX86_BUILTIN_CMPNLTSD
,
14426 IX86_BUILTIN_CMPNLESD
,
14427 IX86_BUILTIN_CMPORDSD
,
14428 IX86_BUILTIN_CMPUNORDSD
,
14429 IX86_BUILTIN_CMPNESD
,
14431 IX86_BUILTIN_COMIEQSD
,
14432 IX86_BUILTIN_COMILTSD
,
14433 IX86_BUILTIN_COMILESD
,
14434 IX86_BUILTIN_COMIGTSD
,
14435 IX86_BUILTIN_COMIGESD
,
14436 IX86_BUILTIN_COMINEQSD
,
14437 IX86_BUILTIN_UCOMIEQSD
,
14438 IX86_BUILTIN_UCOMILTSD
,
14439 IX86_BUILTIN_UCOMILESD
,
14440 IX86_BUILTIN_UCOMIGTSD
,
14441 IX86_BUILTIN_UCOMIGESD
,
14442 IX86_BUILTIN_UCOMINEQSD
,
14444 IX86_BUILTIN_MAXPD
,
14445 IX86_BUILTIN_MAXSD
,
14446 IX86_BUILTIN_MINPD
,
14447 IX86_BUILTIN_MINSD
,
14449 IX86_BUILTIN_ANDPD
,
14450 IX86_BUILTIN_ANDNPD
,
14452 IX86_BUILTIN_XORPD
,
14454 IX86_BUILTIN_SQRTPD
,
14455 IX86_BUILTIN_SQRTSD
,
14457 IX86_BUILTIN_UNPCKHPD
,
14458 IX86_BUILTIN_UNPCKLPD
,
14460 IX86_BUILTIN_SHUFPD
,
14462 IX86_BUILTIN_LOADUPD
,
14463 IX86_BUILTIN_STOREUPD
,
14464 IX86_BUILTIN_MOVSD
,
14466 IX86_BUILTIN_LOADHPD
,
14467 IX86_BUILTIN_LOADLPD
,
14469 IX86_BUILTIN_CVTDQ2PD
,
14470 IX86_BUILTIN_CVTDQ2PS
,
14472 IX86_BUILTIN_CVTPD2DQ
,
14473 IX86_BUILTIN_CVTPD2PI
,
14474 IX86_BUILTIN_CVTPD2PS
,
14475 IX86_BUILTIN_CVTTPD2DQ
,
14476 IX86_BUILTIN_CVTTPD2PI
,
14478 IX86_BUILTIN_CVTPI2PD
,
14479 IX86_BUILTIN_CVTSI2SD
,
14480 IX86_BUILTIN_CVTSI642SD
,
14482 IX86_BUILTIN_CVTSD2SI
,
14483 IX86_BUILTIN_CVTSD2SI64
,
14484 IX86_BUILTIN_CVTSD2SS
,
14485 IX86_BUILTIN_CVTSS2SD
,
14486 IX86_BUILTIN_CVTTSD2SI
,
14487 IX86_BUILTIN_CVTTSD2SI64
,
14489 IX86_BUILTIN_CVTPS2DQ
,
14490 IX86_BUILTIN_CVTPS2PD
,
14491 IX86_BUILTIN_CVTTPS2DQ
,
14493 IX86_BUILTIN_MOVNTI
,
14494 IX86_BUILTIN_MOVNTPD
,
14495 IX86_BUILTIN_MOVNTDQ
,
14498 IX86_BUILTIN_MASKMOVDQU
,
14499 IX86_BUILTIN_MOVMSKPD
,
14500 IX86_BUILTIN_PMOVMSKB128
,
14502 IX86_BUILTIN_PACKSSWB128
,
14503 IX86_BUILTIN_PACKSSDW128
,
14504 IX86_BUILTIN_PACKUSWB128
,
14506 IX86_BUILTIN_PADDB128
,
14507 IX86_BUILTIN_PADDW128
,
14508 IX86_BUILTIN_PADDD128
,
14509 IX86_BUILTIN_PADDQ128
,
14510 IX86_BUILTIN_PADDSB128
,
14511 IX86_BUILTIN_PADDSW128
,
14512 IX86_BUILTIN_PADDUSB128
,
14513 IX86_BUILTIN_PADDUSW128
,
14514 IX86_BUILTIN_PSUBB128
,
14515 IX86_BUILTIN_PSUBW128
,
14516 IX86_BUILTIN_PSUBD128
,
14517 IX86_BUILTIN_PSUBQ128
,
14518 IX86_BUILTIN_PSUBSB128
,
14519 IX86_BUILTIN_PSUBSW128
,
14520 IX86_BUILTIN_PSUBUSB128
,
14521 IX86_BUILTIN_PSUBUSW128
,
14523 IX86_BUILTIN_PAND128
,
14524 IX86_BUILTIN_PANDN128
,
14525 IX86_BUILTIN_POR128
,
14526 IX86_BUILTIN_PXOR128
,
14528 IX86_BUILTIN_PAVGB128
,
14529 IX86_BUILTIN_PAVGW128
,
14531 IX86_BUILTIN_PCMPEQB128
,
14532 IX86_BUILTIN_PCMPEQW128
,
14533 IX86_BUILTIN_PCMPEQD128
,
14534 IX86_BUILTIN_PCMPGTB128
,
14535 IX86_BUILTIN_PCMPGTW128
,
14536 IX86_BUILTIN_PCMPGTD128
,
14538 IX86_BUILTIN_PMADDWD128
,
14540 IX86_BUILTIN_PMAXSW128
,
14541 IX86_BUILTIN_PMAXUB128
,
14542 IX86_BUILTIN_PMINSW128
,
14543 IX86_BUILTIN_PMINUB128
,
14545 IX86_BUILTIN_PMULUDQ
,
14546 IX86_BUILTIN_PMULUDQ128
,
14547 IX86_BUILTIN_PMULHUW128
,
14548 IX86_BUILTIN_PMULHW128
,
14549 IX86_BUILTIN_PMULLW128
,
14551 IX86_BUILTIN_PSADBW128
,
14552 IX86_BUILTIN_PSHUFHW
,
14553 IX86_BUILTIN_PSHUFLW
,
14554 IX86_BUILTIN_PSHUFD
,
14556 IX86_BUILTIN_PSLLW128
,
14557 IX86_BUILTIN_PSLLD128
,
14558 IX86_BUILTIN_PSLLQ128
,
14559 IX86_BUILTIN_PSRAW128
,
14560 IX86_BUILTIN_PSRAD128
,
14561 IX86_BUILTIN_PSRLW128
,
14562 IX86_BUILTIN_PSRLD128
,
14563 IX86_BUILTIN_PSRLQ128
,
14564 IX86_BUILTIN_PSLLDQI128
,
14565 IX86_BUILTIN_PSLLWI128
,
14566 IX86_BUILTIN_PSLLDI128
,
14567 IX86_BUILTIN_PSLLQI128
,
14568 IX86_BUILTIN_PSRAWI128
,
14569 IX86_BUILTIN_PSRADI128
,
14570 IX86_BUILTIN_PSRLDQI128
,
14571 IX86_BUILTIN_PSRLWI128
,
14572 IX86_BUILTIN_PSRLDI128
,
14573 IX86_BUILTIN_PSRLQI128
,
14575 IX86_BUILTIN_PUNPCKHBW128
,
14576 IX86_BUILTIN_PUNPCKHWD128
,
14577 IX86_BUILTIN_PUNPCKHDQ128
,
14578 IX86_BUILTIN_PUNPCKHQDQ128
,
14579 IX86_BUILTIN_PUNPCKLBW128
,
14580 IX86_BUILTIN_PUNPCKLWD128
,
14581 IX86_BUILTIN_PUNPCKLDQ128
,
14582 IX86_BUILTIN_PUNPCKLQDQ128
,
14584 IX86_BUILTIN_CLFLUSH
,
14585 IX86_BUILTIN_MFENCE
,
14586 IX86_BUILTIN_LFENCE
,
14588 /* Prescott New Instructions. */
14589 IX86_BUILTIN_ADDSUBPS
,
14590 IX86_BUILTIN_HADDPS
,
14591 IX86_BUILTIN_HSUBPS
,
14592 IX86_BUILTIN_MOVSHDUP
,
14593 IX86_BUILTIN_MOVSLDUP
,
14594 IX86_BUILTIN_ADDSUBPD
,
14595 IX86_BUILTIN_HADDPD
,
14596 IX86_BUILTIN_HSUBPD
,
14597 IX86_BUILTIN_LDDQU
,
14599 IX86_BUILTIN_MONITOR
,
14600 IX86_BUILTIN_MWAIT
,
14603 IX86_BUILTIN_PHADDW
,
14604 IX86_BUILTIN_PHADDD
,
14605 IX86_BUILTIN_PHADDSW
,
14606 IX86_BUILTIN_PHSUBW
,
14607 IX86_BUILTIN_PHSUBD
,
14608 IX86_BUILTIN_PHSUBSW
,
14609 IX86_BUILTIN_PMADDUBSW
,
14610 IX86_BUILTIN_PMULHRSW
,
14611 IX86_BUILTIN_PSHUFB
,
14612 IX86_BUILTIN_PSIGNB
,
14613 IX86_BUILTIN_PSIGNW
,
14614 IX86_BUILTIN_PSIGND
,
14615 IX86_BUILTIN_PALIGNR
,
14616 IX86_BUILTIN_PABSB
,
14617 IX86_BUILTIN_PABSW
,
14618 IX86_BUILTIN_PABSD
,
14620 IX86_BUILTIN_PHADDW128
,
14621 IX86_BUILTIN_PHADDD128
,
14622 IX86_BUILTIN_PHADDSW128
,
14623 IX86_BUILTIN_PHSUBW128
,
14624 IX86_BUILTIN_PHSUBD128
,
14625 IX86_BUILTIN_PHSUBSW128
,
14626 IX86_BUILTIN_PMADDUBSW128
,
14627 IX86_BUILTIN_PMULHRSW128
,
14628 IX86_BUILTIN_PSHUFB128
,
14629 IX86_BUILTIN_PSIGNB128
,
14630 IX86_BUILTIN_PSIGNW128
,
14631 IX86_BUILTIN_PSIGND128
,
14632 IX86_BUILTIN_PALIGNR128
,
14633 IX86_BUILTIN_PABSB128
,
14634 IX86_BUILTIN_PABSW128
,
14635 IX86_BUILTIN_PABSD128
,
14637 IX86_BUILTIN_VEC_INIT_V2SI
,
14638 IX86_BUILTIN_VEC_INIT_V4HI
,
14639 IX86_BUILTIN_VEC_INIT_V8QI
,
14640 IX86_BUILTIN_VEC_EXT_V2DF
,
14641 IX86_BUILTIN_VEC_EXT_V2DI
,
14642 IX86_BUILTIN_VEC_EXT_V4SF
,
14643 IX86_BUILTIN_VEC_EXT_V4SI
,
14644 IX86_BUILTIN_VEC_EXT_V8HI
,
14645 IX86_BUILTIN_VEC_EXT_V2SI
,
14646 IX86_BUILTIN_VEC_EXT_V4HI
,
14647 IX86_BUILTIN_VEC_SET_V8HI
,
14648 IX86_BUILTIN_VEC_SET_V4HI
,
/* Register a target-specific builtin NAME of type TYPE expanding to
   insn CODE, but only when the option bits in MASK are enabled (and,
   if MASK includes MASK_64BIT, only on 64-bit targets).  Wrapped in
   do { } while (0) so the macro is safe as a single statement,
   e.g. as the body of an unbraced `if'.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
			  NULL, NULL_TREE);				\
} while (0)
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
14667 struct builtin_description
14669 const unsigned int mask
;
14670 const enum insn_code icode
;
14671 const char *const name
;
14672 const enum ix86_builtins code
;
14673 const enum rtx_code comparison
;
14674 const unsigned int flag
;
14677 static const struct builtin_description bdesc_comi
[] =
14679 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
14680 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
14681 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
14682 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
14683 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
14684 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
14685 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
14686 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
14687 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
14688 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
14689 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
14690 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
14691 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
14692 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
14693 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
14694 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
14695 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
14696 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
14697 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
14698 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
14699 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
14700 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
14701 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
14702 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
14705 static const struct builtin_description bdesc_2arg
[] =
14708 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
14709 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
14710 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
14711 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
14712 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
14713 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
14714 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
14715 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
14717 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
14718 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
14719 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
14720 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
14721 BUILTIN_DESC_SWAP_OPERANDS
},
14722 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
14723 BUILTIN_DESC_SWAP_OPERANDS
},
14724 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
14725 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
14726 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
14727 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
14728 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
14729 BUILTIN_DESC_SWAP_OPERANDS
},
14730 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
14731 BUILTIN_DESC_SWAP_OPERANDS
},
14732 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
14733 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
14734 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
14735 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
14736 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
14737 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
14738 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
14739 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
14740 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
14741 BUILTIN_DESC_SWAP_OPERANDS
},
14742 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
14743 BUILTIN_DESC_SWAP_OPERANDS
},
14744 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
14746 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
14747 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
14748 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
14749 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
14751 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
14752 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
14753 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
14754 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
14756 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
14757 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
14758 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
14759 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
14760 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
14763 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
14764 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
14765 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
14766 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
14767 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
14768 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
14769 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
14770 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
14772 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
14773 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
14774 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
14775 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
14776 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
14777 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
14778 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
14779 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
14781 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
14782 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
14783 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
14785 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
14786 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
14787 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
14788 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
14790 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
14791 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
14793 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
14794 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
14795 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
14796 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
14797 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
14798 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
14800 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
14801 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
14802 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
14803 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
14805 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
14806 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
14807 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
14808 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
14809 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
14810 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
14813 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
14814 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
14815 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
14817 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
14818 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
14819 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
14821 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
14822 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
14823 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
14824 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
14825 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
14826 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
14828 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
14829 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
14830 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
14831 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
14832 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
14833 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
14835 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
14836 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
14837 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
14838 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
14840 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
14841 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
14844 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
14845 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
14846 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
14847 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
14848 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
14849 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
14850 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
14851 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
14853 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
14854 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
14855 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
14856 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
14857 BUILTIN_DESC_SWAP_OPERANDS
},
14858 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
14859 BUILTIN_DESC_SWAP_OPERANDS
},
14860 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
14861 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
14862 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
14863 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
14864 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
14865 BUILTIN_DESC_SWAP_OPERANDS
},
14866 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
14867 BUILTIN_DESC_SWAP_OPERANDS
},
14868 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
14869 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
14870 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
14871 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
14872 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
14873 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
14874 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
14875 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
14876 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
14878 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
14879 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
14880 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
14881 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
14883 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
14884 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
14885 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
14886 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
14888 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
14889 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
14890 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
14893 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
14894 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
14895 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
14896 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
14897 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
14898 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
14899 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
14900 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
14902 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
14903 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
14904 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
14905 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
14906 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
14907 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
14908 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
14909 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
14911 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
14912 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
14914 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
14915 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
14916 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
14917 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
14919 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
14920 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
14922 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
14923 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
14924 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
14925 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
14926 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
14927 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
14929 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
14930 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
14931 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
14932 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
14934 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
14935 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
14936 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
14937 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
14938 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
14939 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
14940 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
14941 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
14943 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
14944 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
14945 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
14947 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
14948 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
14950 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
14951 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
14953 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
14954 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
14955 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
14957 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
14958 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
14959 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
14961 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
14962 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
14964 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
14966 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
14967 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
14968 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
14969 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
14972 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
14973 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
14974 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
14975 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
14976 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
14977 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
14980 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
14981 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
14982 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
14983 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
14984 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
14985 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
14986 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
14987 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
14988 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
14989 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
14990 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
14991 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
14992 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
14993 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
14994 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
14995 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
14996 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
14997 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
14998 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
14999 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
15000 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
15001 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
15002 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
15003 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
15006 static const struct builtin_description bdesc_1arg
[] =
15008 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
15009 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
15011 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
15012 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
15013 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
15015 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
15016 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
15017 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
15018 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
15019 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
15020 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
15022 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
15023 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
15025 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
15027 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
15028 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
15030 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
15031 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
15032 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
15033 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
15034 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
15036 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
15038 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
15039 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
15040 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
15041 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
15043 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
15044 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
15045 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
15048 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
15049 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
15052 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
15053 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
15054 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
15055 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
15056 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
15057 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
15061 ix86_init_builtins (void)
15064 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
15071 ix86_init_mmx_sse_builtins (void)
15073 const struct builtin_description
* d
;
15076 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
15077 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
15078 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
15079 tree V2DI_type_node
15080 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
15081 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
15082 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
15083 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
15084 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
15085 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
15086 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
15088 tree pchar_type_node
= build_pointer_type (char_type_node
);
15089 tree pcchar_type_node
= build_pointer_type (
15090 build_type_variant (char_type_node
, 1, 0));
15091 tree pfloat_type_node
= build_pointer_type (float_type_node
);
15092 tree pcfloat_type_node
= build_pointer_type (
15093 build_type_variant (float_type_node
, 1, 0));
15094 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
15095 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
15096 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
15099 tree int_ftype_v4sf_v4sf
15100 = build_function_type_list (integer_type_node
,
15101 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15102 tree v4si_ftype_v4sf_v4sf
15103 = build_function_type_list (V4SI_type_node
,
15104 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15105 /* MMX/SSE/integer conversions. */
15106 tree int_ftype_v4sf
15107 = build_function_type_list (integer_type_node
,
15108 V4SF_type_node
, NULL_TREE
);
15109 tree int64_ftype_v4sf
15110 = build_function_type_list (long_long_integer_type_node
,
15111 V4SF_type_node
, NULL_TREE
);
15112 tree int_ftype_v8qi
15113 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
15114 tree v4sf_ftype_v4sf_int
15115 = build_function_type_list (V4SF_type_node
,
15116 V4SF_type_node
, integer_type_node
, NULL_TREE
);
15117 tree v4sf_ftype_v4sf_int64
15118 = build_function_type_list (V4SF_type_node
,
15119 V4SF_type_node
, long_long_integer_type_node
,
15121 tree v4sf_ftype_v4sf_v2si
15122 = build_function_type_list (V4SF_type_node
,
15123 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
15125 /* Miscellaneous. */
15126 tree v8qi_ftype_v4hi_v4hi
15127 = build_function_type_list (V8QI_type_node
,
15128 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15129 tree v4hi_ftype_v2si_v2si
15130 = build_function_type_list (V4HI_type_node
,
15131 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15132 tree v4sf_ftype_v4sf_v4sf_int
15133 = build_function_type_list (V4SF_type_node
,
15134 V4SF_type_node
, V4SF_type_node
,
15135 integer_type_node
, NULL_TREE
);
15136 tree v2si_ftype_v4hi_v4hi
15137 = build_function_type_list (V2SI_type_node
,
15138 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15139 tree v4hi_ftype_v4hi_int
15140 = build_function_type_list (V4HI_type_node
,
15141 V4HI_type_node
, integer_type_node
, NULL_TREE
);
15142 tree v4hi_ftype_v4hi_di
15143 = build_function_type_list (V4HI_type_node
,
15144 V4HI_type_node
, long_long_unsigned_type_node
,
15146 tree v2si_ftype_v2si_di
15147 = build_function_type_list (V2SI_type_node
,
15148 V2SI_type_node
, long_long_unsigned_type_node
,
15150 tree void_ftype_void
15151 = build_function_type (void_type_node
, void_list_node
);
15152 tree void_ftype_unsigned
15153 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
15154 tree void_ftype_unsigned_unsigned
15155 = build_function_type_list (void_type_node
, unsigned_type_node
,
15156 unsigned_type_node
, NULL_TREE
);
15157 tree void_ftype_pcvoid_unsigned_unsigned
15158 = build_function_type_list (void_type_node
, const_ptr_type_node
,
15159 unsigned_type_node
, unsigned_type_node
,
15161 tree unsigned_ftype_void
15162 = build_function_type (unsigned_type_node
, void_list_node
);
15163 tree v2si_ftype_v4sf
15164 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
15165 /* Loads/stores. */
15166 tree void_ftype_v8qi_v8qi_pchar
15167 = build_function_type_list (void_type_node
,
15168 V8QI_type_node
, V8QI_type_node
,
15169 pchar_type_node
, NULL_TREE
);
15170 tree v4sf_ftype_pcfloat
15171 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
15172 /* @@@ the type is bogus */
15173 tree v4sf_ftype_v4sf_pv2si
15174 = build_function_type_list (V4SF_type_node
,
15175 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
15176 tree void_ftype_pv2si_v4sf
15177 = build_function_type_list (void_type_node
,
15178 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
15179 tree void_ftype_pfloat_v4sf
15180 = build_function_type_list (void_type_node
,
15181 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
15182 tree void_ftype_pdi_di
15183 = build_function_type_list (void_type_node
,
15184 pdi_type_node
, long_long_unsigned_type_node
,
15186 tree void_ftype_pv2di_v2di
15187 = build_function_type_list (void_type_node
,
15188 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
15189 /* Normal vector unops. */
15190 tree v4sf_ftype_v4sf
15191 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15192 tree v16qi_ftype_v16qi
15193 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15194 tree v8hi_ftype_v8hi
15195 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15196 tree v4si_ftype_v4si
15197 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15198 tree v8qi_ftype_v8qi
15199 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15200 tree v4hi_ftype_v4hi
15201 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15203 /* Normal vector binops. */
15204 tree v4sf_ftype_v4sf_v4sf
15205 = build_function_type_list (V4SF_type_node
,
15206 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
15207 tree v8qi_ftype_v8qi_v8qi
15208 = build_function_type_list (V8QI_type_node
,
15209 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15210 tree v4hi_ftype_v4hi_v4hi
15211 = build_function_type_list (V4HI_type_node
,
15212 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
15213 tree v2si_ftype_v2si_v2si
15214 = build_function_type_list (V2SI_type_node
,
15215 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15216 tree di_ftype_di_di
15217 = build_function_type_list (long_long_unsigned_type_node
,
15218 long_long_unsigned_type_node
,
15219 long_long_unsigned_type_node
, NULL_TREE
);
15221 tree di_ftype_di_di_int
15222 = build_function_type_list (long_long_unsigned_type_node
,
15223 long_long_unsigned_type_node
,
15224 long_long_unsigned_type_node
,
15225 integer_type_node
, NULL_TREE
);
15227 tree v2si_ftype_v2sf
15228 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
15229 tree v2sf_ftype_v2si
15230 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
15231 tree v2si_ftype_v2si
15232 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15233 tree v2sf_ftype_v2sf
15234 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15235 tree v2sf_ftype_v2sf_v2sf
15236 = build_function_type_list (V2SF_type_node
,
15237 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15238 tree v2si_ftype_v2sf_v2sf
15239 = build_function_type_list (V2SI_type_node
,
15240 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
15241 tree pint_type_node
= build_pointer_type (integer_type_node
);
15242 tree pdouble_type_node
= build_pointer_type (double_type_node
);
15243 tree pcdouble_type_node
= build_pointer_type (
15244 build_type_variant (double_type_node
, 1, 0));
15245 tree int_ftype_v2df_v2df
15246 = build_function_type_list (integer_type_node
,
15247 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15249 tree void_ftype_pcvoid
15250 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
15251 tree v4sf_ftype_v4si
15252 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
15253 tree v4si_ftype_v4sf
15254 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
15255 tree v2df_ftype_v4si
15256 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
15257 tree v4si_ftype_v2df
15258 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
15259 tree v2si_ftype_v2df
15260 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
15261 tree v4sf_ftype_v2df
15262 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15263 tree v2df_ftype_v2si
15264 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
15265 tree v2df_ftype_v4sf
15266 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15267 tree int_ftype_v2df
15268 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
15269 tree int64_ftype_v2df
15270 = build_function_type_list (long_long_integer_type_node
,
15271 V2DF_type_node
, NULL_TREE
);
15272 tree v2df_ftype_v2df_int
15273 = build_function_type_list (V2DF_type_node
,
15274 V2DF_type_node
, integer_type_node
, NULL_TREE
);
15275 tree v2df_ftype_v2df_int64
15276 = build_function_type_list (V2DF_type_node
,
15277 V2DF_type_node
, long_long_integer_type_node
,
15279 tree v4sf_ftype_v4sf_v2df
15280 = build_function_type_list (V4SF_type_node
,
15281 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
15282 tree v2df_ftype_v2df_v4sf
15283 = build_function_type_list (V2DF_type_node
,
15284 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
15285 tree v2df_ftype_v2df_v2df_int
15286 = build_function_type_list (V2DF_type_node
,
15287 V2DF_type_node
, V2DF_type_node
,
15290 tree v2df_ftype_v2df_pcdouble
15291 = build_function_type_list (V2DF_type_node
,
15292 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15293 tree void_ftype_pdouble_v2df
15294 = build_function_type_list (void_type_node
,
15295 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
15296 tree void_ftype_pint_int
15297 = build_function_type_list (void_type_node
,
15298 pint_type_node
, integer_type_node
, NULL_TREE
);
15299 tree void_ftype_v16qi_v16qi_pchar
15300 = build_function_type_list (void_type_node
,
15301 V16QI_type_node
, V16QI_type_node
,
15302 pchar_type_node
, NULL_TREE
);
15303 tree v2df_ftype_pcdouble
15304 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
15305 tree v2df_ftype_v2df_v2df
15306 = build_function_type_list (V2DF_type_node
,
15307 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15308 tree v16qi_ftype_v16qi_v16qi
15309 = build_function_type_list (V16QI_type_node
,
15310 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15311 tree v8hi_ftype_v8hi_v8hi
15312 = build_function_type_list (V8HI_type_node
,
15313 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15314 tree v4si_ftype_v4si_v4si
15315 = build_function_type_list (V4SI_type_node
,
15316 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15317 tree v2di_ftype_v2di_v2di
15318 = build_function_type_list (V2DI_type_node
,
15319 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
15320 tree v2di_ftype_v2df_v2df
15321 = build_function_type_list (V2DI_type_node
,
15322 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15323 tree v2df_ftype_v2df
15324 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
15325 tree v2di_ftype_v2di_int
15326 = build_function_type_list (V2DI_type_node
,
15327 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15328 tree v2di_ftype_v2di_v2di_int
15329 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
15330 V2DI_type_node
, integer_type_node
, NULL_TREE
);
15331 tree v4si_ftype_v4si_int
15332 = build_function_type_list (V4SI_type_node
,
15333 V4SI_type_node
, integer_type_node
, NULL_TREE
);
15334 tree v8hi_ftype_v8hi_int
15335 = build_function_type_list (V8HI_type_node
,
15336 V8HI_type_node
, integer_type_node
, NULL_TREE
);
15337 tree v8hi_ftype_v8hi_v2di
15338 = build_function_type_list (V8HI_type_node
,
15339 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
15340 tree v4si_ftype_v4si_v2di
15341 = build_function_type_list (V4SI_type_node
,
15342 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
15343 tree v4si_ftype_v8hi_v8hi
15344 = build_function_type_list (V4SI_type_node
,
15345 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
15346 tree di_ftype_v8qi_v8qi
15347 = build_function_type_list (long_long_unsigned_type_node
,
15348 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
15349 tree di_ftype_v2si_v2si
15350 = build_function_type_list (long_long_unsigned_type_node
,
15351 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
15352 tree v2di_ftype_v16qi_v16qi
15353 = build_function_type_list (V2DI_type_node
,
15354 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
15355 tree v2di_ftype_v4si_v4si
15356 = build_function_type_list (V2DI_type_node
,
15357 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
15358 tree int_ftype_v16qi
15359 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
15360 tree v16qi_ftype_pcchar
15361 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
15362 tree void_ftype_pchar_v16qi
15363 = build_function_type_list (void_type_node
,
15364 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
15367 tree float128_type
;
15370 /* The __float80 type. */
15371 if (TYPE_MODE (long_double_type_node
) == XFmode
)
15372 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
15376 /* The __float80 type. */
15377 float80_type
= make_node (REAL_TYPE
);
15378 TYPE_PRECISION (float80_type
) = 80;
15379 layout_type (float80_type
);
15380 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
15385 float128_type
= make_node (REAL_TYPE
);
15386 TYPE_PRECISION (float128_type
) = 128;
15387 layout_type (float128_type
);
15388 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
15391 /* Add all builtins that are more or less simple operations on two
15393 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
15395 /* Use one of the operands; the target can have a different mode for
15396 mask-generating compares. */
15397 enum machine_mode mode
;
15402 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15407 type
= v16qi_ftype_v16qi_v16qi
;
15410 type
= v8hi_ftype_v8hi_v8hi
;
15413 type
= v4si_ftype_v4si_v4si
;
15416 type
= v2di_ftype_v2di_v2di
;
15419 type
= v2df_ftype_v2df_v2df
;
15422 type
= v4sf_ftype_v4sf_v4sf
;
15425 type
= v8qi_ftype_v8qi_v8qi
;
15428 type
= v4hi_ftype_v4hi_v4hi
;
15431 type
= v2si_ftype_v2si_v2si
;
15434 type
= di_ftype_di_di
;
15438 gcc_unreachable ();
15441 /* Override for comparisons. */
15442 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
15443 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
15444 type
= v4si_ftype_v4sf_v4sf
;
15446 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
15447 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
15448 type
= v2di_ftype_v2df_v2df
;
15450 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15453 /* Add all builtins that are more or less simple operations on 1 operand. */
15454 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
15456 enum machine_mode mode
;
15461 mode
= insn_data
[d
->icode
].operand
[1].mode
;
15466 type
= v16qi_ftype_v16qi
;
15469 type
= v8hi_ftype_v8hi
;
15472 type
= v4si_ftype_v4si
;
15475 type
= v2df_ftype_v2df
;
15478 type
= v4sf_ftype_v4sf
;
15481 type
= v8qi_ftype_v8qi
;
15484 type
= v4hi_ftype_v4hi
;
15487 type
= v2si_ftype_v2si
;
15494 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
15497 /* Add the remaining MMX insns with somewhat more complicated types. */
15498 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
15499 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
15500 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
15501 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
15503 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
15504 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
15505 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
15507 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
15508 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
15510 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
15511 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
15513 /* comi/ucomi insns. */
15514 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
15515 if (d
->mask
== MASK_SSE2
)
15516 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
15518 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
15520 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
15521 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
15522 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
15524 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
15525 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
15526 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
15527 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
15528 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
15529 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
15530 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
15531 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
15532 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
15533 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
15534 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
15536 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
15538 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
15539 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
15541 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
15542 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
15543 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
15544 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
15546 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
15547 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
15548 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
15549 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
15551 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
15553 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
15555 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
15556 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
15557 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
15558 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
15559 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
15560 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
15562 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
15564 /* Original 3DNow! */
15565 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
15566 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
15567 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
15568 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
15569 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
15570 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
15571 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
15572 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
15573 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
15574 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
15575 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
15576 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
15577 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
15578 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
15579 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
15580 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
15581 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
15582 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
15583 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
15584 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
15586 /* 3DNow! extension as used in the Athlon CPU. */
15587 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
15588 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
15589 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
15590 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
15591 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
15592 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
15595 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
15597 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
15598 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
15600 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
15601 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
15603 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
15604 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
15605 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
15606 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
15607 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
15609 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
15610 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
15611 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
15612 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
15614 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
15615 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
15617 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
15619 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
15620 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
15622 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
15623 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
15624 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
15625 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
15626 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
15628 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
15630 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
15631 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
15632 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
15633 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
15635 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
15636 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
15637 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
15639 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
15640 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
15641 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
15642 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
15644 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
15645 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
15646 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
15648 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
15649 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
15651 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
15652 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
15654 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
15655 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
15656 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
15658 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
15659 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
15660 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
15662 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
15663 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
15665 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
15666 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
15667 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
15668 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
15670 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
15671 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
15672 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
15673 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
15675 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
15676 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
15678 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
15680 /* Prescott New Instructions. */
15681 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
15682 void_ftype_pcvoid_unsigned_unsigned
,
15683 IX86_BUILTIN_MONITOR
);
15684 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
15685 void_ftype_unsigned_unsigned
,
15686 IX86_BUILTIN_MWAIT
);
15687 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
15689 IX86_BUILTIN_MOVSHDUP
);
15690 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
15692 IX86_BUILTIN_MOVSLDUP
);
15693 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
15694 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
15697 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
15698 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
15699 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
15700 IX86_BUILTIN_PALIGNR
);
15702 /* Access to the vec_init patterns. */
15703 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
15704 integer_type_node
, NULL_TREE
);
15705 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
15706 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
15708 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
15709 short_integer_type_node
,
15710 short_integer_type_node
,
15711 short_integer_type_node
, NULL_TREE
);
15712 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
15713 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
15715 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
15716 char_type_node
, char_type_node
,
15717 char_type_node
, char_type_node
,
15718 char_type_node
, char_type_node
,
15719 char_type_node
, NULL_TREE
);
15720 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
15721 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
15723 /* Access to the vec_extract patterns. */
15724 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
15725 integer_type_node
, NULL_TREE
);
15726 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
15727 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
15729 ftype
= build_function_type_list (long_long_integer_type_node
,
15730 V2DI_type_node
, integer_type_node
,
15732 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
15733 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
15735 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
15736 integer_type_node
, NULL_TREE
);
15737 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
15738 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
15740 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
15741 integer_type_node
, NULL_TREE
);
15742 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
15743 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
15745 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
15746 integer_type_node
, NULL_TREE
);
15747 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
15748 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
15750 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
15751 integer_type_node
, NULL_TREE
);
15752 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
15753 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
15755 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
15756 integer_type_node
, NULL_TREE
);
15757 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
15758 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
15760 /* Access to the vec_set patterns. */
15761 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
15763 integer_type_node
, NULL_TREE
);
15764 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
15765 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
15767 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
15769 integer_type_node
, NULL_TREE
);
15770 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
15771 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
15774 /* Errors in the source file can cause expand_expr to return const0_rtx
15775 where we expect a vector. To avoid crashing, use one of the vector
15776 clear instructions. */
15778 safe_vector_operand (rtx x
, enum machine_mode mode
)
15780 if (x
== const0_rtx
)
15781 x
= CONST0_RTX (mode
);
15785 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15788 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
15791 tree arg0
= TREE_VALUE (arglist
);
15792 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15793 rtx op0
= expand_normal (arg0
);
15794 rtx op1
= expand_normal (arg1
);
15795 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15796 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15797 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15799 if (VECTOR_MODE_P (mode0
))
15800 op0
= safe_vector_operand (op0
, mode0
);
15801 if (VECTOR_MODE_P (mode1
))
15802 op1
= safe_vector_operand (op1
, mode1
);
15804 if (optimize
|| !target
15805 || GET_MODE (target
) != tmode
15806 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15807 target
= gen_reg_rtx (tmode
);
15809 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
15811 rtx x
= gen_reg_rtx (V4SImode
);
15812 emit_insn (gen_sse2_loadd (x
, op1
));
15813 op1
= gen_lowpart (TImode
, x
);
15816 /* The insn must want input operands in the same modes as the
15818 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
15819 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
15821 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15822 op0
= copy_to_mode_reg (mode0
, op0
);
15823 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15824 op1
= copy_to_mode_reg (mode1
, op1
);
15826 /* ??? Using ix86_fixup_binary_operands is problematic when
15827 we've got mismatched modes. Fake it. */
15833 if (tmode
== mode0
&& tmode
== mode1
)
15835 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
15839 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
15841 op0
= force_reg (mode0
, op0
);
15842 op1
= force_reg (mode1
, op1
);
15843 target
= gen_reg_rtx (tmode
);
15846 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15853 /* Subroutine of ix86_expand_builtin to take care of stores. */
15856 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
15859 tree arg0
= TREE_VALUE (arglist
);
15860 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15861 rtx op0
= expand_normal (arg0
);
15862 rtx op1
= expand_normal (arg1
);
15863 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15864 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15866 if (VECTOR_MODE_P (mode1
))
15867 op1
= safe_vector_operand (op1
, mode1
);
15869 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15870 op1
= copy_to_mode_reg (mode1
, op1
);
15872 pat
= GEN_FCN (icode
) (op0
, op1
);
15878 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15881 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
15882 rtx target
, int do_load
)
15885 tree arg0
= TREE_VALUE (arglist
);
15886 rtx op0
= expand_normal (arg0
);
15887 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15888 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15890 if (optimize
|| !target
15891 || GET_MODE (target
) != tmode
15892 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15893 target
= gen_reg_rtx (tmode
);
15895 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15898 if (VECTOR_MODE_P (mode0
))
15899 op0
= safe_vector_operand (op0
, mode0
);
15901 if ((optimize
&& !register_operand (op0
, mode0
))
15902 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15903 op0
= copy_to_mode_reg (mode0
, op0
);
15906 pat
= GEN_FCN (icode
) (target
, op0
);
15913 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15914 sqrtss, rsqrtss, rcpss. */
15917 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
15920 tree arg0
= TREE_VALUE (arglist
);
15921 rtx op1
, op0
= expand_normal (arg0
);
15922 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15923 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15925 if (optimize
|| !target
15926 || GET_MODE (target
) != tmode
15927 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15928 target
= gen_reg_rtx (tmode
);
15930 if (VECTOR_MODE_P (mode0
))
15931 op0
= safe_vector_operand (op0
, mode0
);
15933 if ((optimize
&& !register_operand (op0
, mode0
))
15934 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15935 op0
= copy_to_mode_reg (mode0
, op0
);
15938 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
15939 op1
= copy_to_mode_reg (mode0
, op1
);
15941 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15948 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15951 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
15955 tree arg0
= TREE_VALUE (arglist
);
15956 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
15957 rtx op0
= expand_normal (arg0
);
15958 rtx op1
= expand_normal (arg1
);
15960 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
15961 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
15962 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
15963 enum rtx_code comparison
= d
->comparison
;
15965 if (VECTOR_MODE_P (mode0
))
15966 op0
= safe_vector_operand (op0
, mode0
);
15967 if (VECTOR_MODE_P (mode1
))
15968 op1
= safe_vector_operand (op1
, mode1
);
15970 /* Swap operands if we have a comparison that isn't available in
15972 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
15974 rtx tmp
= gen_reg_rtx (mode1
);
15975 emit_move_insn (tmp
, op1
);
15980 if (optimize
|| !target
15981 || GET_MODE (target
) != tmode
15982 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
15983 target
= gen_reg_rtx (tmode
);
15985 if ((optimize
&& !register_operand (op0
, mode0
))
15986 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
15987 op0
= copy_to_mode_reg (mode0
, op0
);
15988 if ((optimize
&& !register_operand (op1
, mode1
))
15989 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
15990 op1
= copy_to_mode_reg (mode1
, op1
);
15992 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
15993 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
16000 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16003 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
16007 tree arg0
= TREE_VALUE (arglist
);
16008 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16009 rtx op0
= expand_normal (arg0
);
16010 rtx op1
= expand_normal (arg1
);
16012 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
16013 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
16014 enum rtx_code comparison
= d
->comparison
;
16016 if (VECTOR_MODE_P (mode0
))
16017 op0
= safe_vector_operand (op0
, mode0
);
16018 if (VECTOR_MODE_P (mode1
))
16019 op1
= safe_vector_operand (op1
, mode1
);
16021 /* Swap operands if we have a comparison that isn't available in
16023 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
16030 target
= gen_reg_rtx (SImode
);
16031 emit_move_insn (target
, const0_rtx
);
16032 target
= gen_rtx_SUBREG (QImode
, target
, 0);
16034 if ((optimize
&& !register_operand (op0
, mode0
))
16035 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
16036 op0
= copy_to_mode_reg (mode0
, op0
);
16037 if ((optimize
&& !register_operand (op1
, mode1
))
16038 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
16039 op1
= copy_to_mode_reg (mode1
, op1
);
16041 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
16042 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
16046 emit_insn (gen_rtx_SET (VOIDmode
,
16047 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
16048 gen_rtx_fmt_ee (comparison
, QImode
,
16052 return SUBREG_REG (target
);
16055 /* Return the integer constant in ARG. Constrain it to be in the range
16056 of the subparts of VEC_TYPE; issue an error if not. */
16059 get_element_number (tree vec_type
, tree arg
)
16061 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
16063 if (!host_integerp (arg
, 1)
16064 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
16066 error ("selector must be an integer constant in the range 0..%wi", max
);
16073 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16074 ix86_expand_vector_init. We DO have language-level syntax for this, in
16075 the form of (type){ init-list }. Except that since we can't place emms
16076 instructions from inside the compiler, we can't allow the use of MMX
16077 registers unless the user explicitly asks for it. So we do *not* define
16078 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16079 we have builtins invoked by mmintrin.h that gives us license to emit
16080 these sorts of instructions. */
16083 ix86_expand_vec_init_builtin (tree type
, tree arglist
, rtx target
)
16085 enum machine_mode tmode
= TYPE_MODE (type
);
16086 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
16087 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
16088 rtvec v
= rtvec_alloc (n_elt
);
16090 gcc_assert (VECTOR_MODE_P (tmode
));
16092 for (i
= 0; i
< n_elt
; ++i
, arglist
= TREE_CHAIN (arglist
))
16094 rtx x
= expand_normal (TREE_VALUE (arglist
));
16095 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
16098 gcc_assert (arglist
== NULL
);
16100 if (!target
|| !register_operand (target
, tmode
))
16101 target
= gen_reg_rtx (tmode
);
16103 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
16107 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16108 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16109 had a language-level syntax for referencing vector elements. */
16112 ix86_expand_vec_ext_builtin (tree arglist
, rtx target
)
16114 enum machine_mode tmode
, mode0
;
16119 arg0
= TREE_VALUE (arglist
);
16120 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16122 op0
= expand_normal (arg0
);
16123 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
16125 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16126 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
16127 gcc_assert (VECTOR_MODE_P (mode0
));
16129 op0
= force_reg (mode0
, op0
);
16131 if (optimize
|| !target
|| !register_operand (target
, tmode
))
16132 target
= gen_reg_rtx (tmode
);
16134 ix86_expand_vector_extract (true, target
, op0
, elt
);
16139 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16140 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16141 a language-level syntax for referencing vector elements. */
16144 ix86_expand_vec_set_builtin (tree arglist
)
16146 enum machine_mode tmode
, mode1
;
16147 tree arg0
, arg1
, arg2
;
16151 arg0
= TREE_VALUE (arglist
);
16152 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16153 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16155 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
16156 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16157 gcc_assert (VECTOR_MODE_P (tmode
));
16159 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
16160 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
16161 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
16163 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
16164 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
16166 op0
= force_reg (tmode
, op0
);
16167 op1
= force_reg (mode1
, op1
);
16169 ix86_expand_vector_set (true, op0
, op1
, elt
);
16174 /* Expand an expression EXP that calls a built-in function,
16175 with result going to TARGET if that's convenient
16176 (and in mode MODE if that's convenient).
16177 SUBTARGET may be used as the target for computing one of EXP's operands.
16178 IGNORE is nonzero if the value is to be ignored. */
16181 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
16182 enum machine_mode mode ATTRIBUTE_UNUSED
,
16183 int ignore ATTRIBUTE_UNUSED
)
16185 const struct builtin_description
*d
;
16187 enum insn_code icode
;
16188 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
16189 tree arglist
= TREE_OPERAND (exp
, 1);
16190 tree arg0
, arg1
, arg2
;
16191 rtx op0
, op1
, op2
, pat
;
16192 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
;
16193 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
16197 case IX86_BUILTIN_EMMS
:
16198 emit_insn (gen_mmx_emms ());
16201 case IX86_BUILTIN_SFENCE
:
16202 emit_insn (gen_sse_sfence ());
16205 case IX86_BUILTIN_MASKMOVQ
:
16206 case IX86_BUILTIN_MASKMOVDQU
:
16207 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
16208 ? CODE_FOR_mmx_maskmovq
16209 : CODE_FOR_sse2_maskmovdqu
);
16210 /* Note the arg order is different from the operand order. */
16211 arg1
= TREE_VALUE (arglist
);
16212 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
16213 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16214 op0
= expand_normal (arg0
);
16215 op1
= expand_normal (arg1
);
16216 op2
= expand_normal (arg2
);
16217 mode0
= insn_data
[icode
].operand
[0].mode
;
16218 mode1
= insn_data
[icode
].operand
[1].mode
;
16219 mode2
= insn_data
[icode
].operand
[2].mode
;
16221 op0
= force_reg (Pmode
, op0
);
16222 op0
= gen_rtx_MEM (mode1
, op0
);
16224 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16225 op0
= copy_to_mode_reg (mode0
, op0
);
16226 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
16227 op1
= copy_to_mode_reg (mode1
, op1
);
16228 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
16229 op2
= copy_to_mode_reg (mode2
, op2
);
16230 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
16236 case IX86_BUILTIN_SQRTSS
:
16237 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
16238 case IX86_BUILTIN_RSQRTSS
:
16239 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
16240 case IX86_BUILTIN_RCPSS
:
16241 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
16243 case IX86_BUILTIN_LOADUPS
:
16244 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
16246 case IX86_BUILTIN_STOREUPS
:
16247 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
16249 case IX86_BUILTIN_LOADHPS
:
16250 case IX86_BUILTIN_LOADLPS
:
16251 case IX86_BUILTIN_LOADHPD
:
16252 case IX86_BUILTIN_LOADLPD
:
16253 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
16254 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
16255 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
16256 : CODE_FOR_sse2_loadlpd
);
16257 arg0
= TREE_VALUE (arglist
);
16258 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16259 op0
= expand_normal (arg0
);
16260 op1
= expand_normal (arg1
);
16261 tmode
= insn_data
[icode
].operand
[0].mode
;
16262 mode0
= insn_data
[icode
].operand
[1].mode
;
16263 mode1
= insn_data
[icode
].operand
[2].mode
;
16265 op0
= force_reg (mode0
, op0
);
16266 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
16267 if (optimize
|| target
== 0
16268 || GET_MODE (target
) != tmode
16269 || !register_operand (target
, tmode
))
16270 target
= gen_reg_rtx (tmode
);
16271 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16277 case IX86_BUILTIN_STOREHPS
:
16278 case IX86_BUILTIN_STORELPS
:
16279 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
16280 : CODE_FOR_sse_storelps
);
16281 arg0
= TREE_VALUE (arglist
);
16282 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16283 op0
= expand_normal (arg0
);
16284 op1
= expand_normal (arg1
);
16285 mode0
= insn_data
[icode
].operand
[0].mode
;
16286 mode1
= insn_data
[icode
].operand
[1].mode
;
16288 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16289 op1
= force_reg (mode1
, op1
);
16291 pat
= GEN_FCN (icode
) (op0
, op1
);
16297 case IX86_BUILTIN_MOVNTPS
:
16298 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
16299 case IX86_BUILTIN_MOVNTQ
:
16300 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
16302 case IX86_BUILTIN_LDMXCSR
:
16303 op0
= expand_normal (TREE_VALUE (arglist
));
16304 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16305 emit_move_insn (target
, op0
);
16306 emit_insn (gen_sse_ldmxcsr (target
));
16309 case IX86_BUILTIN_STMXCSR
:
16310 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
16311 emit_insn (gen_sse_stmxcsr (target
));
16312 return copy_to_mode_reg (SImode
, target
);
16314 case IX86_BUILTIN_SHUFPS
:
16315 case IX86_BUILTIN_SHUFPD
:
16316 icode
= (fcode
== IX86_BUILTIN_SHUFPS
16317 ? CODE_FOR_sse_shufps
16318 : CODE_FOR_sse2_shufpd
);
16319 arg0
= TREE_VALUE (arglist
);
16320 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16321 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16322 op0
= expand_normal (arg0
);
16323 op1
= expand_normal (arg1
);
16324 op2
= expand_normal (arg2
);
16325 tmode
= insn_data
[icode
].operand
[0].mode
;
16326 mode0
= insn_data
[icode
].operand
[1].mode
;
16327 mode1
= insn_data
[icode
].operand
[2].mode
;
16328 mode2
= insn_data
[icode
].operand
[3].mode
;
16330 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16331 op0
= copy_to_mode_reg (mode0
, op0
);
16332 if ((optimize
&& !register_operand (op1
, mode1
))
16333 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16334 op1
= copy_to_mode_reg (mode1
, op1
);
16335 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16337 /* @@@ better error message */
16338 error ("mask must be an immediate");
16339 return gen_reg_rtx (tmode
);
16341 if (optimize
|| target
== 0
16342 || GET_MODE (target
) != tmode
16343 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16344 target
= gen_reg_rtx (tmode
);
16345 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16351 case IX86_BUILTIN_PSHUFW
:
16352 case IX86_BUILTIN_PSHUFD
:
16353 case IX86_BUILTIN_PSHUFHW
:
16354 case IX86_BUILTIN_PSHUFLW
:
16355 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
16356 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
16357 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
16358 : CODE_FOR_mmx_pshufw
);
16359 arg0
= TREE_VALUE (arglist
);
16360 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16361 op0
= expand_normal (arg0
);
16362 op1
= expand_normal (arg1
);
16363 tmode
= insn_data
[icode
].operand
[0].mode
;
16364 mode1
= insn_data
[icode
].operand
[1].mode
;
16365 mode2
= insn_data
[icode
].operand
[2].mode
;
16367 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16368 op0
= copy_to_mode_reg (mode1
, op0
);
16369 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16371 /* @@@ better error message */
16372 error ("mask must be an immediate");
16376 || GET_MODE (target
) != tmode
16377 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16378 target
= gen_reg_rtx (tmode
);
16379 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16385 case IX86_BUILTIN_PSLLDQI128
:
16386 case IX86_BUILTIN_PSRLDQI128
:
16387 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
16388 : CODE_FOR_sse2_lshrti3
);
16389 arg0
= TREE_VALUE (arglist
);
16390 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16391 op0
= expand_normal (arg0
);
16392 op1
= expand_normal (arg1
);
16393 tmode
= insn_data
[icode
].operand
[0].mode
;
16394 mode1
= insn_data
[icode
].operand
[1].mode
;
16395 mode2
= insn_data
[icode
].operand
[2].mode
;
16397 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16399 op0
= copy_to_reg (op0
);
16400 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16402 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16404 error ("shift must be an immediate");
16407 target
= gen_reg_rtx (V2DImode
);
16408 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
16414 case IX86_BUILTIN_FEMMS
:
16415 emit_insn (gen_mmx_femms ());
16418 case IX86_BUILTIN_PAVGUSB
:
16419 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
16421 case IX86_BUILTIN_PF2ID
:
16422 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
16424 case IX86_BUILTIN_PFACC
:
16425 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
16427 case IX86_BUILTIN_PFADD
:
16428 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
16430 case IX86_BUILTIN_PFCMPEQ
:
16431 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
16433 case IX86_BUILTIN_PFCMPGE
:
16434 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
16436 case IX86_BUILTIN_PFCMPGT
:
16437 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
16439 case IX86_BUILTIN_PFMAX
:
16440 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
16442 case IX86_BUILTIN_PFMIN
:
16443 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
16445 case IX86_BUILTIN_PFMUL
:
16446 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
16448 case IX86_BUILTIN_PFRCP
:
16449 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
16451 case IX86_BUILTIN_PFRCPIT1
:
16452 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
16454 case IX86_BUILTIN_PFRCPIT2
:
16455 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
16457 case IX86_BUILTIN_PFRSQIT1
:
16458 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
16460 case IX86_BUILTIN_PFRSQRT
:
16461 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
16463 case IX86_BUILTIN_PFSUB
:
16464 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
16466 case IX86_BUILTIN_PFSUBR
:
16467 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
16469 case IX86_BUILTIN_PI2FD
:
16470 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
16472 case IX86_BUILTIN_PMULHRW
:
16473 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
16475 case IX86_BUILTIN_PF2IW
:
16476 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
16478 case IX86_BUILTIN_PFNACC
:
16479 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
16481 case IX86_BUILTIN_PFPNACC
:
16482 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
16484 case IX86_BUILTIN_PI2FW
:
16485 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
16487 case IX86_BUILTIN_PSWAPDSI
:
16488 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
16490 case IX86_BUILTIN_PSWAPDSF
:
16491 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
16493 case IX86_BUILTIN_SQRTSD
:
16494 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
16495 case IX86_BUILTIN_LOADUPD
:
16496 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
16497 case IX86_BUILTIN_STOREUPD
:
16498 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
16500 case IX86_BUILTIN_MFENCE
:
16501 emit_insn (gen_sse2_mfence ());
16503 case IX86_BUILTIN_LFENCE
:
16504 emit_insn (gen_sse2_lfence ());
16507 case IX86_BUILTIN_CLFLUSH
:
16508 arg0
= TREE_VALUE (arglist
);
16509 op0
= expand_normal (arg0
);
16510 icode
= CODE_FOR_sse2_clflush
;
16511 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
16512 op0
= copy_to_mode_reg (Pmode
, op0
);
16514 emit_insn (gen_sse2_clflush (op0
));
16517 case IX86_BUILTIN_MOVNTPD
:
16518 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
16519 case IX86_BUILTIN_MOVNTDQ
:
16520 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
16521 case IX86_BUILTIN_MOVNTI
:
16522 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
16524 case IX86_BUILTIN_LOADDQU
:
16525 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
16526 case IX86_BUILTIN_STOREDQU
:
16527 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
16529 case IX86_BUILTIN_MONITOR
:
16530 arg0
= TREE_VALUE (arglist
);
16531 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16532 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16533 op0
= expand_normal (arg0
);
16534 op1
= expand_normal (arg1
);
16535 op2
= expand_normal (arg2
);
16537 op0
= copy_to_mode_reg (Pmode
, op0
);
16539 op1
= copy_to_mode_reg (SImode
, op1
);
16541 op2
= copy_to_mode_reg (SImode
, op2
);
16543 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
16545 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
16548 case IX86_BUILTIN_MWAIT
:
16549 arg0
= TREE_VALUE (arglist
);
16550 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16551 op0
= expand_normal (arg0
);
16552 op1
= expand_normal (arg1
);
16554 op0
= copy_to_mode_reg (SImode
, op0
);
16556 op1
= copy_to_mode_reg (SImode
, op1
);
16557 emit_insn (gen_sse3_mwait (op0
, op1
));
16560 case IX86_BUILTIN_LDDQU
:
16561 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
,
16564 case IX86_BUILTIN_PALIGNR
:
16565 case IX86_BUILTIN_PALIGNR128
:
16566 if (fcode
== IX86_BUILTIN_PALIGNR
)
16568 icode
= CODE_FOR_ssse3_palignrdi
;
16573 icode
= CODE_FOR_ssse3_palignrti
;
16576 arg0
= TREE_VALUE (arglist
);
16577 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
16578 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
16579 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
16580 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
16581 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
16582 tmode
= insn_data
[icode
].operand
[0].mode
;
16583 mode1
= insn_data
[icode
].operand
[1].mode
;
16584 mode2
= insn_data
[icode
].operand
[2].mode
;
16585 mode3
= insn_data
[icode
].operand
[3].mode
;
16587 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16589 op0
= copy_to_reg (op0
);
16590 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
16592 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16594 op1
= copy_to_reg (op1
);
16595 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
16597 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
16599 error ("shift must be an immediate");
16602 target
= gen_reg_rtx (mode
);
16603 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
16610 case IX86_BUILTIN_VEC_INIT_V2SI
:
16611 case IX86_BUILTIN_VEC_INIT_V4HI
:
16612 case IX86_BUILTIN_VEC_INIT_V8QI
:
16613 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), arglist
, target
);
16615 case IX86_BUILTIN_VEC_EXT_V2DF
:
16616 case IX86_BUILTIN_VEC_EXT_V2DI
:
16617 case IX86_BUILTIN_VEC_EXT_V4SF
:
16618 case IX86_BUILTIN_VEC_EXT_V4SI
:
16619 case IX86_BUILTIN_VEC_EXT_V8HI
:
16620 case IX86_BUILTIN_VEC_EXT_V2SI
:
16621 case IX86_BUILTIN_VEC_EXT_V4HI
:
16622 return ix86_expand_vec_ext_builtin (arglist
, target
);
16624 case IX86_BUILTIN_VEC_SET_V8HI
:
16625 case IX86_BUILTIN_VEC_SET_V4HI
:
16626 return ix86_expand_vec_set_builtin (arglist
);
16632 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16633 if (d
->code
== fcode
)
16635 /* Compares are treated specially. */
16636 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
16637 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
16638 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
16639 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
16640 return ix86_expand_sse_compare (d
, arglist
, target
);
16642 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
16645 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16646 if (d
->code
== fcode
)
16647 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
16649 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
16650 if (d
->code
== fcode
)
16651 return ix86_expand_sse_comi (d
, arglist
, target
);
16653 gcc_unreachable ();
16656 /* Store OPERAND to the memory after reload is completed. This means
16657 that we can't easily use assign_stack_local. */
16659 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
16663 gcc_assert (reload_completed
);
16664 if (TARGET_RED_ZONE
)
16666 result
= gen_rtx_MEM (mode
,
16667 gen_rtx_PLUS (Pmode
,
16669 GEN_INT (-RED_ZONE_SIZE
)));
16670 emit_move_insn (result
, operand
);
16672 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
16678 operand
= gen_lowpart (DImode
, operand
);
16682 gen_rtx_SET (VOIDmode
,
16683 gen_rtx_MEM (DImode
,
16684 gen_rtx_PRE_DEC (DImode
,
16685 stack_pointer_rtx
)),
16689 gcc_unreachable ();
16691 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16700 split_di (&operand
, 1, operands
, operands
+ 1);
16702 gen_rtx_SET (VOIDmode
,
16703 gen_rtx_MEM (SImode
,
16704 gen_rtx_PRE_DEC (Pmode
,
16705 stack_pointer_rtx
)),
16708 gen_rtx_SET (VOIDmode
,
16709 gen_rtx_MEM (SImode
,
16710 gen_rtx_PRE_DEC (Pmode
,
16711 stack_pointer_rtx
)),
16716 /* Store HImodes as SImodes. */
16717 operand
= gen_lowpart (SImode
, operand
);
16721 gen_rtx_SET (VOIDmode
,
16722 gen_rtx_MEM (GET_MODE (operand
),
16723 gen_rtx_PRE_DEC (SImode
,
16724 stack_pointer_rtx
)),
16728 gcc_unreachable ();
16730 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16735 /* Free operand from the memory. */
16737 ix86_free_from_memory (enum machine_mode mode
)
16739 if (!TARGET_RED_ZONE
)
16743 if (mode
== DImode
|| TARGET_64BIT
)
16747 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16748 to pop or add instruction if registers are available. */
16749 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16750 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16755 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16756 QImode must go into class Q_REGS.
16757 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16758 movdf to do mem-to-mem moves through integer regs. */
16760 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
16762 enum machine_mode mode
= GET_MODE (x
);
16764 /* We're only allowed to return a subclass of CLASS. Many of the
16765 following checks fail for NO_REGS, so eliminate that early. */
16766 if (class == NO_REGS
)
16769 /* All classes can load zeros. */
16770 if (x
== CONST0_RTX (mode
))
16773 /* Force constants into memory if we are loading a (nonzero) constant into
16774 an MMX or SSE register. This is because there are no MMX/SSE instructions
16775 to load from a constant. */
16777 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16780 /* Prefer SSE regs only, if we can use them for math. */
16781 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
16782 return SSE_CLASS_P (class) ? class : NO_REGS
;
16784 /* Floating-point constants need more complex checks. */
16785 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
16787 /* General regs can load everything. */
16788 if (reg_class_subset_p (class, GENERAL_REGS
))
16791 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16792 zero above. We only want to wind up preferring 80387 registers if
16793 we plan on doing computation with them. */
16795 && standard_80387_constant_p (x
))
16797 /* Limit class to non-sse. */
16798 if (class == FLOAT_SSE_REGS
)
16800 if (class == FP_TOP_SSE_REGS
)
16802 if (class == FP_SECOND_SSE_REGS
)
16803 return FP_SECOND_REG
;
16804 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
16811 /* Generally when we see PLUS here, it's the function invariant
16812 (plus soft-fp const_int). Which can only be computed into general
16814 if (GET_CODE (x
) == PLUS
)
16815 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
16817 /* QImode constants are easy to load, but non-constant QImode data
16818 must go into Q_REGS. */
16819 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
16821 if (reg_class_subset_p (class, Q_REGS
))
16823 if (reg_class_subset_p (Q_REGS
, class))
16831 /* Discourage putting floating-point values in SSE registers unless
16832 SSE math is being used, and likewise for the 387 registers. */
16834 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
16836 enum machine_mode mode
= GET_MODE (x
);
16838 /* Restrict the output reload class to the register bank that we are doing
16839 math on. If we would like not to return a subset of CLASS, reject this
16840 alternative: if reload cannot do this, it will still use its choice. */
16841 mode
= GET_MODE (x
);
16842 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
16843 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
16845 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
16847 if (class == FP_TOP_SSE_REGS
)
16849 else if (class == FP_SECOND_SSE_REGS
)
16850 return FP_SECOND_REG
;
16852 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
16858 /* If we are copying between general and FP registers, we need a memory
16859 location. The same is true for SSE and MMX registers.
16861 The macro can't work reliably when one of the CLASSES is class containing
16862 registers from multiple units (SSE, MMX, integer). We avoid this by never
16863 combining those units in single alternative in the machine description.
16864 Ensure that this constraint holds to avoid unexpected surprises.
16866 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16867 enforce these sanity checks. */
16870 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
16871 enum machine_mode mode
, int strict
)
16873 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
16874 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
16875 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
16876 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
16877 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
16878 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
16880 gcc_assert (!strict
);
16884 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
16887 /* ??? This is a lie. We do have moves between mmx/general, and for
16888 mmx/sse2. But by saying we need secondary memory we discourage the
16889 register allocator from using the mmx registers unless needed. */
16890 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
16893 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16895 /* SSE1 doesn't have any direct moves from other classes. */
16899 /* If the target says that inter-unit moves are more expensive
16900 than moving through memory, then don't generate them. */
16901 if (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
)
16904 /* Between SSE and general, we have moves no larger than word size. */
16905 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
16908 /* ??? For the cost of one register reformat penalty, we could use
16909 the same instructions to move SFmode and DFmode data, but the
16910 relevant move patterns don't support those alternatives. */
16911 if (mode
== SFmode
|| mode
== DFmode
)
16918 /* Return true if the registers in CLASS cannot represent the change from
16919 modes FROM to TO. */
16922 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
16923 enum reg_class
class)
16928 /* x87 registers can't do subreg at all, as all values are reformatted
16929 to extended precision. */
16930 if (MAYBE_FLOAT_CLASS_P (class))
16933 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16935 /* Vector registers do not support QI or HImode loads. If we don't
16936 disallow a change to these modes, reload will assume it's ok to
16937 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16938 the vec_dupv4hi pattern. */
16939 if (GET_MODE_SIZE (from
) < 4)
16942 /* Vector registers do not support subreg with nonzero offsets, which
16943 are otherwise valid for integer registers. Since we can't see
16944 whether we have a nonzero offset from here, prohibit all
16945 nonparadoxical subregs changing size. */
16946 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
16953 /* Return the cost of moving data from a register in class CLASS1 to
16954 one in class CLASS2.
16956 It is not required that the cost always equal 2 when FROM is the same as TO;
16957 on some machines it is expensive to move between registers if they are not
16958 general registers. */
16961 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
16962 enum reg_class class2
)
16964 /* In case we require secondary memory, compute cost of the store followed
16965 by load. In order to avoid bad register allocation choices, we need
16966 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16968 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
16972 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
16973 MEMORY_MOVE_COST (mode
, class1
, 1));
16974 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
16975 MEMORY_MOVE_COST (mode
, class2
, 1));
16977 /* In case of copying from general_purpose_register we may emit multiple
16978 stores followed by single load causing memory size mismatch stall.
16979 Count this as arbitrarily high cost of 20. */
16980 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
16983 /* In the case of FP/MMX moves, the registers actually overlap, and we
16984 have to switch modes in order to treat them differently. */
16985 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
16986 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
16992 /* Moves between SSE/MMX and integer unit are expensive. */
16993 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
16994 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
16995 return ix86_cost
->mmxsse_to_integer
;
16996 if (MAYBE_FLOAT_CLASS_P (class1
))
16997 return ix86_cost
->fp_move
;
16998 if (MAYBE_SSE_CLASS_P (class1
))
16999 return ix86_cost
->sse_move
;
17000 if (MAYBE_MMX_CLASS_P (class1
))
17001 return ix86_cost
->mmx_move
;
17005 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17008 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
17010 /* Flags and only flags can only hold CCmode values. */
17011 if (CC_REGNO_P (regno
))
17012 return GET_MODE_CLASS (mode
) == MODE_CC
;
17013 if (GET_MODE_CLASS (mode
) == MODE_CC
17014 || GET_MODE_CLASS (mode
) == MODE_RANDOM
17015 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
17017 if (FP_REGNO_P (regno
))
17018 return VALID_FP_MODE_P (mode
);
17019 if (SSE_REGNO_P (regno
))
17021 /* We implement the move patterns for all vector modes into and
17022 out of SSE registers, even when no operation instructions
17024 return (VALID_SSE_REG_MODE (mode
)
17025 || VALID_SSE2_REG_MODE (mode
)
17026 || VALID_MMX_REG_MODE (mode
)
17027 || VALID_MMX_REG_MODE_3DNOW (mode
));
17029 if (MMX_REGNO_P (regno
))
17031 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17032 so if the register is available at all, then we can move data of
17033 the given mode into or out of it. */
17034 return (VALID_MMX_REG_MODE (mode
)
17035 || VALID_MMX_REG_MODE_3DNOW (mode
));
17038 if (mode
== QImode
)
17040 /* Take care for QImode values - they can be in non-QI regs,
17041 but then they do cause partial register stalls. */
17042 if (regno
< 4 || TARGET_64BIT
)
17044 if (!TARGET_PARTIAL_REG_STALL
)
17046 return reload_in_progress
|| reload_completed
;
17048 /* We handle both integer and floats in the general purpose registers. */
17049 else if (VALID_INT_MODE_P (mode
))
17051 else if (VALID_FP_MODE_P (mode
))
17053 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17054 on to use that value in smaller contexts, this can easily force a
17055 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17056 supporting DImode, allow it. */
17057 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
17063 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17064 tieable integer mode. */
17067 ix86_tieable_integer_mode_p (enum machine_mode mode
)
17076 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
17079 return TARGET_64BIT
;
17086 /* Return true if MODE1 is accessible in a register that can hold MODE2
17087 without copying. That is, all register classes that can hold MODE2
17088 can also hold MODE1. */
17091 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
17093 if (mode1
== mode2
)
17096 if (ix86_tieable_integer_mode_p (mode1
)
17097 && ix86_tieable_integer_mode_p (mode2
))
17100 /* MODE2 being XFmode implies fp stack or general regs, which means we
17101 can tie any smaller floating point modes to it. Note that we do not
17102 tie this with TFmode. */
17103 if (mode2
== XFmode
)
17104 return mode1
== SFmode
|| mode1
== DFmode
;
17106 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17107 that we can tie it with SFmode. */
17108 if (mode2
== DFmode
)
17109 return mode1
== SFmode
;
17111 /* If MODE2 is only appropriate for an SSE register, then tie with
17112 any other mode acceptable to SSE registers. */
17113 if (GET_MODE_SIZE (mode2
) >= 8
17114 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
17115 return ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
);
17117 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17118 with any other mode acceptable to MMX registers. */
17119 if (GET_MODE_SIZE (mode2
) == 8
17120 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
17121 return ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
);
17126 /* Return the cost of moving data of mode M between a
17127 register and memory. A value of 2 is the default; this cost is
17128 relative to those in `REGISTER_MOVE_COST'.
17130 If moving between registers and memory is more expensive than
17131 between two registers, you should define this macro to express the
17134 Model also increased moving costs of QImode registers in non
17138 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
17140 if (FLOAT_CLASS_P (class))
17157 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
17159 if (SSE_CLASS_P (class))
17162 switch (GET_MODE_SIZE (mode
))
17176 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
17178 if (MMX_CLASS_P (class))
17181 switch (GET_MODE_SIZE (mode
))
17192 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
17194 switch (GET_MODE_SIZE (mode
))
17198 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
17199 : ix86_cost
->movzbl_load
);
17201 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
17202 : ix86_cost
->int_store
[0] + 4);
17205 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
17207 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17208 if (mode
== TFmode
)
17210 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
17211 * (((int) GET_MODE_SIZE (mode
)
17212 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
17216 /* Compute a (partial) cost for rtx X. Return true if the complete
17217 cost has been computed, and false if subexpressions should be
17218 scanned. In either case, *TOTAL contains the cost result. */
17221 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
17223 enum machine_mode mode
= GET_MODE (x
);
17231 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
17233 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
17235 else if (flag_pic
&& SYMBOLIC_CONST (x
)
17237 || (!GET_CODE (x
) != LABEL_REF
17238 && (GET_CODE (x
) != SYMBOL_REF
17239 || !SYMBOL_REF_LOCAL_P (x
)))))
17246 if (mode
== VOIDmode
)
17249 switch (standard_80387_constant_p (x
))
17254 default: /* Other constants */
17259 /* Start with (MEM (SYMBOL_REF)), since that's where
17260 it'll probably end up. Add a penalty for size. */
17261 *total
= (COSTS_N_INSNS (1)
17262 + (flag_pic
!= 0 && !TARGET_64BIT
)
17263 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
17269 /* The zero extensions is often completely free on x86_64, so make
17270 it as cheap as possible. */
17271 if (TARGET_64BIT
&& mode
== DImode
17272 && GET_MODE (XEXP (x
, 0)) == SImode
)
17274 else if (TARGET_ZERO_EXTEND_WITH_AND
)
17275 *total
= ix86_cost
->add
;
17277 *total
= ix86_cost
->movzx
;
17281 *total
= ix86_cost
->movsx
;
17285 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
17286 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
17288 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17291 *total
= ix86_cost
->add
;
17294 if ((value
== 2 || value
== 3)
17295 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
17297 *total
= ix86_cost
->lea
;
17307 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
17309 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17311 if (INTVAL (XEXP (x
, 1)) > 32)
17312 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
17314 *total
= ix86_cost
->shift_const
* 2;
17318 if (GET_CODE (XEXP (x
, 1)) == AND
)
17319 *total
= ix86_cost
->shift_var
* 2;
17321 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
17326 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17327 *total
= ix86_cost
->shift_const
;
17329 *total
= ix86_cost
->shift_var
;
17334 if (FLOAT_MODE_P (mode
))
17336 *total
= ix86_cost
->fmul
;
17341 rtx op0
= XEXP (x
, 0);
17342 rtx op1
= XEXP (x
, 1);
17344 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17346 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
17347 for (nbits
= 0; value
!= 0; value
&= value
- 1)
17351 /* This is arbitrary. */
17354 /* Compute costs correctly for widening multiplication. */
17355 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
17356 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
17357 == GET_MODE_SIZE (mode
))
17359 int is_mulwiden
= 0;
17360 enum machine_mode inner_mode
= GET_MODE (op0
);
17362 if (GET_CODE (op0
) == GET_CODE (op1
))
17363 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
17364 else if (GET_CODE (op1
) == CONST_INT
)
17366 if (GET_CODE (op0
) == SIGN_EXTEND
)
17367 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
17370 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
17374 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
17377 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
17378 + nbits
* ix86_cost
->mult_bit
17379 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
17388 if (FLOAT_MODE_P (mode
))
17389 *total
= ix86_cost
->fdiv
;
17391 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
17395 if (FLOAT_MODE_P (mode
))
17396 *total
= ix86_cost
->fadd
;
17397 else if (GET_MODE_CLASS (mode
) == MODE_INT
17398 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
17400 if (GET_CODE (XEXP (x
, 0)) == PLUS
17401 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
17402 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
17403 && CONSTANT_P (XEXP (x
, 1)))
17405 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
17406 if (val
== 2 || val
== 4 || val
== 8)
17408 *total
= ix86_cost
->lea
;
17409 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17410 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
17412 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17416 else if (GET_CODE (XEXP (x
, 0)) == MULT
17417 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
17419 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
17420 if (val
== 2 || val
== 4 || val
== 8)
17422 *total
= ix86_cost
->lea
;
17423 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17424 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17428 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
17430 *total
= ix86_cost
->lea
;
17431 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
17432 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
17433 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
17440 if (FLOAT_MODE_P (mode
))
17442 *total
= ix86_cost
->fadd
;
17450 if (!TARGET_64BIT
&& mode
== DImode
)
17452 *total
= (ix86_cost
->add
* 2
17453 + (rtx_cost (XEXP (x
, 0), outer_code
)
17454 << (GET_MODE (XEXP (x
, 0)) != DImode
))
17455 + (rtx_cost (XEXP (x
, 1), outer_code
)
17456 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
17462 if (FLOAT_MODE_P (mode
))
17464 *total
= ix86_cost
->fchs
;
17470 if (!TARGET_64BIT
&& mode
== DImode
)
17471 *total
= ix86_cost
->add
* 2;
17473 *total
= ix86_cost
->add
;
17477 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
17478 && XEXP (XEXP (x
, 0), 1) == const1_rtx
17479 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
17480 && XEXP (x
, 1) == const0_rtx
)
17482 /* This kind of construct is implemented using test[bwl].
17483 Treat it as if we had an AND. */
17484 *total
= (ix86_cost
->add
17485 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
17486 + rtx_cost (const1_rtx
, outer_code
));
17492 if (!TARGET_SSE_MATH
17494 || (mode
== DFmode
&& !TARGET_SSE2
))
17495 /* For standard 80387 constants, raise the cost to prevent
17496 compress_float_constant() to generate load from memory. */
17497 switch (standard_80387_constant_p (XEXP (x
, 0)))
17507 *total
= (x86_ext_80387_constants
& TUNEMASK
17514 if (FLOAT_MODE_P (mode
))
17515 *total
= ix86_cost
->fabs
;
17519 if (FLOAT_MODE_P (mode
))
17520 *total
= ix86_cost
->fsqrt
;
17524 if (XINT (x
, 1) == UNSPEC_TP
)
17535 static int current_machopic_label_num
;
17537 /* Given a symbol name and its associated stub, write out the
17538 definition of the stub. */
17541 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
17543 unsigned int length
;
17544 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
17545 int label
= ++current_machopic_label_num
;
17547 /* For 64-bit we shouldn't get here. */
17548 gcc_assert (!TARGET_64BIT
);
17550 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17551 symb
= (*targetm
.strip_name_encoding
) (symb
);
17553 length
= strlen (stub
);
17554 binder_name
= alloca (length
+ 32);
17555 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
17557 length
= strlen (symb
);
17558 symbol_name
= alloca (length
+ 32);
17559 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
17561 sprintf (lazy_ptr_name
, "L%d$lz", label
);
17564 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
17566 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
17568 fprintf (file
, "%s:\n", stub
);
17569 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17573 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
17574 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
17575 fprintf (file
, "\tjmp\t*%%edx\n");
17578 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
17580 fprintf (file
, "%s:\n", binder_name
);
17584 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
17585 fprintf (file
, "\tpushl\t%%eax\n");
17588 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
17590 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
17592 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
17593 fprintf (file
, "%s:\n", lazy_ptr_name
);
17594 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
17595 fprintf (file
, "\t.long %s\n", binder_name
);
17599 darwin_x86_file_end (void)
17601 darwin_file_end ();
17604 #endif /* TARGET_MACHO */
17606 /* Order the registers for register allocator. */
17609 x86_order_regs_for_local_alloc (void)
17614 /* First allocate the local general purpose registers. */
17615 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17616 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
17617 reg_alloc_order
[pos
++] = i
;
17619 /* Global general purpose registers. */
17620 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
17621 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
17622 reg_alloc_order
[pos
++] = i
;
17624 /* x87 registers come first in case we are doing FP math
17626 if (!TARGET_SSE_MATH
)
17627 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17628 reg_alloc_order
[pos
++] = i
;
17630 /* SSE registers. */
17631 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
17632 reg_alloc_order
[pos
++] = i
;
17633 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
17634 reg_alloc_order
[pos
++] = i
;
17636 /* x87 registers. */
17637 if (TARGET_SSE_MATH
)
17638 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
17639 reg_alloc_order
[pos
++] = i
;
17641 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
17642 reg_alloc_order
[pos
++] = i
;
17644 /* Initialize the rest of array as we do not allocate some registers
17646 while (pos
< FIRST_PSEUDO_REGISTER
)
17647 reg_alloc_order
[pos
++] = 0;
17650 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17651 struct attribute_spec.handler. */
17653 ix86_handle_struct_attribute (tree
*node
, tree name
,
17654 tree args ATTRIBUTE_UNUSED
,
17655 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
17658 if (DECL_P (*node
))
17660 if (TREE_CODE (*node
) == TYPE_DECL
)
17661 type
= &TREE_TYPE (*node
);
17666 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
17667 || TREE_CODE (*type
) == UNION_TYPE
)))
17669 warning (OPT_Wattributes
, "%qs attribute ignored",
17670 IDENTIFIER_POINTER (name
));
17671 *no_add_attrs
= true;
17674 else if ((is_attribute_p ("ms_struct", name
)
17675 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
17676 || ((is_attribute_p ("gcc_struct", name
)
17677 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
17679 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
17680 IDENTIFIER_POINTER (name
));
17681 *no_add_attrs
= true;
17688 ix86_ms_bitfield_layout_p (tree record_type
)
17690 return (TARGET_MS_BITFIELD_LAYOUT
&&
17691 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
17692 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
17695 /* Returns an expression indicating where the this parameter is
17696 located on entry to the FUNCTION. */
17699 x86_this_parameter (tree function
)
17701 tree type
= TREE_TYPE (function
);
17705 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
17706 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
17709 if (ix86_function_regparm (type
, function
) > 0)
17713 parm
= TYPE_ARG_TYPES (type
);
17714 /* Figure out whether or not the function has a variable number of
17716 for (; parm
; parm
= TREE_CHAIN (parm
))
17717 if (TREE_VALUE (parm
) == void_type_node
)
17719 /* If not, the this parameter is in the first argument. */
17723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
17725 return gen_rtx_REG (SImode
, regno
);
17729 if (aggregate_value_p (TREE_TYPE (type
), type
))
17730 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
17732 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
17735 /* Determine whether x86_output_mi_thunk can succeed. */
17738 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
17739 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
17740 HOST_WIDE_INT vcall_offset
, tree function
)
17742 /* 64-bit can handle anything. */
17746 /* For 32-bit, everything's fine if we have one free register. */
17747 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
17750 /* Need a free register for vcall_offset. */
17754 /* Need a free register for GOT references. */
17755 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
17758 /* Otherwise ok. */
17762 /* Output the assembler code for a thunk function. THUNK_DECL is the
17763 declaration for the thunk function itself, FUNCTION is the decl for
17764 the target function. DELTA is an immediate constant offset to be
17765 added to THIS. If VCALL_OFFSET is nonzero, the word at
17766 *(*this + vcall_offset) should be added to THIS. */
17769 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
17770 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
17771 HOST_WIDE_INT vcall_offset
, tree function
)
17774 rtx
this = x86_this_parameter (function
);
17777 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17778 pull it in now and let DELTA benefit. */
17781 else if (vcall_offset
)
17783 /* Put the this parameter into %eax. */
17785 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
17786 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17789 this_reg
= NULL_RTX
;
17791 /* Adjust the this parameter by a fixed constant. */
17794 xops
[0] = GEN_INT (delta
);
17795 xops
[1] = this_reg
? this_reg
: this;
17798 if (!x86_64_general_operand (xops
[0], DImode
))
17800 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17802 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
17806 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17809 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17812 /* Adjust the this parameter by a value stored in the vtable. */
17816 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
17819 int tmp_regno
= 2 /* ECX */;
17820 if (lookup_attribute ("fastcall",
17821 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
17822 tmp_regno
= 0 /* EAX */;
17823 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
17826 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
17829 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17831 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17833 /* Adjust the this parameter. */
17834 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
17835 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
17837 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
17838 xops
[0] = GEN_INT (vcall_offset
);
17840 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
17841 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
17843 xops
[1] = this_reg
;
17845 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
17847 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
17850 /* If necessary, drop THIS back to its stack slot. */
17851 if (this_reg
&& this_reg
!= this)
17853 xops
[0] = this_reg
;
17855 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
17858 xops
[0] = XEXP (DECL_RTL (function
), 0);
17861 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17862 output_asm_insn ("jmp\t%P0", xops
);
17865 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
17866 tmp
= gen_rtx_CONST (Pmode
, tmp
);
17867 tmp
= gen_rtx_MEM (QImode
, tmp
);
17869 output_asm_insn ("jmp\t%A0", xops
);
17874 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
17875 output_asm_insn ("jmp\t%P0", xops
);
17880 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
17881 tmp
= (gen_rtx_SYMBOL_REF
17883 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
17884 tmp
= gen_rtx_MEM (QImode
, tmp
);
17886 output_asm_insn ("jmp\t%0", xops
);
17889 #endif /* TARGET_MACHO */
17891 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
17892 output_set_got (tmp
, NULL_RTX
);
17895 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
17896 output_asm_insn ("jmp\t{*}%1", xops
);
17902 x86_file_start (void)
17904 default_file_start ();
17906 darwin_file_start ();
17908 if (X86_FILE_START_VERSION_DIRECTIVE
)
17909 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
17910 if (X86_FILE_START_FLTUSED
)
17911 fputs ("\t.global\t__fltused\n", asm_out_file
);
17912 if (ix86_asm_dialect
== ASM_INTEL
)
17913 fputs ("\t.intel_syntax\n", asm_out_file
);
17917 x86_field_alignment (tree field
, int computed
)
17919 enum machine_mode mode
;
17920 tree type
= TREE_TYPE (field
);
17922 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
17924 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
17925 ? get_inner_array_type (type
) : type
);
17926 if (mode
== DFmode
|| mode
== DCmode
17927 || GET_MODE_CLASS (mode
) == MODE_INT
17928 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
17929 return MIN (32, computed
);
17933 /* Output assembler code to FILE to increment profiler label # LABELNO
17934 for profiling a function entry. */
17936 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
17941 #ifndef NO_PROFILE_COUNTERS
17942 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
17944 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
17948 #ifndef NO_PROFILE_COUNTERS
17949 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
17951 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17955 #ifndef NO_PROFILE_COUNTERS
17956 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17957 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
17959 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
17963 #ifndef NO_PROFILE_COUNTERS
17964 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
17965 PROFILE_COUNT_REGISTER
);
17967 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
17971 /* We don't have exact information about the insn sizes, but we may assume
17972 quite safely that we are informed about all 1 byte insns and memory
17973 address sizes. This is enough to eliminate unnecessary padding in
17977 min_insn_size (rtx insn
)
17981 if (!INSN_P (insn
) || !active_insn_p (insn
))
17984 /* Discard alignments we've emit and jump instructions. */
17985 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
17986 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
17988 if (GET_CODE (insn
) == JUMP_INSN
17989 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
17990 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
17993 /* Important case - calls are always 5 bytes.
17994 It is common to have many calls in the row. */
17995 if (GET_CODE (insn
) == CALL_INSN
17996 && symbolic_reference_mentioned_p (PATTERN (insn
))
17997 && !SIBLING_CALL_P (insn
))
17999 if (get_attr_length (insn
) <= 1)
18002 /* For normal instructions we may rely on the sizes of addresses
18003 and the presence of symbol to require 4 bytes of encoding.
18004 This is not the case for jumps where references are PC relative. */
18005 if (GET_CODE (insn
) != JUMP_INSN
)
18007 l
= get_attr_length_address (insn
);
18008 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
/* NOTE(review): garbled extract with dropped lines (original numbers jump,
   braces/returns missing) -- restore from upstream GCC before use.
   Purpose (from visible code): scan the insn stream keeping a sliding window
   [START, INSN]; when a window smaller than 16 bytes would contain 4
   jumps/calls, emit a gen_align (p2align) pad before INSN so the AMD K8
   does not see more than 3 branches in one 16-byte fetch block.  */
18017 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18021 ix86_avoid_jump_misspredicts (void)
18023 rtx insn
, start
= get_insns ();
18024 int nbytes
= 0, njumps
= 0;
18027 /* Look for all minimal intervals of instructions containing 4 jumps.
18028 The intervals are bounded by START and INSN. NBYTES is the total
18029 size of instructions in the interval including INSN and not including
18030 START. When the NBYTES is smaller than 16 bytes, it is possible
18031 that the end of START and INSN ends up in the same 16byte page.
18033 The smallest offset in the page INSN can start is the case where START
18034 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18035 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18037 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
18040 nbytes
+= min_insn_size (insn
);
18042 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
18043 INSN_UID (insn
), min_insn_size (insn
));
18044 if ((GET_CODE (insn
) == JUMP_INSN
18045 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
18046 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
18047 || GET_CODE (insn
) == CALL_INSN
)
18054 start
= NEXT_INSN (start
);
18055 if ((GET_CODE (start
) == JUMP_INSN
18056 && GET_CODE (PATTERN (start
)) != ADDR_VEC
18057 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
18058 || GET_CODE (start
) == CALL_INSN
)
18059 njumps
--, isjump
= 1;
18062 nbytes
-= min_insn_size (start
);
18064 gcc_assert (njumps
>= 0);
18066 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
18067 INSN_UID (start
), INSN_UID (insn
), nbytes
);
18069 if (njumps
== 3 && isjump
&& nbytes
< 16)
18071 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
18074 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
18075 INSN_UID (insn
), padsize
);
18076 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
/* NOTE(review): garbled extract with dropped lines -- restore from upstream
   GCC before use.  Purpose (from visible code): for each predecessor of the
   exit block, if the block ends in a RETURN that is hot and is reached by a
   (conditional) jump or directly preceded by a call/condjump, replace the
   plain ret with gen_return_internal_long () to dodge the Athlon/K8
   branch-misprediction penalty.  */
18081 /* AMD Athlon works faster
18082 when RET is not destination of conditional jump or directly preceded
18083 by other jump instruction. We avoid the penalty by inserting NOP just
18084 before the RET instructions in such cases. */
18086 ix86_pad_returns (void)
18091 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
18093 basic_block bb
= e
->src
;
18094 rtx ret
= BB_END (bb
);
18096 bool replace
= false;
18098 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
18099 || !maybe_hot_bb_p (bb
))
18101 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
18102 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
18104 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
18109 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
18110 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
18111 && !(e
->flags
& EDGE_FALLTHRU
))
18116 prev
= prev_active_insn (ret
);
18118 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
18119 || GET_CODE (prev
) == CALL_INSN
))
18121 /* Empty functions get branch mispredict even when the jump destination
18122 is not visible to us. */
18123 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
18128 emit_insn_before (gen_return_internal_long (), ret
);
/* NOTE(review): the function signature line was dropped by the extraction;
   from upstream GCC this is presumably the machine-dependent reorg hook
   (ix86_reorg) -- confirm against the original file.  It runs the two
   passes above only when optimizing and not optimizing for size.  */
18134 /* Implement machine specific optimizations. We implement padding of returns
18135 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18139 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
18140 ix86_pad_returns ();
18141 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
18142 ix86_avoid_jump_misspredicts ();
/* NOTE(review): garbled extract (comment terminator and return statements
   dropped) -- restore from upstream GCC.  Visible logic: extract INSN's
   operands and report whether any register operand has REGNO >= 4, i.e.
   a QImode register that needs a REX prefix in 64-bit mode.  */
18145 /* Return nonzero when QImode register that must be represented via REX prefix
18148 x86_extended_QIreg_mentioned_p (rtx insn
)
18151 extract_insn_cached (insn
);
18152 for (i
= 0; i
< recog_data
.n_operands
; i
++)
18153 if (REG_P (recog_data
.operand
[i
])
18154 && REGNO (recog_data
.operand
[i
]) >= 4)
/* NOTE(review): garbled extract (guard lines / braces dropped) -- restore
   from upstream GCC.  for_each_rtx callback: nonzero when *P is a register
   in the REX integer or REX SSE register set.  */
18159 /* Return nonzero when P points to register encoded via REX prefix.
18160 Called via for_each_rtx. */
18162 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
18164 unsigned int regno
;
18167 regno
= REGNO (*p
);
18168 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
/* NOTE(review): garbled extract -- restore from upstream GCC.  Walks
   INSN's pattern with extended_reg_mentioned_1 to detect REX-prefixed
   register uses.  */
18171 /* Return true when INSN mentions register that must be encoded using REX
18174 x86_extended_reg_mentioned_p (rtx insn
)
18176 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
/* NOTE(review): garbled extract with dropped lines (e.g. the assignment of
   OUT from operands[0] around original line 18191 is missing) -- restore
   from upstream GCC.  Visible logic: unsigned int -> FP conversion.  If the
   input is non-negative, a plain signed FLOAT is used; otherwise the value
   is halved ((x >> 1) | (x & 1), preserving rounding), converted, and
   doubled with f0 + f0.  */
18179 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18180 optabs would emit if we didn't have TFmode patterns. */
18183 x86_emit_floatuns (rtx operands
[2])
18185 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
18186 enum machine_mode mode
, inmode
;
18188 inmode
= GET_MODE (operands
[1]);
18189 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
18192 in
= force_reg (inmode
, operands
[1]);
18193 mode
= GET_MODE (out
);
18194 neglab
= gen_label_rtx ();
18195 donelab
= gen_label_rtx ();
18196 i1
= gen_reg_rtx (Pmode
);
18197 f0
= gen_reg_rtx (mode
);
18199 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
18201 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
18202 emit_jump_insn (gen_jump (donelab
));
18205 emit_label (neglab
);
18207 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18208 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
18209 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
18210 expand_float (f0
, i0
, 0);
18211 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
18213 emit_label (donelab
);
/* NOTE(review): garbled extract; the switch over MODE and several case
   labels/returns were dropped by the extraction -- restore from upstream
   GCC.  Visible logic: broadcast VAL into every element of TARGET, either
   via VEC_DUPLICATE directly, or for V8HI/V16QI via punpckl + pshufd on a
   V4SI view of the value, or by widening the scalar (shift+IOR into the
   next wider mode) and recursing.  */
18216 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18217 with all elements equal to VAR. Return true if successful. */
18220 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
18221 rtx target
, rtx val
)
18223 enum machine_mode smode
, wsmode
, wvmode
;
18238 val
= force_reg (GET_MODE_INNER (mode
), val
);
18239 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18240 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18246 if (TARGET_SSE
|| TARGET_3DNOW_A
)
18248 val
= gen_lowpart (SImode
, val
);
18249 x
= gen_rtx_TRUNCATE (HImode
, val
);
18250 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
18251 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18273 /* Extend HImode to SImode using a paradoxical SUBREG. */
18274 tmp1
= gen_reg_rtx (SImode
);
18275 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18276 /* Insert the SImode value as low element of V4SImode vector. */
18277 tmp2
= gen_reg_rtx (V4SImode
);
18278 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18279 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18280 CONST0_RTX (V4SImode
),
18282 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18283 /* Cast the V4SImode vector back to a V8HImode vector. */
18284 tmp1
= gen_reg_rtx (V8HImode
);
18285 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
18286 /* Duplicate the low short through the whole low SImode word. */
18287 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
18288 /* Cast the V8HImode vector back to a V4SImode vector. */
18289 tmp2
= gen_reg_rtx (V4SImode
);
18290 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18291 /* Replicate the low element of the V4SImode vector. */
18292 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18293 /* Cast the V2SImode back to V8HImode, and store in target. */
18294 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
18305 /* Extend QImode to SImode using a paradoxical SUBREG. */
18306 tmp1
= gen_reg_rtx (SImode
);
18307 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
18308 /* Insert the SImode value as low element of V4SImode vector. */
18309 tmp2
= gen_reg_rtx (V4SImode
);
18310 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
18311 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
18312 CONST0_RTX (V4SImode
),
18314 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
18315 /* Cast the V4SImode vector back to a V16QImode vector. */
18316 tmp1
= gen_reg_rtx (V16QImode
);
18317 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
18318 /* Duplicate the low byte through the whole low SImode word. */
18319 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18320 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
18321 /* Cast the V16QImode vector back to a V4SImode vector. */
18322 tmp2
= gen_reg_rtx (V4SImode
);
18323 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
18324 /* Replicate the low element of the V4SImode vector. */
18325 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
18326 /* Cast the V2SImode back to V16QImode, and store in target. */
18327 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
18335 /* Replicate the value once into the next wider mode and recurse. */
18336 val
= convert_modes (wsmode
, smode
, val
, true);
18337 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
18338 GEN_INT (GET_MODE_BITSIZE (smode
)),
18339 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18340 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
18342 x
= gen_reg_rtx (wvmode
);
18343 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
18344 gcc_unreachable ();
18345 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): garbled extract; the mode switch, several case labels and
   return statements were dropped -- restore from upstream GCC.  Visible
   logic: build a vector that is zero everywhere except element ONE_VAR,
   which holds VAR; uses VEC_CONCAT for two-element modes, VEC_MERGE plus
   pshufd/shufps to move the element into place for 4-element modes, and
   zero-extension to SImode with recursion for narrow element modes.  */
18353 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18354 whose ONE_VAR element is VAR, and other elements are zero. Return true
18358 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
18359 rtx target
, rtx var
, int one_var
)
18361 enum machine_mode vsimode
;
18377 var
= force_reg (GET_MODE_INNER (mode
), var
);
18378 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
18379 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
18384 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
18385 new_target
= gen_reg_rtx (mode
);
18387 new_target
= target
;
18388 var
= force_reg (GET_MODE_INNER (mode
), var
);
18389 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
18390 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
18391 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
18394 /* We need to shuffle the value to the correct position, so
18395 create a new pseudo to store the intermediate result. */
18397 /* With SSE2, we can use the integer shuffle insns. */
18398 if (mode
!= V4SFmode
&& TARGET_SSE2
)
18400 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
18402 GEN_INT (one_var
== 1 ? 0 : 1),
18403 GEN_INT (one_var
== 2 ? 0 : 1),
18404 GEN_INT (one_var
== 3 ? 0 : 1)));
18405 if (target
!= new_target
)
18406 emit_move_insn (target
, new_target
);
18410 /* Otherwise convert the intermediate result to V4SFmode and
18411 use the SSE1 shuffle instructions. */
18412 if (mode
!= V4SFmode
)
18414 tmp
= gen_reg_rtx (V4SFmode
);
18415 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
18420 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
18422 GEN_INT (one_var
== 1 ? 0 : 1),
18423 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
18424 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
18426 if (mode
!= V4SFmode
)
18427 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
18428 else if (tmp
!= target
)
18429 emit_move_insn (target
, tmp
);
18431 else if (target
!= new_target
)
18432 emit_move_insn (target
, new_target
);
18437 vsimode
= V4SImode
;
18443 vsimode
= V2SImode
;
18449 /* Zero extend the variable element to SImode and recurse. */
18450 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
18452 x
= gen_reg_rtx (vsimode
);
18453 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
18455 gcc_unreachable ();
18457 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): garbled extract; switch/case structure and returns dropped
   -- restore from upstream GCC.  Visible logic: all elements of VALS are
   constant except ONE_VAR.  Load a constant pool copy with that slot
   zeroed, then overwrite the slot via ix86_expand_vector_set.  For QImode
   elements the variable byte is merged with its neighbour constant into an
   HImode value first, since there is no single-byte set.  */
18465 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18466 consisting of the values in VALS. It is known that all elements
18467 except ONE_VAR are constants. Return true if successful. */
18470 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
18471 rtx target
, rtx vals
, int one_var
)
18473 rtx var
= XVECEXP (vals
, 0, one_var
);
18474 enum machine_mode wmode
;
18477 const_vec
= copy_rtx (vals
);
18478 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
18479 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
18487 /* For the two element vectors, it's just as easy to use
18488 the general case. */
18504 /* There's no way to set one QImode entry easily. Combine
18505 the variable value with its adjacent constant value, and
18506 promote to an HImode set. */
18507 x
= XVECEXP (vals
, 0, one_var
^ 1);
18510 var
= convert_modes (HImode
, QImode
, var
, true);
18511 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
18512 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
18513 x
= GEN_INT (INTVAL (x
) & 0xff);
18517 var
= convert_modes (HImode
, QImode
, var
, true);
18518 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
18520 if (x
!= const0_rtx
)
18521 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
18522 1, OPTAB_LIB_WIDEN
);
18524 x
= gen_reg_rtx (wmode
);
18525 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
18526 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
18528 emit_move_insn (target
, gen_lowpart (mode
, x
));
18535 emit_move_insn (target
, const_vec
);
18536 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* NOTE(review): garbled extract; switch/case labels, braces and the
   interleave step that combines adjacent words were dropped -- restore
   from upstream GCC.  Visible logic: fully-variable initializer.  Either
   VEC_CONCAT two halves (recursing for 4-element modes), or assemble
   word-size chunks by shifting/IOR-ing elements together and then move the
   1, 2 or 4 words into TARGET (via low/high parts or a V4SI recursion).  */
18540 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18541 all values variable, and none identical. */
18544 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
18545 rtx target
, rtx vals
)
18547 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
18548 rtx op0
= NULL
, op1
= NULL
;
18549 bool use_vec_concat
= false;
18555 if (!mmx_ok
&& !TARGET_SSE
)
18561 /* For the two element vectors, we always implement VEC_CONCAT. */
18562 op0
= XVECEXP (vals
, 0, 0);
18563 op1
= XVECEXP (vals
, 0, 1);
18564 use_vec_concat
= true;
18568 half_mode
= V2SFmode
;
18571 half_mode
= V2SImode
;
18577 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18578 Recurse to load the two halves. */
18580 op0
= gen_reg_rtx (half_mode
);
18581 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
18582 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
18584 op1
= gen_reg_rtx (half_mode
);
18585 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
18586 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
18588 use_vec_concat
= true;
18599 gcc_unreachable ();
18602 if (use_vec_concat
)
18604 if (!register_operand (op0
, half_mode
))
18605 op0
= force_reg (half_mode
, op0
);
18606 if (!register_operand (op1
, half_mode
))
18607 op1
= force_reg (half_mode
, op1
);
18609 emit_insn (gen_rtx_SET (VOIDmode
, target
,
18610 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
18614 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
18615 enum machine_mode inner_mode
;
18616 rtx words
[4], shift
;
18618 inner_mode
= GET_MODE_INNER (mode
);
18619 n_elts
= GET_MODE_NUNITS (mode
);
18620 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
18621 n_elt_per_word
= n_elts
/ n_words
;
18622 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
18624 for (i
= 0; i
< n_words
; ++i
)
18626 rtx word
= NULL_RTX
;
18628 for (j
= 0; j
< n_elt_per_word
; ++j
)
18630 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
18631 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
18637 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
18638 word
, 1, OPTAB_LIB_WIDEN
);
18639 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
18640 word
, 1, OPTAB_LIB_WIDEN
);
18648 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
18649 else if (n_words
== 2)
18651 rtx tmp
= gen_reg_rtx (mode
);
18652 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
18653 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
18654 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
18655 emit_move_insn (target
, tmp
);
18657 else if (n_words
== 4)
18659 rtx tmp
= gen_reg_rtx (V4SImode
);
18660 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
18661 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
18662 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
18665 gcc_unreachable ();
/* NOTE(review): garbled extract; several guard conditions and returns were
   dropped -- restore from upstream GCC.  Visible logic: dispatcher.  Count
   non-constant elements; all-constant vectors come from the constant pool,
   all-identical values go to the duplicate expander, exactly-one-variable
   vectors to the one_nonzero/one_var expanders, anything else to the
   general expander.  */
18669 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18670 instructions unless MMX_OK is true. */
18673 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
18675 enum machine_mode mode
= GET_MODE (target
);
18676 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18677 int n_elts
= GET_MODE_NUNITS (mode
);
18678 int n_var
= 0, one_var
= -1;
18679 bool all_same
= true, all_const_zero
= true;
18683 for (i
= 0; i
< n_elts
; ++i
)
18685 x
= XVECEXP (vals
, 0, i
);
18686 if (!CONSTANT_P (x
))
18687 n_var
++, one_var
= i
;
18688 else if (x
!= CONST0_RTX (inner_mode
))
18689 all_const_zero
= false;
18690 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
18694 /* Constants are best loaded from the constant pool. */
18697 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
18701 /* If all values are identical, broadcast the value. */
18703 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
18704 XVECEXP (vals
, 0, 0)))
18707 /* Values where only one field is non-constant are best loaded from
18708 the pool and overwritten via move later. */
18712 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
18713 XVECEXP (vals
, 0, one_var
),
18717 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
18721 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* NOTE(review): garbled extract; the return type line, the switch over MODE
   and many case labels/breaks were dropped -- restore from upstream GCC.
   Visible logic: store scalar VAL into element ELT of vector TARGET, using
   VEC_CONCAT for 2-element modes, unpck/shufps sequences for V4SF, pshufd
   element-swap for V4SI, VEC_MERGE when available, and finally a stack
   temporary store/reload as the fallback.  */
18725 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
18727 enum machine_mode mode
= GET_MODE (target
);
18728 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18729 bool use_vec_merge
= false;
18738 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
18739 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
18741 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
18743 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
18744 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18754 /* For the two element vectors, we implement a VEC_CONCAT with
18755 the extraction of the other element. */
18757 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
18758 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
18761 op0
= val
, op1
= tmp
;
18763 op0
= tmp
, op1
= val
;
18765 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
18766 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18774 use_vec_merge
= true;
18778 /* tmp = target = A B C D */
18779 tmp
= copy_to_reg (target
);
18780 /* target = A A B B */
18781 emit_insn (gen_sse_unpcklps (target
, target
, target
));
18782 /* target = X A B B */
18783 ix86_expand_vector_set (false, target
, val
, 0);
18784 /* target = A X C D */
18785 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18786 GEN_INT (1), GEN_INT (0),
18787 GEN_INT (2+4), GEN_INT (3+4)));
18791 /* tmp = target = A B C D */
18792 tmp
= copy_to_reg (target
);
18793 /* tmp = X B C D */
18794 ix86_expand_vector_set (false, tmp
, val
, 0);
18795 /* target = A B X D */
18796 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18797 GEN_INT (0), GEN_INT (1),
18798 GEN_INT (0+4), GEN_INT (3+4)));
18802 /* tmp = target = A B C D */
18803 tmp
= copy_to_reg (target
);
18804 /* tmp = X B C D */
18805 ix86_expand_vector_set (false, tmp
, val
, 0);
18806 /* target = A B X D */
18807 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
18808 GEN_INT (0), GEN_INT (1),
18809 GEN_INT (2+4), GEN_INT (0+4)));
18813 gcc_unreachable ();
18818 /* Element 0 handled by vec_merge below. */
18821 use_vec_merge
= true;
18827 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18828 store into element 0, then shuffle them back. */
18832 order
[0] = GEN_INT (elt
);
18833 order
[1] = const1_rtx
;
18834 order
[2] = const2_rtx
;
18835 order
[3] = GEN_INT (3);
18836 order
[elt
] = const0_rtx
;
18838 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18839 order
[1], order
[2], order
[3]));
18841 ix86_expand_vector_set (false, target
, val
, 0);
18843 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
18844 order
[1], order
[2], order
[3]));
18848 /* For SSE1, we have to reuse the V4SF code. */
18849 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
18850 gen_lowpart (SFmode
, val
), elt
);
18855 use_vec_merge
= TARGET_SSE2
;
18858 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18869 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
18870 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
18871 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18875 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
18877 emit_move_insn (mem
, target
);
18879 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
18880 emit_move_insn (tmp
, val
);
18882 emit_move_insn (target
, mem
);
/* NOTE(review): garbled extract; return type, switch/case labels and breaks
   dropped -- restore from upstream GCC.  Visible logic: extract element ELT
   of VEC into TARGET, using shufps/unpckhps (V4SF), pshufd/punpckhdq
   (V4SI), VEC_SELECT when supported, or a stack spill as the fallback.
   HImode extraction is zero-extended to SImode for the optimizers.  */
18887 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
18889 enum machine_mode mode
= GET_MODE (vec
);
18890 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
18891 bool use_vec_extr
= false;
18904 use_vec_extr
= true;
18916 tmp
= gen_reg_rtx (mode
);
18917 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
18918 GEN_INT (elt
), GEN_INT (elt
),
18919 GEN_INT (elt
+4), GEN_INT (elt
+4)));
18923 tmp
= gen_reg_rtx (mode
);
18924 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
18928 gcc_unreachable ();
18931 use_vec_extr
= true;
18946 tmp
= gen_reg_rtx (mode
);
18947 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
18948 GEN_INT (elt
), GEN_INT (elt
),
18949 GEN_INT (elt
), GEN_INT (elt
)));
18953 tmp
= gen_reg_rtx (mode
);
18954 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
18958 gcc_unreachable ();
18961 use_vec_extr
= true;
18966 /* For SSE1, we have to reuse the V4SF code. */
18967 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
18968 gen_lowpart (V4SFmode
, vec
), elt
);
18974 use_vec_extr
= TARGET_SSE2
;
18977 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
18982 /* ??? Could extract the appropriate HImode element and shift. */
18989 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
18990 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
18992 /* Let the rtl optimizers know about the zero extension performed. */
18993 if (inner_mode
== HImode
)
18995 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
18996 target
= gen_lowpart (SImode
, target
);
18999 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
19003 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
19005 emit_move_insn (mem
, vec
);
19007 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
19008 emit_move_insn (target
, tmp
);
/* NOTE(review): garbled extract (braces dropped) -- restore from upstream
   GCC.  Reduces IN with binary op FN: movhlps folds the high pair onto the
   low pair, shufps brings element 1 down, and two FN applications combine
   everything into DEST.  */
19012 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19013 pattern to reduce; DEST is the destination; IN is the input vector. */
19016 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
19018 rtx tmp1
, tmp2
, tmp3
;
19020 tmp1
= gen_reg_rtx (V4SFmode
);
19021 tmp2
= gen_reg_rtx (V4SFmode
);
19022 tmp3
= gen_reg_rtx (V4SFmode
);
19024 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
19025 emit_insn (fn (tmp2
, tmp1
, in
));
19027 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
19028 GEN_INT (1), GEN_INT (1),
19029 GEN_INT (1+4), GEN_INT (1+4)));
19030 emit_insn (fn (dest
, tmp2
, tmp3
));
/* NOTE(review): garbled extract (return values inside the branches were
   dropped) -- restore from upstream GCC.  Handles decimal float modes
   specially, otherwise defers to the default hook.  */
19033 /* Target hook for scalar_mode_supported_p. */
19035 ix86_scalar_mode_supported_p (enum machine_mode mode
)
19037 if (DECIMAL_FLOAT_MODE_P (mode
))
19040 return default_scalar_mode_supported_p (mode
);
/* NOTE(review): garbled extract ("return true/false" lines dropped) --
   restore from upstream GCC.  A vector mode is supported when the matching
   ISA (SSE, SSE2, MMX, 3DNow!) is enabled and validates the mode.  */
19043 /* Implements target hook vector_mode_supported_p. */
19045 ix86_vector_mode_supported_p (enum machine_mode mode
)
19047 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
19049 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
19051 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
19053 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
/* NOTE(review): garbled extract; the tail argument of each tree_cons (the
   chained CLOBBERS list) and the return were dropped -- restore from
   upstream GCC.  Adds implicit "flags", "fpsr" and "dirflag" clobbers to
   every asm statement for cc0-era source compatibility.  */
19058 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19060 We do this in the new i386 backend to maintain source compatibility
19061 with the old cc0-based compiler. */
19064 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
19065 tree inputs ATTRIBUTE_UNUSED
,
19068 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
19070 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
19072 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
/* NOTE(review): garbled extract; the return statements in each branch were
   dropped -- restore from upstream GCC.  Decides whether EXP belongs in
   the large data/bss sections under the medium code model: never for
   functions, yes for explicit .ldata/.lbss sections, otherwise based on
   size versus ix86_section_threshold.  (The visible comment at 19077 says
   "small data/bss"; upstream says "large" -- confirm wording.)  */
19077 /* Return true if this goes in small data/bss. */
19080 ix86_in_large_data_p (tree exp
)
19082 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
19085 /* Functions are never large data. */
19086 if (TREE_CODE (exp
) == FUNCTION_DECL
)
19089 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
19091 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
19092 if (strcmp (section
, ".ldata") == 0
19093 || strcmp (section
, ".lbss") == 0)
19099 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
19101 /* If this is an incomplete type with size 0, then we can't put it
19102 in data because it might be too big when completed. */
19103 if (!size
|| size
> ix86_section_threshold
)
/* NOTE(review): garbled extract (return type and braces dropped) -- restore
   from upstream GCC.  Runs the default encoding, then tags static/external
   variables living in large data with SYMBOL_FLAG_FAR_ADDR on their
   SYMBOL_REF.  */
19110 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
19112 default_encode_section_info (decl
, rtl
, first
);
19114 if (TREE_CODE (decl
) == VAR_DECL
19115 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
19116 && ix86_in_large_data_p (decl
))
19117 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
/* NOTE(review): garbled extract (return type and braces dropped) -- restore
   from upstream GCC.  FP compare modes must use the unordered-aware
   reversal; everything else uses plain reverse_condition.  */
19120 /* Worker function for REVERSE_CONDITION. */
19123 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
19125 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
19126 ? reverse_condition (code
)
19127 : reverse_condition_maybe_unordered (code
));
/* NOTE(review): garbled extract; the final fallback return (the fst/fld
   template for the non-STACK_TOP case) was dropped -- restore from
   upstream GCC.  Emits the x87 move template: pop-store when the source
   register dies, load when the destination is the stack top.  */
19130 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19134 output_387_reg_move (rtx insn
, rtx
*operands
)
19136 if (REG_P (operands
[1])
19137 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
19139 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
19140 return output_387_ffreep (operands
, 0);
19141 return "fstp\t%y0";
19143 if (STACK_TOP_P (operands
[0]))
19144 return "fld%z1\t%y1";
/* NOTE(review): garbled extract (braces and else-arm lines dropped) --
   restore from upstream GCC.  Reads the FP status word with fnstsw; with
   SAHF it tests the UNORDERED condition via flags, otherwise it tests bit
   0x04 of the status byte; then emits the conditional jump to LABEL.  */
19148 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19149 FP status register is set. */
19152 ix86_emit_fp_unordered_jump (rtx label
)
19154 rtx reg
= gen_reg_rtx (HImode
);
19157 emit_insn (gen_x86_fnstsw_1 (reg
));
19159 if (TARGET_USE_SAHF
)
19161 emit_insn (gen_x86_sahf_1 (reg
));
19163 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
19164 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
19168 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
19170 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
19171 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
19174 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
19175 gen_rtx_LABEL_REF (VOIDmode
, label
),
19177 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
19178 emit_jump_insn (temp
);
/* NOTE(review): garbled extract (branch structure lines dropped) -- restore
   from upstream GCC.  log1p(x): when |x| is below ~0.29289 (1 - sqrt(2)/2)
   use fyl2xp1 for accuracy; otherwise compute 1+x and use fyl2x.  Both
   paths scale by ln(2) via the fldln2 constant.  */
19181 /* Output code to perform a log1p XFmode calculation. */
19183 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
19185 rtx label1
= gen_label_rtx ();
19186 rtx label2
= gen_label_rtx ();
19188 rtx tmp
= gen_reg_rtx (XFmode
);
19189 rtx tmp2
= gen_reg_rtx (XFmode
);
19191 emit_insn (gen_absxf2 (tmp
, op1
));
19192 emit_insn (gen_cmpxf (tmp
,
19193 CONST_DOUBLE_FROM_REAL_VALUE (
19194 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
19196 emit_jump_insn (gen_bge (label1
));
19198 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19199 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
19200 emit_jump (label2
);
19202 emit_label (label1
);
19203 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
19204 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
19205 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
19206 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
19208 emit_label (label2
);
/* NOTE(review): garbled extract (signature's third parameter line and the
   HAVE_COMDAT_GROUP/TLS guard around the .eh_frame test were dropped) --
   restore from upstream GCC.  Emits the @unwind marker on every .eh_frame
   section for Binutils 2.15 Solaris; otherwise defers to the default ELF
   named-section handler.  */
19211 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19214 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
19217 /* With Binutils 2.15, the "@unwind" marker must be specified on
19218 every occurrence of the ".eh_frame" section, not just the first
19221 && strcmp (name
, ".eh_frame") == 0)
19223 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
19224 flags
& SECTION_WRITE
? "aw" : "a");
19227 default_elf_asm_named_section (name
, flags
, decl
);
/* NOTE(review): garbled extract; the case labels and return strings ("g",
   "e", NULL default) were dropped -- restore from upstream GCC.  Itanium
   C++ ABI mangling for the x86 extended FP types.  */
19230 /* Return the mangling of TYPE if it is an extended fundamental type. */
19232 static const char *
19233 ix86_mangle_fundamental_type (tree type
)
19235 switch (TYPE_MODE (type
))
19238 /* __float128 is "g". */
19241 /* "long double" or __float80 is "e". */
/* NOTE(review): garbled extract (return type and braces dropped) -- restore
   from upstream GCC.  64-bit calls __stack_chk_fail directly; 32-bit uses
   the hidden __stack_chk_fail_local to avoid PIC register setup.  */
19248 /* For 32-bit code we can save PIC register setup by using
19249 __stack_chk_fail_local hidden function instead of calling
19250 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19251 register, so it is better to call __stack_chk_fail directly. */
19254 ix86_stack_protect_fail (void)
19256 return TARGET_64BIT
19257 ? default_external_stack_protect_fail ()
19258 : default_hidden_stack_protect_fail ();
/* NOTE(review): garbled extract; the flag_pic and TARGET_64BIT guards
   around the first branch were dropped -- restore from upstream GCC.
   Chooses the DWARF EH pointer encoding: pc-relative sdata4/sdata8
   (indirect for global symbols) for PIC, udata4 for small/medium 64-bit
   models, absptr otherwise.  */
19261 /* Select a format to encode pointers in exception handling data. CODE
19262 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19263 true if the symbol may be affected by dynamic relocations.
19265 ??? All x86 object file formats are capable of representing this.
19266 After all, the relocation needed is the same as for the call insn.
19267 Whether or not a particular assembler allows us to enter such, I
19268 guess we'll have to see. */
19270 asm_preferred_eh_data_format (int code
, int global
)
19274 int type
= DW_EH_PE_sdata8
;
19276 || ix86_cmodel
== CM_SMALL_PIC
19277 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
19278 type
= DW_EH_PE_sdata4
;
19279 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
19281 if (ix86_cmodel
== CM_SMALL
19282 || (ix86_cmodel
== CM_MEDIUM
&& code
))
19283 return DW_EH_PE_udata4
;
19284 return DW_EH_PE_absptr
;
/* NOTE(review): garbled extract (braces dropped; the else-arm around the
   NOT of the mask is missing context) -- restore from upstream GCC.
   RESULT = ABS_VALUE with SIGN's sign bit: AND the (inverted) sign-bit
   mask with SIGN, then IOR into the known-positive ABS_VALUE.  Builds a
   scalar mask via VEC_SELECT when MODE is not a vector mode.  */
19287 /* Expand copysign from SIGN to the positive value ABS_VALUE
19288 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19291 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
19293 enum machine_mode mode
= GET_MODE (sign
);
19294 rtx sgn
= gen_reg_rtx (mode
);
19295 if (mask
== NULL_RTX
)
19297 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
19298 if (!VECTOR_MODE_P (mode
))
19300 /* We need to generate a scalar mode mask in this case. */
19301 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19302 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19303 mask
= gen_reg_rtx (mode
);
19304 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19308 mask
= gen_rtx_NOT (mode
, mask
);
19309 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
19310 gen_rtx_AND (mode
, mask
, sign
)));
19311 emit_insn (gen_rtx_SET (VOIDmode
, result
,
19312 gen_rtx_IOR (mode
, abs_value
, sgn
)));
/* NOTE(review): garbled extract (the *SMASK store and the return of XA were
   dropped) -- restore from upstream GCC.  Computes |OP0| by ANDing with an
   inverted sign-bit mask (built scalar via VEC_SELECT when MODE is not a
   vector mode); the mask is handed back through *SMASK for reuse.  */
19315 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19316 mask for masking out the sign-bit is stored in *SMASK, if that is
19319 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
19321 enum machine_mode mode
= GET_MODE (op0
);
19324 xa
= gen_reg_rtx (mode
);
19325 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
19326 if (!VECTOR_MODE_P (mode
))
19328 /* We need to generate a scalar mode mask in this case. */
19329 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
19330 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
19331 mask
= gen_reg_rtx (mode
);
19332 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
19334 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
19335 gen_rtx_AND (mode
, op0
, mask
)));
/* NOTE(review): garbled extract; the operand-swap prologue and the final
   "return label;" were dropped -- restore from upstream GCC.  Emits an
   unordered FP compare (CCFPUmode) and a conditional forward jump to a
   fresh label, which is returned to the caller.  */
19343 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19344 swapping the operands if SWAP_OPERANDS is true. The expanded
19345 code is a forward jump to a newly created label in case the
19346 comparison is true. The generated label rtx is returned. */
19348 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
19349 bool swap_operands
)
19360 label
= gen_label_rtx ();
19361 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
19362 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19363 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
19364 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
19365 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19366 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
19367 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19368 JUMP_LABEL (tmp
) = label
;
19373 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19374 using comparison code CODE. Operands are swapped for the comparison if
19375 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19377 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
19378 bool swap_operands
)
19380 enum machine_mode mode
= GET_MODE (op0
);
19381 rtx mask
= gen_reg_rtx (mode
);
19390 if (mode
== DFmode
)
19391 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
19392 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
19394 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
19395 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
19400 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19401 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19403 ix86_gen_TWO52 (enum machine_mode mode
)
19405 REAL_VALUE_TYPE TWO52r
;
19408 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
19409 TWO52
= const_double_from_real_value (TWO52r
, mode
);
19410 TWO52
= force_reg (mode
, TWO52
);
19415 /* Expand SSE sequence for computing lround from OP1 storing
19418 ix86_expand_lround (rtx op0
, rtx op1
)
19420 /* C code for the stuff we're doing below:
19421 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19424 enum machine_mode mode
= GET_MODE (op1
);
19425 const struct real_format
*fmt
;
19426 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
19429 /* load nextafter (0.5, 0.0) */
19430 fmt
= REAL_MODE_FORMAT (mode
);
19431 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
19432 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
19434 /* adj = copysign (0.5, op1) */
19435 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
19436 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
19438 /* adj = op1 + adj */
19439 expand_simple_binop (mode
, PLUS
, adj
, op1
, adj
, 0, OPTAB_DIRECT
);
19441 /* op0 = (imode)adj */
19442 expand_fix (op0
, adj
, 0);
19445 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
19448 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
19450 /* C code for the stuff we're doing below (for do_floor):
19452 xi -= (double)xi > op1 ? 1 : 0;
19455 enum machine_mode fmode
= GET_MODE (op1
);
19456 enum machine_mode imode
= GET_MODE (op0
);
19457 rtx ireg
, freg
, label
;
19459 /* reg = (long)op1 */
19460 ireg
= gen_reg_rtx (imode
);
19461 expand_fix (ireg
, op1
, 0);
19463 /* freg = (double)reg */
19464 freg
= gen_reg_rtx (fmode
);
19465 expand_float (freg
, ireg
, 0);
19467 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19468 label
= ix86_expand_sse_compare_and_jump (UNLE
,
19469 freg
, op1
, !do_floor
);
19470 expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
19471 ireg
, const1_rtx
, ireg
, 0, OPTAB_DIRECT
);
19472 emit_label (label
);
19473 LABEL_NUSES (label
) = 1;
19475 emit_move_insn (op0
, ireg
);
19478 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19479 result in OPERAND0. */
19481 ix86_expand_rint (rtx operand0
, rtx operand1
)
19483 /* C code for the stuff we're doing below:
19484 xa = fabs (operand1);
19485 if (!isless (xa, 2**52))
19487 xa = xa + 2**52 - 2**52;
19488 return copysign (xa, operand1);
19490 enum machine_mode mode
= GET_MODE (operand0
);
19491 rtx res
, xa
, label
, TWO52
, mask
;
19493 res
= gen_reg_rtx (mode
);
19494 emit_move_insn (res
, operand1
);
19496 /* xa = abs (operand1) */
19497 xa
= ix86_expand_sse_fabs (res
, &mask
);
19499 /* if (!isless (xa, TWO52)) goto label; */
19500 TWO52
= ix86_gen_TWO52 (mode
);
19501 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19503 expand_simple_binop (mode
, PLUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19504 expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19506 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
19508 emit_label (label
);
19509 LABEL_NUSES (label
) = 1;
19511 emit_move_insn (operand0
, res
);
19514 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19517 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
19519 /* C code for the stuff we expand below.
19520 double xa = fabs (x), x2;
19521 if (!isless (xa, TWO52))
19523 xa = xa + TWO52 - TWO52;
19524 x2 = copysign (xa, x);
19533 enum machine_mode mode
= GET_MODE (operand0
);
19534 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
19536 TWO52
= ix86_gen_TWO52 (mode
);
19538 /* Temporary for holding the result, initialized to the input
19539 operand to ease control flow. */
19540 res
= gen_reg_rtx (mode
);
19541 emit_move_insn (res
, operand1
);
19543 /* xa = abs (operand1) */
19544 xa
= ix86_expand_sse_fabs (res
, &mask
);
19546 /* if (!isless (xa, TWO52)) goto label; */
19547 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19549 /* xa = xa + TWO52 - TWO52; */
19550 expand_simple_binop (mode
, PLUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19551 expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
19553 /* xa = copysign (xa, operand1) */
19554 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
19556 /* generate 1.0 or -1.0 */
19557 one
= force_reg (mode
,
19558 const_double_from_real_value (do_floor
19559 ? dconst1
: dconstm1
, mode
));
19561 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19562 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
19563 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19564 gen_rtx_AND (mode
, one
, tmp
)));
19565 /* We always need to subtract here to preserve signed zero. */
19566 expand_simple_binop (mode
, MINUS
,
19567 xa
, tmp
, res
, 0, OPTAB_DIRECT
);
19569 emit_label (label
);
19570 LABEL_NUSES (label
) = 1;
19572 emit_move_insn (operand0
, res
);
19575 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19578 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
19580 /* C code for the stuff we expand below.
19581 double xa = fabs (x), x2;
19582 if (!isless (xa, TWO52))
19584 x2 = (double)(long)x;
19591 if (HONOR_SIGNED_ZEROS (mode))
19592 return copysign (x2, x);
19595 enum machine_mode mode
= GET_MODE (operand0
);
19596 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
19598 TWO52
= ix86_gen_TWO52 (mode
);
19600 /* Temporary for holding the result, initialized to the input
19601 operand to ease control flow. */
19602 res
= gen_reg_rtx (mode
);
19603 emit_move_insn (res
, operand1
);
19605 /* xa = abs (operand1) */
19606 xa
= ix86_expand_sse_fabs (res
, &mask
);
19608 /* if (!isless (xa, TWO52)) goto label; */
19609 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19611 /* xa = (double)(long)x */
19612 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
19613 expand_fix (xi
, res
, 0);
19614 expand_float (xa
, xi
, 0);
19617 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
19619 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19620 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
19621 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19622 gen_rtx_AND (mode
, one
, tmp
)));
19623 expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
19624 xa
, tmp
, res
, 0, OPTAB_DIRECT
);
19626 if (HONOR_SIGNED_ZEROS (mode
))
19627 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
19629 emit_label (label
);
19630 LABEL_NUSES (label
) = 1;
19632 emit_move_insn (operand0
, res
);
19635 /* Expand SSE sequence for computing round from OPERAND1 storing
19636 into OPERAND0. Sequence that works without relying on DImode truncation
19637 via cvttsd2siq that is only available on 64bit targets. */
19639 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
19641 /* C code for the stuff we expand below.
19642 double xa = fabs (x), xa2, x2;
19643 if (!isless (xa, TWO52))
19645 Using the absolute value and copying back sign makes
19646 -0.0 -> -0.0 correct.
19647 xa2 = xa + TWO52 - TWO52;
19652 else if (dxa > 0.5)
19654 x2 = copysign (xa2, x);
19657 enum machine_mode mode
= GET_MODE (operand0
);
19658 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
19660 TWO52
= ix86_gen_TWO52 (mode
);
19662 /* Temporary for holding the result, initialized to the input
19663 operand to ease control flow. */
19664 res
= gen_reg_rtx (mode
);
19665 emit_move_insn (res
, operand1
);
19667 /* xa = abs (operand1) */
19668 xa
= ix86_expand_sse_fabs (res
, &mask
);
19670 /* if (!isless (xa, TWO52)) goto label; */
19671 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19673 /* xa2 = xa + TWO52 - TWO52; */
19674 xa2
= gen_reg_rtx (mode
);
19675 expand_simple_binop (mode
, PLUS
, xa
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
19676 expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
19678 /* dxa = xa2 - xa; */
19679 dxa
= gen_reg_rtx (mode
);
19680 expand_simple_binop (mode
, MINUS
, xa2
, xa
, dxa
, 0, OPTAB_DIRECT
);
19682 /* generate 0.5, 1.0 and -0.5 */
19683 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
19684 one
= gen_reg_rtx (mode
);
19685 expand_simple_binop (mode
, PLUS
, half
, half
, one
, 0, OPTAB_DIRECT
);
19686 mhalf
= gen_reg_rtx (mode
);
19687 expand_simple_binop (mode
, MINUS
, half
, one
, mhalf
, 0, OPTAB_DIRECT
);
19690 tmp
= gen_reg_rtx (mode
);
19691 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19692 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
19693 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19694 gen_rtx_AND (mode
, one
, tmp
)));
19695 expand_simple_binop (mode
, MINUS
, xa2
, tmp
, xa2
, 0, OPTAB_DIRECT
);
19696 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19697 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
19698 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
19699 gen_rtx_AND (mode
, one
, tmp
)));
19700 expand_simple_binop (mode
, PLUS
, xa2
, tmp
, xa2
, 0, OPTAB_DIRECT
);
19702 /* res = copysign (xa2, operand1) */
19703 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
19705 emit_label (label
);
19706 LABEL_NUSES (label
) = 1;
19708 emit_move_insn (operand0
, res
);
19711 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19714 ix86_expand_trunc (rtx operand0
, rtx operand1
)
19716 /* C code for SSE variant we expand below.
19717 double xa = fabs (x), x2;
19718 if (!isless (xa, TWO52))
19720 x2 = (double)(long)x;
19721 if (HONOR_SIGNED_ZEROS (mode))
19722 return copysign (x2, x);
19725 enum machine_mode mode
= GET_MODE (operand0
);
19726 rtx xa
, xi
, TWO52
, label
, res
, mask
;
19728 TWO52
= ix86_gen_TWO52 (mode
);
19730 /* Temporary for holding the result, initialized to the input
19731 operand to ease control flow. */
19732 res
= gen_reg_rtx (mode
);
19733 emit_move_insn (res
, operand1
);
19735 /* xa = abs (operand1) */
19736 xa
= ix86_expand_sse_fabs (res
, &mask
);
19738 /* if (!isless (xa, TWO52)) goto label; */
19739 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19741 /* x = (double)(long)x */
19742 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
19743 expand_fix (xi
, res
, 0);
19744 expand_float (res
, xi
, 0);
19746 if (HONOR_SIGNED_ZEROS (mode
))
19747 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
19749 emit_label (label
);
19750 LABEL_NUSES (label
) = 1;
19752 emit_move_insn (operand0
, res
);
19755 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19758 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
19760 enum machine_mode mode
= GET_MODE (operand0
);
19761 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
;
19763 /* C code for SSE variant we expand below.
19764 double xa = fabs (x), x2;
19765 if (!isless (xa, TWO52))
19767 xa2 = xa + TWO52 - TWO52;
19771 x2 = copysign (xa2, x);
19775 TWO52
= ix86_gen_TWO52 (mode
);
19777 /* Temporary for holding the result, initialized to the input
19778 operand to ease control flow. */
19779 res
= gen_reg_rtx (mode
);
19780 emit_move_insn (res
, operand1
);
19782 /* xa = abs (operand1) */
19783 xa
= ix86_expand_sse_fabs (res
, &smask
);
19785 /* if (!isless (xa, TWO52)) goto label; */
19786 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19788 /* res = xa + TWO52 - TWO52; */
19789 expand_simple_binop (mode
, PLUS
, xa
, TWO52
, res
, 0, OPTAB_DIRECT
);
19790 expand_simple_binop (mode
, MINUS
, res
, TWO52
, res
, 0, OPTAB_DIRECT
);
19793 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
19795 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19796 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
19797 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
19798 gen_rtx_AND (mode
, mask
, one
)));
19799 expand_simple_binop (mode
, MINUS
,
19800 res
, mask
, res
, 0, OPTAB_DIRECT
);
19802 /* res = copysign (res, operand1) */
19803 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
19805 emit_label (label
);
19806 LABEL_NUSES (label
) = 1;
19808 emit_move_insn (operand0
, res
);
19811 /* Expand SSE sequence for computing round from OPERAND1 storing
19814 ix86_expand_round (rtx operand0
, rtx operand1
)
19816 /* C code for the stuff we're doing below:
19817 double xa = fabs (x);
19818 if (!isless (xa, TWO52))
19820 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19821 return copysign (xa, x);
19823 enum machine_mode mode
= GET_MODE (operand0
);
19824 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
19825 const struct real_format
*fmt
;
19826 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
19828 /* Temporary for holding the result, initialized to the input
19829 operand to ease control flow. */
19830 res
= gen_reg_rtx (mode
);
19831 emit_move_insn (res
, operand1
);
19833 TWO52
= ix86_gen_TWO52 (mode
);
19834 xa
= ix86_expand_sse_fabs (res
, &mask
);
19835 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
19837 /* load nextafter (0.5, 0.0) */
19838 fmt
= REAL_MODE_FORMAT (mode
);
19839 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
19840 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
19842 /* xa = xa + 0.5 */
19843 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
19844 expand_simple_binop (mode
, PLUS
, xa
, half
, xa
, 0, OPTAB_DIRECT
);
19846 /* xa = (double)(int64_t)xa */
19847 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
19848 expand_fix (xi
, xa
, 0);
19849 expand_float (xa
, xi
, 0);
19851 /* res = copysign (xa, operand1) */
19852 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
19854 emit_label (label
);
19855 LABEL_NUSES (label
) = 1;
19857 emit_move_insn (operand0
, res
);
19860 #include "gt-i386.h"