1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
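/* A minimal sketch of how this index is meant to be used, assuming the field
   names suggested by the cost tables below (mult_init, mult_bits and divide
   are assumptions, not quotes from i386.h):

     multiply cost ~ ix86_cost->mult_init[MODE_INDEX (mode)]
                     + bits_set * ix86_cost->mult_bits;
     divide cost   ~ ix86_cost->divide[MODE_INDEX (mode)];

   QImode..DImode select slots 0..3 and any other mode falls into the final
   "other" slot (index 4).  */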
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
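/* A quick sanity check of the scale, using the assumption stated above that
   COSTS_N_INSNS (N) is (N) * 4: COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a two-byte instruction in the size table below weighs the same as one
   average instruction in the speed tables, keeping the two kinds of cost
   tables comparable.  */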
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
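/* A sketch of how the memcpy/memset entries at the end of each cost table
   below are read (the struct itself lives in i386.h; the layout here is
   inferred from the initializers, and "memcpy pair first, memset pair second"
   is an assumption).  Each table ends with two pairs of stringop_algs
   descriptors; within a pair, the first descriptor is used when compiling
   32bit code and the second for 64bit code.  A descriptor has the shape

     {algorithm for unknown block size,
      {{max_size_1, algorithm_1}, ..., {-1, fallback_algorithm}}}

   i.e. an algorithm for blocks whose size is not known at compile time,
   followed by (maximum size, algorithm) pairs terminated by -1.
   DUMMY_STRINGOP_ALGS just fills a slot a given tuning never uses, e.g. the
   64bit slot of 32bit-only processors.  */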
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we
 354      ensure the alignment).  For small blocks an inline loop is still a noticeable
 355      win; for bigger blocks either rep movsl or rep movsb is the way to go.  Rep
 356      movsb apparently has a more expensive startup time in the CPU, but after 4K
 357      the difference is down in the noise.  */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 532   /* For some reason, Athlon deals better with the REP prefix (relative to loops)
 533      than K8 does.  Alignment becomes important after 8 bytes for memcpy and
 534      128 bytes for memset.  */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584   /* New AMD processors never drop prefetches; if they cannot be performed
 585      immediately, they are queued.  We set the number of simultaneous prefetches
 586      to a large constant to reflect this (it is probably not a good idea to leave
 587      the number of prefetches completely unlimited, as their execution also takes
 588      some time).  */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597   /* K8 has optimized REP instructions for medium-sized blocks, but for very small
 598      blocks it is better to use a loop.  For large blocks, a libcall can do
 599      nontemporal accesses and beat inline code considerably.  */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 static const
608 struct processor_costs pentium4_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (3), /* cost of a lea instruction */
611 COSTS_N_INSNS (4), /* variable shift costs */
612 COSTS_N_INSNS (4), /* constant shift costs */
613 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (15), /* HI */
615 COSTS_N_INSNS (15), /* SI */
616 COSTS_N_INSNS (15), /* DI */
617 COSTS_N_INSNS (15)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (56), /* HI */
621 COSTS_N_INSNS (56), /* SI */
622 COSTS_N_INSNS (56), /* DI */
623 COSTS_N_INSNS (56)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 16, /* "large" insn */
627 6, /* MOVE_RATIO */
628 2, /* cost for loading QImode using movzbl */
629 {4, 5, 4}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {2, 3, 2}, /* cost of storing integer registers */
633 2, /* cost of reg,reg fld/fst */
634 {2, 2, 6}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {4, 4, 6}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {2, 2}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {2, 2}, /* cost of storing MMX registers
642 in SImode and DImode */
643 12, /* cost of moving SSE register */
644 {12, 12, 12}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {2, 2, 8}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 10, /* MMX or SSE register to integer */
649 64, /* size of prefetch block */
650 6, /* number of parallel prefetches */
651 2, /* Branch cost */
652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
658 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
659 DUMMY_STRINGOP_ALGS},
660 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
661 {-1, libcall}}},
662 DUMMY_STRINGOP_ALGS},
663 };
664
665 static const
666 struct processor_costs nocona_cost = {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (1), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (10), /* HI */
673 COSTS_N_INSNS (10), /* SI */
674 COSTS_N_INSNS (10), /* DI */
675 COSTS_N_INSNS (10)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (66), /* HI */
679 COSTS_N_INSNS (66), /* SI */
680 COSTS_N_INSNS (66), /* DI */
681 COSTS_N_INSNS (66)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 16, /* "large" insn */
685 17, /* MOVE_RATIO */
686 4, /* cost for loading QImode using movzbl */
687 {4, 4, 4}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {4, 4, 4}, /* cost of storing integer registers */
691 3, /* cost of reg,reg fld/fst */
692 {12, 12, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {4, 4, 4}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 6, /* cost of moving MMX register */
697 {12, 12}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {12, 12}, /* cost of storing MMX registers
700 in SImode and DImode */
701 6, /* cost of moving SSE register */
702 {12, 12, 12}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {12, 12, 12}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 8, /* MMX or SSE register to integer */
707 128, /* size of prefetch block */
708 8, /* number of parallel prefetches */
709 1, /* Branch cost */
710 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
711 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
712 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
713 COSTS_N_INSNS (3), /* cost of FABS instruction. */
714 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
715 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
716 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
718 {100000, unrolled_loop}, {-1, libcall}}}},
719 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
720 {-1, libcall}}},
721 {libcall, {{24, loop}, {64, unrolled_loop},
722 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
723 };
724
725 static const
726 struct processor_costs core2_cost = {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
729 COSTS_N_INSNS (1), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (3), /* HI */
733 COSTS_N_INSNS (3), /* SI */
734 COSTS_N_INSNS (3), /* DI */
735 COSTS_N_INSNS (3)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (22), /* HI */
739 COSTS_N_INSNS (22), /* SI */
740 COSTS_N_INSNS (22), /* DI */
741 COSTS_N_INSNS (22)}, /* other */
742 COSTS_N_INSNS (1), /* cost of movsx */
743 COSTS_N_INSNS (1), /* cost of movzx */
744 8, /* "large" insn */
745 16, /* MOVE_RATIO */
746 2, /* cost for loading QImode using movzbl */
747 {6, 6, 6}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {4, 4, 4}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {6, 6, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
 754   {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
755 2, /* cost of moving MMX register */
756 {6, 6}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {4, 4}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {6, 6, 6}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 4, 4}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 2, /* MMX or SSE register to integer */
766 128, /* size of prefetch block */
767 8, /* number of parallel prefetches */
768 3, /* Branch cost */
769 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
770 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
771 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
772 COSTS_N_INSNS (1), /* cost of FABS instruction. */
773 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
774 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
775 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
776 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
777 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
778 {{libcall, {{8, loop}, {15, unrolled_loop},
779 {2048, rep_prefix_4_byte}, {-1, libcall}}},
780 {libcall, {{24, loop}, {32, unrolled_loop},
781 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
782 };
783
784 /* Generic64 should produce code tuned for Nocona and K8. */
785 static const
786 struct processor_costs generic64_cost = {
787 COSTS_N_INSNS (1), /* cost of an add instruction */
 788   /* On all chips taken into consideration, lea is 2 cycles or more.  With
 789      this cost, however, our current implementation of synth_mult results in
 790      the use of unnecessary temporary registers, causing regressions on several
 791      SPECfp benchmarks.  */
792 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
793 COSTS_N_INSNS (1), /* variable shift costs */
794 COSTS_N_INSNS (1), /* constant shift costs */
795 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
796 COSTS_N_INSNS (4), /* HI */
797 COSTS_N_INSNS (3), /* SI */
798 COSTS_N_INSNS (4), /* DI */
799 COSTS_N_INSNS (2)}, /* other */
800 0, /* cost of multiply per each bit set */
801 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
802 COSTS_N_INSNS (26), /* HI */
803 COSTS_N_INSNS (42), /* SI */
804 COSTS_N_INSNS (74), /* DI */
805 COSTS_N_INSNS (74)}, /* other */
806 COSTS_N_INSNS (1), /* cost of movsx */
807 COSTS_N_INSNS (1), /* cost of movzx */
808 8, /* "large" insn */
809 17, /* MOVE_RATIO */
810 4, /* cost for loading QImode using movzbl */
811 {4, 4, 4}, /* cost of loading integer registers
812 in QImode, HImode and SImode.
813 Relative to reg-reg move (2). */
814 {4, 4, 4}, /* cost of storing integer registers */
815 4, /* cost of reg,reg fld/fst */
816 {12, 12, 12}, /* cost of loading fp registers
817 in SFmode, DFmode and XFmode */
818 {6, 6, 8}, /* cost of storing fp registers
819 in SFmode, DFmode and XFmode */
820 2, /* cost of moving MMX register */
821 {8, 8}, /* cost of loading MMX registers
822 in SImode and DImode */
823 {8, 8}, /* cost of storing MMX registers
824 in SImode and DImode */
825 2, /* cost of moving SSE register */
826 {8, 8, 8}, /* cost of loading SSE registers
827 in SImode, DImode and TImode */
828 {8, 8, 8}, /* cost of storing SSE registers
829 in SImode, DImode and TImode */
830 5, /* MMX or SSE register to integer */
831 64, /* size of prefetch block */
832 6, /* number of parallel prefetches */
 833   /* Benchmarking shows large regressions on the K8 sixtrack benchmark when this
 834      value is increased to the perhaps more appropriate value of 5.  */
835 3, /* Branch cost */
836 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
837 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
838 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
839 COSTS_N_INSNS (8), /* cost of FABS instruction. */
840 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
841 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
842 {DUMMY_STRINGOP_ALGS,
843 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
844 {DUMMY_STRINGOP_ALGS,
845 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
846 };
847
848 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
849 static const
850 struct processor_costs generic32_cost = {
851 COSTS_N_INSNS (1), /* cost of an add instruction */
852 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
853 COSTS_N_INSNS (1), /* variable shift costs */
854 COSTS_N_INSNS (1), /* constant shift costs */
855 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
856 COSTS_N_INSNS (4), /* HI */
857 COSTS_N_INSNS (3), /* SI */
858 COSTS_N_INSNS (4), /* DI */
859 COSTS_N_INSNS (2)}, /* other */
860 0, /* cost of multiply per each bit set */
861 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
862 COSTS_N_INSNS (26), /* HI */
863 COSTS_N_INSNS (42), /* SI */
864 COSTS_N_INSNS (74), /* DI */
865 COSTS_N_INSNS (74)}, /* other */
866 COSTS_N_INSNS (1), /* cost of movsx */
867 COSTS_N_INSNS (1), /* cost of movzx */
868 8, /* "large" insn */
869 17, /* MOVE_RATIO */
870 4, /* cost for loading QImode using movzbl */
871 {4, 4, 4}, /* cost of loading integer registers
872 in QImode, HImode and SImode.
873 Relative to reg-reg move (2). */
874 {4, 4, 4}, /* cost of storing integer registers */
875 4, /* cost of reg,reg fld/fst */
876 {12, 12, 12}, /* cost of loading fp registers
877 in SFmode, DFmode and XFmode */
878 {6, 6, 8}, /* cost of storing fp registers
879 in SFmode, DFmode and XFmode */
880 2, /* cost of moving MMX register */
881 {8, 8}, /* cost of loading MMX registers
882 in SImode and DImode */
883 {8, 8}, /* cost of storing MMX registers
884 in SImode and DImode */
885 2, /* cost of moving SSE register */
886 {8, 8, 8}, /* cost of loading SSE registers
887 in SImode, DImode and TImode */
888 {8, 8, 8}, /* cost of storing SSE registers
889 in SImode, DImode and TImode */
890 5, /* MMX or SSE register to integer */
891 64, /* size of prefetch block */
892 6, /* number of parallel prefetches */
893 3, /* Branch cost */
894 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
895 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
896 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
897 COSTS_N_INSNS (8), /* cost of FABS instruction. */
898 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
899 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
900 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
901 DUMMY_STRINGOP_ALGS},
902 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
903 DUMMY_STRINGOP_ALGS},
904 };
905
906 const struct processor_costs *ix86_cost = &pentium_cost;
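/* Note: pentium_cost is only the initial value; the option handling code
   (override_options) is expected to re-point ix86_cost at the table matching
   the -mtune= selection, or at size_cost above when optimizing for size.  */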
907
908 /* Processor feature/optimization bitmasks. */
909 #define m_386 (1<<PROCESSOR_I386)
910 #define m_486 (1<<PROCESSOR_I486)
911 #define m_PENT (1<<PROCESSOR_PENTIUM)
912 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
913 #define m_GEODE (1<<PROCESSOR_GEODE)
914 #define m_K6_GEODE (m_K6 | m_GEODE)
915 #define m_K6 (1<<PROCESSOR_K6)
916 #define m_ATHLON (1<<PROCESSOR_ATHLON)
917 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
918 #define m_K8 (1<<PROCESSOR_K8)
919 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
920 #define m_NOCONA (1<<PROCESSOR_NOCONA)
921 #define m_CORE2 (1<<PROCESSOR_CORE2)
922 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
923 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
924 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
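/* A minimal sketch of how these masks are consumed, assuming the usual
   TARGET_* wrappers in i386.h (the exact macro names are assumptions):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   i.e. each tunable below is a bitmask of the processors it applies to, and
   (x86_use_leave & (1 << PROCESSOR_K8)) being nonzero means the optimization
   is enabled when tuning for K8.  */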
925
926 /* Generic instruction choice should be common subset of supported CPUs
927 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
928
 929 /* Using leave does not affect Nocona SPEC2000 results negatively, so enabling it
 930    for Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
 931    generic because it does not work well with PPro based chips.  */
932 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
933 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
934 const int x86_zero_extend_with_and = m_486 | m_PENT;
935 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
936 const int x86_double_with_add = ~m_386;
937 const int x86_use_bit_test = m_386;
938 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
939 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
940 const int x86_3dnow_a = m_ATHLON_K8;
941 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 942 /* Branch hints were put in P4 based on simulation results.  But
 943    after P4 was made, no performance benefit was observed with
 944    branch hints.  They also increase code size.  As a result,
 945    icc never generates branch hints.  */
946 const int x86_branch_hints = 0;
947 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
 948 /* We probably ought to watch for partial register stalls on the Generic32
 949    compilation setting as well.  However, in the current implementation the
 950    partial register stalls are not eliminated very well - they can
 951    be introduced via subregs synthesized by combine and can happen
 952    in caller/callee saving sequences.
 953    Because this option pays back little on PPro based chips and conflicts
 954    with the partial reg. dependencies used by Athlon/P4 based chips, it is better
 955    to leave it off for generic32 for now.  */
956 const int x86_partial_reg_stall = m_PPRO;
957 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
958 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
959 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
960 const int x86_use_mov0 = m_K6;
961 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
962 const int x86_read_modify_write = ~m_PENT;
963 const int x86_read_modify = ~(m_PENT | m_PPRO);
964 const int x86_split_long_moves = m_PPRO;
965 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
966 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
967 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
968 const int x86_qimode_math = ~(0);
969 const int x86_promote_qi_regs = 0;
 970 /* On PPro this flag is meant to avoid partial register stalls.  Just like
 971    x86_partial_reg_stall, this option might be considered for Generic32
 972    if our scheme for avoiding partial stalls were more effective.  */
973 const int x86_himode_math = ~(m_PPRO);
974 const int x86_promote_hi_regs = m_PPRO;
975 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
976 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
977 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
978 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
980 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
981 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
982 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
983 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
984 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
985 const int x86_shift1 = ~m_486;
986 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 987 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
 988    that treat 128bit SSE registers as single units and K8 based chips that
 989    divide SSE registers into two 64bit halves.
 990    x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
 991    to allow register renaming on 128bit SSE units, but usually results in one
 992    extra microop on 64bit SSE units.  Experimental results show that disabling
 993    this option on P4 brings over 20% SPECfp regression, while enabling it on
 994    K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
 995    of moves.  */
996 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
 997 /* Set for machines where the type and dependencies are resolved on SSE
 998    register parts instead of whole registers, so we may maintain just the
 999    lower part of scalar values in the proper format, leaving the upper part
 1000    undefined.  */
1001 const int x86_sse_split_regs = m_ATHLON_K8;
1002 const int x86_sse_typeless_stores = m_ATHLON_K8;
1003 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1004 const int x86_use_ffreep = m_ATHLON_K8;
1005 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1006
 1007 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
 1008    integer data in xmm registers, which results in pretty abysmal code.  */
1009 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1010
1011 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1012 /* Some CPU cores are not able to predict more than 4 branch instructions in
1013 the 16 byte window. */
1014 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1015 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
1016 const int x86_use_bt = m_ATHLON_K8;
1017 /* Compare and exchange was added for 80486. */
1018 const int x86_cmpxchg = ~m_386;
1019 /* Compare and exchange 8 bytes was added for pentium. */
1020 const int x86_cmpxchg8b = ~(m_386 | m_486);
1021 /* Exchange and add was added for 80486. */
1022 const int x86_xadd = ~m_386;
1023 /* Byteswap was added for 80486. */
1024 const int x86_bswap = ~m_386;
1025 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
1026
1027 static enum stringop_alg stringop_alg = no_stringop;
1028
 1029 /* If the average insn count for a single function invocation is
 1030    lower than this constant, emit fast (but longer) prologue and
 1031    epilogue code.  */
1032 #define FAST_PROLOGUE_INSN_COUNT 20
1033
 1034 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
1035 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1036 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1037 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1038
1039 /* Array of the smallest class containing reg number REGNO, indexed by
1040 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1041
1042 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1043 {
1044 /* ax, dx, cx, bx */
1045 AREG, DREG, CREG, BREG,
1046 /* si, di, bp, sp */
1047 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1048 /* FP registers */
1049 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1050 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1051 /* arg pointer */
1052 NON_Q_REGS,
1053 /* flags, fpsr, fpcr, frame */
1054 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1055 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1056 SSE_REGS, SSE_REGS,
1057 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1058 MMX_REGS, MMX_REGS,
1059 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1060 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1061 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1062 SSE_REGS, SSE_REGS,
1063 };
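/* As noted above, this table backs REGNO_REG_CLASS in i386.h, presumably as
   something like

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so e.g. REGNO_REG_CLASS (0) is AREG and REGNO_REG_CLASS (7), the stack
   pointer, is NON_Q_REGS.  */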
1064
1065 /* The "default" register map used in 32bit mode. */
1066
1067 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1068 {
1069 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1070 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1071 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1072 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1073 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1074 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1075 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1076 };
1077
1078 static int const x86_64_int_parameter_registers[6] =
1079 {
1080 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1081 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1082 };
1083
1084 static int const x86_64_int_return_registers[4] =
1085 {
 1086   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1087 };
1088
1089 /* The "default" register map used in 64bit mode. */
1090 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1091 {
1092 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1093 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1094 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1095 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1096 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1097 8,9,10,11,12,13,14,15, /* extended integer registers */
1098 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1099 };
1100
1101 /* Define the register numbers to be used in Dwarf debugging information.
1102 The SVR4 reference port C compiler uses the following register numbers
1103 in its Dwarf output code:
1104 0 for %eax (gcc regno = 0)
1105 1 for %ecx (gcc regno = 2)
1106 2 for %edx (gcc regno = 1)
1107 3 for %ebx (gcc regno = 3)
1108 4 for %esp (gcc regno = 7)
1109 5 for %ebp (gcc regno = 6)
1110 6 for %esi (gcc regno = 4)
1111 7 for %edi (gcc regno = 5)
1112 The following three DWARF register numbers are never generated by
1113 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1114 believes these numbers have these meanings.
1115 8 for %eip (no gcc equivalent)
1116 9 for %eflags (gcc regno = 17)
1117 10 for %trapno (no gcc equivalent)
1118 It is not at all clear how we should number the FP stack registers
1119 for the x86 architecture. If the version of SDB on x86/svr4 were
1120 a bit less brain dead with respect to floating-point then we would
1121 have a precedent to follow with respect to DWARF register numbers
1122 for x86 FP registers, but the SDB on x86/svr4 is so completely
1123 broken with respect to FP registers that it is hardly worth thinking
1124 of it as something to strive for compatibility with.
1125 The version of x86/svr4 SDB I have at the moment does (partially)
1126 seem to believe that DWARF register number 11 is associated with
1127 the x86 register %st(0), but that's about all. Higher DWARF
1128 register numbers don't seem to be associated with anything in
1129 particular, and even for DWARF regno 11, SDB only seems to under-
1130 stand that it should say that a variable lives in %st(0) (when
1131 asked via an `=' command) if we said it was in DWARF regno 11,
1132 but SDB still prints garbage when asked for the value of the
1133 variable in question (via a `/' command).
1134 (Also note that the labels SDB prints for various FP stack regs
1135 when doing an `x' command are all wrong.)
1136 Note that these problems generally don't affect the native SVR4
1137 C compiler because it doesn't allow the use of -O with -g and
1138 because when it is *not* optimizing, it allocates a memory
1139 location for each floating-point variable, and the memory
1140 location is what gets described in the DWARF AT_location
1141 attribute for the variable in question.
1142 Regardless of the severe mental illness of the x86/svr4 SDB, we
1143 do something sensible here and we use the following DWARF
1144 register numbers. Note that these are all stack-top-relative
1145 numbers.
1146 11 for %st(0) (gcc regno = 8)
1147 12 for %st(1) (gcc regno = 9)
1148 13 for %st(2) (gcc regno = 10)
1149 14 for %st(3) (gcc regno = 11)
1150 15 for %st(4) (gcc regno = 12)
1151 16 for %st(5) (gcc regno = 13)
1152 17 for %st(6) (gcc regno = 14)
1153 18 for %st(7) (gcc regno = 15)
1154 */
1155 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1156 {
1157 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1158 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1159 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1160 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1161 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1162 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1164 };
1165
1166 /* Test and compare insns in i386.md store the information needed to
1167 generate branch and scc insns here. */
1168
1169 rtx ix86_compare_op0 = NULL_RTX;
1170 rtx ix86_compare_op1 = NULL_RTX;
1171 rtx ix86_compare_emitted = NULL_RTX;
1172
1173 /* Size of the register save area. */
1174 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
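/* With the x86_64 values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8 (assumed here, not restated in this file), this works
   out to 6 * 8 + 8 * 16 = 176 bytes, the size of the register save area the
   psABI defines for varargs.  */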
1175
1176 /* Define the structure for the machine field in struct function. */
1177
1178 struct stack_local_entry GTY(())
1179 {
1180 unsigned short mode;
1181 unsigned short n;
1182 rtx rtl;
1183 struct stack_local_entry *next;
1184 };
1185
1186 /* Structure describing stack frame layout.
1187 Stack grows downward:
1188
1189 [arguments]
1190 <- ARG_POINTER
1191 saved pc
1192
1193 saved frame pointer if frame_pointer_needed
1194 <- HARD_FRAME_POINTER
1195 [saved regs]
1196
1197 [padding1] \
1198 )
1199 [va_arg registers] (
1200 > to_allocate <- FRAME_POINTER
1201 [frame] (
1202 )
1203 [padding2] /
1204 */
1205 struct ix86_frame
1206 {
1207 int nregs;
1208 int padding1;
1209 int va_arg_size;
1210 HOST_WIDE_INT frame;
1211 int padding2;
1212 int outgoing_arguments_size;
1213 int red_zone_size;
1214
1215 HOST_WIDE_INT to_allocate;
1216 /* The offsets relative to ARG_POINTER. */
1217 HOST_WIDE_INT frame_pointer_offset;
1218 HOST_WIDE_INT hard_frame_pointer_offset;
1219 HOST_WIDE_INT stack_pointer_offset;
1220
1221 /* When save_regs_using_mov is set, emit prologue using
1222 move instead of push instructions. */
1223 bool save_regs_using_mov;
1224 };
1225
1226 /* Code model option. */
1227 enum cmodel ix86_cmodel;
1228 /* Asm dialect. */
1229 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1230 /* TLS dialects. */
1231 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1232
1233 /* Which unit we are generating floating point math for. */
1234 enum fpmath_unit ix86_fpmath;
1235
1236 /* Which cpu are we scheduling for. */
1237 enum processor_type ix86_tune;
1238 /* Which instruction set architecture to use. */
1239 enum processor_type ix86_arch;
1240
1241 /* true if sse prefetch instruction is not NOOP. */
1242 int x86_prefetch_sse;
1243
1244 /* true if cmpxchg16b is supported. */
1245 int x86_cmpxchg16b;
1246
1247 /* ix86_regparm_string as a number */
1248 static int ix86_regparm;
1249
1250 /* -mstackrealign option */
1251 extern int ix86_force_align_arg_pointer;
1252 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1253
1254 /* Preferred alignment for stack boundary in bits. */
1255 unsigned int ix86_preferred_stack_boundary;
1256
1257 /* Values 1-5: see jump.c */
1258 int ix86_branch_cost;
1259
1260 /* Variables which are this size or smaller are put in the data/bss
1261 or ldata/lbss sections. */
1262
1263 int ix86_section_threshold = 65536;
1264
1265 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1266 char internal_label_prefix[16];
1267 int internal_label_prefix_len;
1268 \f
1269 static bool ix86_handle_option (size_t, const char *, int);
1270 static void output_pic_addr_const (FILE *, rtx, int);
1271 static void put_condition_code (enum rtx_code, enum machine_mode,
1272 int, int, FILE *);
1273 static const char *get_some_local_dynamic_name (void);
1274 static int get_some_local_dynamic_name_1 (rtx *, void *);
1275 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1276 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1277 rtx *);
1278 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1279 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1280 enum machine_mode);
1281 static rtx get_thread_pointer (int);
1282 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1283 static void get_pc_thunk_name (char [32], unsigned int);
1284 static rtx gen_push (rtx);
1285 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1286 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1287 static struct machine_function * ix86_init_machine_status (void);
1288 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1289 static int ix86_nsaved_regs (void);
1290 static void ix86_emit_save_regs (void);
1291 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1292 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1293 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1294 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1295 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1296 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1297 static int ix86_issue_rate (void);
1298 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1299 static int ia32_multipass_dfa_lookahead (void);
1300 static void ix86_init_mmx_sse_builtins (void);
1301 static rtx x86_this_parameter (tree);
1302 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1303 HOST_WIDE_INT, tree);
1304 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1305 static void x86_file_start (void);
1306 static void ix86_reorg (void);
1307 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1308 static tree ix86_build_builtin_va_list (void);
1309 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1310 tree, int *, int);
1311 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1312 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1313 static bool ix86_vector_mode_supported_p (enum machine_mode);
1314
1315 static int ix86_address_cost (rtx);
1316 static bool ix86_cannot_force_const_mem (rtx);
1317 static rtx ix86_delegitimize_address (rtx);
1318
1319 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1320
1321 struct builtin_description;
1322 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1323 tree, rtx);
1324 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1325 tree, rtx);
1326 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1327 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1328 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1329 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1330 static rtx safe_vector_operand (rtx, enum machine_mode);
1331 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1332 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1333 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1334 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1335 static int ix86_fp_comparison_cost (enum rtx_code code);
1336 static unsigned int ix86_select_alt_pic_regnum (void);
1337 static int ix86_save_reg (unsigned int, int);
1338 static void ix86_compute_frame_layout (struct ix86_frame *);
1339 static int ix86_comp_type_attributes (tree, tree);
1340 static int ix86_function_regparm (tree, tree);
1341 const struct attribute_spec ix86_attribute_table[];
1342 static bool ix86_function_ok_for_sibcall (tree, tree);
1343 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1344 static int ix86_value_regno (enum machine_mode, tree, tree);
1345 static bool contains_128bit_aligned_vector_p (tree);
1346 static rtx ix86_struct_value_rtx (tree, int);
1347 static bool ix86_ms_bitfield_layout_p (tree);
1348 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1349 static int extended_reg_mentioned_1 (rtx *, void *);
1350 static bool ix86_rtx_costs (rtx, int, int, int *);
1351 static int min_insn_size (rtx);
1352 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1353 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1354 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1355 tree, bool);
1356 static void ix86_init_builtins (void);
1357 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1358 static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
1359 static const char *ix86_mangle_fundamental_type (tree);
1360 static tree ix86_stack_protect_fail (void);
1361 static rtx ix86_internal_arg_pointer (void);
1362 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1363
1364 /* This function is only used on Solaris. */
1365 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1366 ATTRIBUTE_UNUSED;
1367
1368 /* Register class used for passing given 64bit part of the argument.
1369 These represent classes as documented by the PS ABI, with the exception
1370 of the SSESF and SSEDF classes, which are basically the SSE class, except that gcc
1371 will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1372
1373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1374 whenever possible (the upper half contains only padding).
1375 */
1376 enum x86_64_reg_class
1377 {
1378 X86_64_NO_CLASS,
1379 X86_64_INTEGER_CLASS,
1380 X86_64_INTEGERSI_CLASS,
1381 X86_64_SSE_CLASS,
1382 X86_64_SSESF_CLASS,
1383 X86_64_SSEDF_CLASS,
1384 X86_64_SSEUP_CLASS,
1385 X86_64_X87_CLASS,
1386 X86_64_X87UP_CLASS,
1387 X86_64_COMPLEX_X87_CLASS,
1388 X86_64_MEMORY_CLASS
1389 };
1390 static const char * const x86_64_reg_class_name[] = {
1391 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1392 "sseup", "x87", "x87up", "cplx87", "no"
1393 };
1394
1395 #define MAX_CLASSES 4
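/* Illustrative sketch (not used by the compiler): under this classification a
   hypothetical structure such as "struct s { double d; int i; };" occupies
   two eightbytes; the first would normally classify as X86_64_SSEDF_CLASS and
   the second as X86_64_INTEGERSI_CLASS, so the argument is passed in one SSE
   register and one general-purpose register.  This is only a sketch of how
   the psABI rules are applied by the classification code, not a normative
   statement of them.  */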
1396
1397 /* Table of constants used by fldpi, fldln2, etc.... */
1398 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1399 static bool ext_80387_constants_init = 0;
1400 static void init_ext_80387_constants (void);
1401 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1402 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1403 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1404 static section *x86_64_elf_select_section (tree decl, int reloc,
1405 unsigned HOST_WIDE_INT align)
1406 ATTRIBUTE_UNUSED;
1407 \f
1408 /* Initialize the GCC target structure. */
1409 #undef TARGET_ATTRIBUTE_TABLE
1410 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1411 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1412 # undef TARGET_MERGE_DECL_ATTRIBUTES
1413 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1414 #endif
1415
1416 #undef TARGET_COMP_TYPE_ATTRIBUTES
1417 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1418
1419 #undef TARGET_INIT_BUILTINS
1420 #define TARGET_INIT_BUILTINS ix86_init_builtins
1421 #undef TARGET_EXPAND_BUILTIN
1422 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1423 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1424 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1425
1426 #undef TARGET_ASM_FUNCTION_EPILOGUE
1427 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1428
1429 #undef TARGET_ENCODE_SECTION_INFO
1430 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1431 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1432 #else
1433 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1434 #endif
1435
1436 #undef TARGET_ASM_OPEN_PAREN
1437 #define TARGET_ASM_OPEN_PAREN ""
1438 #undef TARGET_ASM_CLOSE_PAREN
1439 #define TARGET_ASM_CLOSE_PAREN ""
1440
1441 #undef TARGET_ASM_ALIGNED_HI_OP
1442 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1443 #undef TARGET_ASM_ALIGNED_SI_OP
1444 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1445 #ifdef ASM_QUAD
1446 #undef TARGET_ASM_ALIGNED_DI_OP
1447 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1448 #endif
1449
1450 #undef TARGET_ASM_UNALIGNED_HI_OP
1451 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1452 #undef TARGET_ASM_UNALIGNED_SI_OP
1453 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1454 #undef TARGET_ASM_UNALIGNED_DI_OP
1455 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1456
1457 #undef TARGET_SCHED_ADJUST_COST
1458 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1459 #undef TARGET_SCHED_ISSUE_RATE
1460 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1461 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1462 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1463 ia32_multipass_dfa_lookahead
1464
1465 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1466 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1467
1468 #ifdef HAVE_AS_TLS
1469 #undef TARGET_HAVE_TLS
1470 #define TARGET_HAVE_TLS true
1471 #endif
1472 #undef TARGET_CANNOT_FORCE_CONST_MEM
1473 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1474 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1475 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1476
1477 #undef TARGET_DELEGITIMIZE_ADDRESS
1478 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1479
1480 #undef TARGET_MS_BITFIELD_LAYOUT_P
1481 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1482
1483 #if TARGET_MACHO
1484 #undef TARGET_BINDS_LOCAL_P
1485 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1486 #endif
1487
1488 #undef TARGET_ASM_OUTPUT_MI_THUNK
1489 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1490 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1491 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1492
1493 #undef TARGET_ASM_FILE_START
1494 #define TARGET_ASM_FILE_START x86_file_start
1495
1496 #undef TARGET_DEFAULT_TARGET_FLAGS
1497 #define TARGET_DEFAULT_TARGET_FLAGS \
1498 (TARGET_DEFAULT \
1499 | TARGET_64BIT_DEFAULT \
1500 | TARGET_SUBTARGET_DEFAULT \
1501 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1502
1503 #undef TARGET_HANDLE_OPTION
1504 #define TARGET_HANDLE_OPTION ix86_handle_option
1505
1506 #undef TARGET_RTX_COSTS
1507 #define TARGET_RTX_COSTS ix86_rtx_costs
1508 #undef TARGET_ADDRESS_COST
1509 #define TARGET_ADDRESS_COST ix86_address_cost
1510
1511 #undef TARGET_FIXED_CONDITION_CODE_REGS
1512 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1513 #undef TARGET_CC_MODES_COMPATIBLE
1514 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1515
1516 #undef TARGET_MACHINE_DEPENDENT_REORG
1517 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1518
1519 #undef TARGET_BUILD_BUILTIN_VA_LIST
1520 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1521
1522 #undef TARGET_MD_ASM_CLOBBERS
1523 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1524
1525 #undef TARGET_PROMOTE_PROTOTYPES
1526 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1527 #undef TARGET_STRUCT_VALUE_RTX
1528 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1529 #undef TARGET_SETUP_INCOMING_VARARGS
1530 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1531 #undef TARGET_MUST_PASS_IN_STACK
1532 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1533 #undef TARGET_PASS_BY_REFERENCE
1534 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1535 #undef TARGET_INTERNAL_ARG_POINTER
1536 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1537 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1538 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1539
1540 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1541 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1542
1543 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1544 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1545
1546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1547 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1548
1549 #ifdef HAVE_AS_TLS
1550 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1551 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1552 #endif
1553
1554 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1555 #undef TARGET_INSERT_ATTRIBUTES
1556 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1557 #endif
1558
1559 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1560 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1561
1562 #undef TARGET_STACK_PROTECT_FAIL
1563 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1564
1565 #undef TARGET_FUNCTION_VALUE
1566 #define TARGET_FUNCTION_VALUE ix86_function_value
1567
1568 struct gcc_target targetm = TARGET_INITIALIZER;
1569
1570 \f
1571 /* The svr4 ABI for the i386 says that records and unions are returned
1572 in memory. */
1573 #ifndef DEFAULT_PCC_STRUCT_RETURN
1574 #define DEFAULT_PCC_STRUCT_RETURN 1
1575 #endif
1576
1577 /* Implement TARGET_HANDLE_OPTION. */
1578
1579 static bool
1580 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1581 {
1582 switch (code)
1583 {
1584 case OPT_m3dnow:
1585 if (!value)
1586 {
1587 target_flags &= ~MASK_3DNOW_A;
1588 target_flags_explicit |= MASK_3DNOW_A;
1589 }
1590 return true;
1591
1592 case OPT_mmmx:
1593 if (!value)
1594 {
1595 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1596 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1597 }
1598 return true;
1599
1600 case OPT_msse:
1601 if (!value)
1602 {
1603 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1604 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1605 }
1606 return true;
1607
1608 case OPT_msse2:
1609 if (!value)
1610 {
1611 target_flags &= ~MASK_SSE3;
1612 target_flags_explicit |= MASK_SSE3;
1613 }
1614 return true;
1615
1616 default:
1617 return true;
1618 }
1619 }
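/* Illustrative note: the cascading above means that explicitly negating a
   base ISA option also turns off the options that depend on it.  As a sketch,
   "-msse2 -mno-sse" ends up with MASK_SSE2 and MASK_SSE3 cleared, because
   OPT_msse with a zero value clears those dependent masks and records them in
   target_flags_explicit, so the -march defaulting in override_options will
   not re-enable them.  */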
1620
1621 /* Sometimes certain combinations of command options do not make
1622 sense on a particular target machine. You can define a macro
1623 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1624 defined, is executed once just after all the command options have
1625 been parsed.
1626
1627 Don't use this macro to turn on various extra optimizations for
1628 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1629
1630 void
1631 override_options (void)
1632 {
1633 int i;
1634 int ix86_tune_defaulted = 0;
1635
1636 /* Comes from final.c -- no real reason to change it. */
1637 #define MAX_CODE_ALIGN 16
1638
1639 static struct ptt
1640 {
1641 const struct processor_costs *cost; /* Processor costs */
1642 const int target_enable; /* Target flags to enable. */
1643 const int target_disable; /* Target flags to disable. */
1644 const int align_loop; /* Default alignments. */
1645 const int align_loop_max_skip;
1646 const int align_jump;
1647 const int align_jump_max_skip;
1648 const int align_func;
1649 }
1650 const processor_target_table[PROCESSOR_max] =
1651 {
1652 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1653 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1654 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1655 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1656 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1657 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1658 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1659 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1660 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1661 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1662 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1663 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1664 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1665 };
1666
1667 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1668 static struct pta
1669 {
1670 const char *const name; /* processor name or nickname. */
1671 const enum processor_type processor;
1672 const enum pta_flags
1673 {
1674 PTA_SSE = 1,
1675 PTA_SSE2 = 2,
1676 PTA_SSE3 = 4,
1677 PTA_MMX = 8,
1678 PTA_PREFETCH_SSE = 16,
1679 PTA_3DNOW = 32,
1680 PTA_3DNOW_A = 64,
1681 PTA_64BIT = 128,
1682 PTA_SSSE3 = 256,
1683 PTA_CX16 = 512
1684 } flags;
1685 }
1686 const processor_alias_table[] =
1687 {
1688 {"i386", PROCESSOR_I386, 0},
1689 {"i486", PROCESSOR_I486, 0},
1690 {"i586", PROCESSOR_PENTIUM, 0},
1691 {"pentium", PROCESSOR_PENTIUM, 0},
1692 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1693 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1694 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1695 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1696 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1697 {"i686", PROCESSOR_PENTIUMPRO, 0},
1698 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1699 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1700 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1701 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1702 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1703 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1704 | PTA_MMX | PTA_PREFETCH_SSE},
1705 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1706 | PTA_MMX | PTA_PREFETCH_SSE},
1707 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1708 | PTA_MMX | PTA_PREFETCH_SSE},
1709 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1710 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1711 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1712 | PTA_64BIT | PTA_MMX
1713 | PTA_PREFETCH_SSE | PTA_CX16},
1714 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1715 | PTA_3DNOW_A},
1716 {"k6", PROCESSOR_K6, PTA_MMX},
1717 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1718 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1719 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1720 | PTA_3DNOW_A},
1721 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1722 | PTA_3DNOW | PTA_3DNOW_A},
1723 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1724 | PTA_3DNOW_A | PTA_SSE},
1725 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1726 | PTA_3DNOW_A | PTA_SSE},
1727 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1728 | PTA_3DNOW_A | PTA_SSE},
1729 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1730 | PTA_SSE | PTA_SSE2 },
1731 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1732 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1733 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1734 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1735 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1736 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1737 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1738 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1739 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1740 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1741 };
1742
1743 int const pta_size = ARRAY_SIZE (processor_alias_table);
1744
1745 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1746 SUBTARGET_OVERRIDE_OPTIONS;
1747 #endif
1748
1749 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1750 SUBSUBTARGET_OVERRIDE_OPTIONS;
1751 #endif
1752
1753 /* -fPIC is the default for x86_64. */
1754 if (TARGET_MACHO && TARGET_64BIT)
1755 flag_pic = 2;
1756
1757 /* Set the default values for switches whose default depends on TARGET_64BIT
1758 in case they weren't overwritten by command line options. */
1759 if (TARGET_64BIT)
1760 {
1761 /* Mach-O doesn't support omitting the frame pointer for now. */
1762 if (flag_omit_frame_pointer == 2)
1763 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1764 if (flag_asynchronous_unwind_tables == 2)
1765 flag_asynchronous_unwind_tables = 1;
1766 if (flag_pcc_struct_return == 2)
1767 flag_pcc_struct_return = 0;
1768 }
1769 else
1770 {
1771 if (flag_omit_frame_pointer == 2)
1772 flag_omit_frame_pointer = 0;
1773 if (flag_asynchronous_unwind_tables == 2)
1774 flag_asynchronous_unwind_tables = 0;
1775 if (flag_pcc_struct_return == 2)
1776 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1777 }
1778
1779 /* Need to check -mtune=generic first. */
1780 if (ix86_tune_string)
1781 {
1782 if (!strcmp (ix86_tune_string, "generic")
1783 || !strcmp (ix86_tune_string, "i686")
1784 /* As special support for cross compilers we read -mtune=native
1785 as -mtune=generic. With native compilers we won't see
1786 -mtune=native, as it was already changed by the driver. */
1787 || !strcmp (ix86_tune_string, "native"))
1788 {
1789 if (TARGET_64BIT)
1790 ix86_tune_string = "generic64";
1791 else
1792 ix86_tune_string = "generic32";
1793 }
1794 else if (!strncmp (ix86_tune_string, "generic", 7))
1795 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1796 }
1797 else
1798 {
1799 if (ix86_arch_string)
1800 ix86_tune_string = ix86_arch_string;
1801 if (!ix86_tune_string)
1802 {
1803 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1804 ix86_tune_defaulted = 1;
1805 }
1806
1807 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1808 need to use a sensible tune option. */
1809 if (!strcmp (ix86_tune_string, "generic")
1810 || !strcmp (ix86_tune_string, "x86-64")
1811 || !strcmp (ix86_tune_string, "i686"))
1812 {
1813 if (TARGET_64BIT)
1814 ix86_tune_string = "generic64";
1815 else
1816 ix86_tune_string = "generic32";
1817 }
1818 }
1819 if (ix86_stringop_string)
1820 {
1821 if (!strcmp (ix86_stringop_string, "rep_byte"))
1822 stringop_alg = rep_prefix_1_byte;
1823 else if (!strcmp (ix86_stringop_string, "libcall"))
1824 stringop_alg = libcall;
1825 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1826 stringop_alg = rep_prefix_4_byte;
1827 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1828 stringop_alg = rep_prefix_8_byte;
1829 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1830 stringop_alg = loop_1_byte;
1831 else if (!strcmp (ix86_stringop_string, "loop"))
1832 stringop_alg = loop;
1833 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1834 stringop_alg = unrolled_loop;
1835 else
1836 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1837 }
1838 if (!strcmp (ix86_tune_string, "x86-64"))
1839 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1840 "-mtune=generic instead as appropriate.");
1841
1842 if (!ix86_arch_string)
1843 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1844 if (!strcmp (ix86_arch_string, "generic"))
1845 error ("generic CPU can be used only for -mtune= switch");
1846 if (!strncmp (ix86_arch_string, "generic", 7))
1847 error ("bad value (%s) for -march= switch", ix86_arch_string);
1848
1849 if (ix86_cmodel_string != 0)
1850 {
1851 if (!strcmp (ix86_cmodel_string, "small"))
1852 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1853 else if (!strcmp (ix86_cmodel_string, "medium"))
1854 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1855 else if (flag_pic)
1856 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1857 else if (!strcmp (ix86_cmodel_string, "32"))
1858 ix86_cmodel = CM_32;
1859 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1860 ix86_cmodel = CM_KERNEL;
1861 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1862 ix86_cmodel = CM_LARGE;
1863 else
1864 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1865 }
1866 else
1867 {
1868 ix86_cmodel = CM_32;
1869 if (TARGET_64BIT)
1870 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1871 }
1872 if (ix86_asm_string != 0)
1873 {
1874 if (! TARGET_MACHO
1875 && !strcmp (ix86_asm_string, "intel"))
1876 ix86_asm_dialect = ASM_INTEL;
1877 else if (!strcmp (ix86_asm_string, "att"))
1878 ix86_asm_dialect = ASM_ATT;
1879 else
1880 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1881 }
1882 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1883 error ("code model %qs not supported in the %s bit mode",
1884 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1885 if (ix86_cmodel == CM_LARGE)
1886 sorry ("code model %<large%> not supported yet");
1887 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1888 sorry ("%i-bit mode not compiled in",
1889 (target_flags & MASK_64BIT) ? 64 : 32);
1890
1891 for (i = 0; i < pta_size; i++)
1892 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1893 {
1894 ix86_arch = processor_alias_table[i].processor;
1895 /* Default cpu tuning to the architecture. */
1896 ix86_tune = ix86_arch;
1897 if (processor_alias_table[i].flags & PTA_MMX
1898 && !(target_flags_explicit & MASK_MMX))
1899 target_flags |= MASK_MMX;
1900 if (processor_alias_table[i].flags & PTA_3DNOW
1901 && !(target_flags_explicit & MASK_3DNOW))
1902 target_flags |= MASK_3DNOW;
1903 if (processor_alias_table[i].flags & PTA_3DNOW_A
1904 && !(target_flags_explicit & MASK_3DNOW_A))
1905 target_flags |= MASK_3DNOW_A;
1906 if (processor_alias_table[i].flags & PTA_SSE
1907 && !(target_flags_explicit & MASK_SSE))
1908 target_flags |= MASK_SSE;
1909 if (processor_alias_table[i].flags & PTA_SSE2
1910 && !(target_flags_explicit & MASK_SSE2))
1911 target_flags |= MASK_SSE2;
1912 if (processor_alias_table[i].flags & PTA_SSE3
1913 && !(target_flags_explicit & MASK_SSE3))
1914 target_flags |= MASK_SSE3;
1915 if (processor_alias_table[i].flags & PTA_SSSE3
1916 && !(target_flags_explicit & MASK_SSSE3))
1917 target_flags |= MASK_SSSE3;
1918 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1919 x86_prefetch_sse = true;
1920 if (processor_alias_table[i].flags & PTA_CX16)
1921 x86_cmpxchg16b = true;
1922 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1923 error ("CPU you selected does not support x86-64 "
1924 "instruction set");
1925 break;
1926 }
1927
1928 if (i == pta_size)
1929 error ("bad value (%s) for -march= switch", ix86_arch_string);
1930
1931 for (i = 0; i < pta_size; i++)
1932 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1933 {
1934 ix86_tune = processor_alias_table[i].processor;
1935 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1936 {
1937 if (ix86_tune_defaulted)
1938 {
1939 ix86_tune_string = "x86-64";
1940 for (i = 0; i < pta_size; i++)
1941 if (! strcmp (ix86_tune_string,
1942 processor_alias_table[i].name))
1943 break;
1944 ix86_tune = processor_alias_table[i].processor;
1945 }
1946 else
1947 error ("CPU you selected does not support x86-64 "
1948 "instruction set");
1949 }
1950 /* Intel CPUs have always interpreted SSE prefetch instructions as
1951 NOPs; so, we can enable SSE prefetch instructions even when
1952 -mtune (rather than -march) points us to a processor that has them.
1953 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1954 higher processors. */
1955 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1956 x86_prefetch_sse = true;
1957 break;
1958 }
1959 if (i == pta_size)
1960 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1961
1962 if (optimize_size)
1963 ix86_cost = &size_cost;
1964 else
1965 ix86_cost = processor_target_table[ix86_tune].cost;
1966 target_flags |= processor_target_table[ix86_tune].target_enable;
1967 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1968
1969 /* Arrange to set up i386_stack_locals for all functions. */
1970 init_machine_status = ix86_init_machine_status;
1971
1972 /* Validate -mregparm= value. */
1973 if (ix86_regparm_string)
1974 {
1975 i = atoi (ix86_regparm_string);
1976 if (i < 0 || i > REGPARM_MAX)
1977 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1978 else
1979 ix86_regparm = i;
1980 }
1981 else
1982 if (TARGET_64BIT)
1983 ix86_regparm = REGPARM_MAX;
1984
1985 /* If the user has provided any of the -malign-* options,
1986 warn and use that value only if -falign-* is not set.
1987 Remove this code in GCC 3.2 or later. */
1988 if (ix86_align_loops_string)
1989 {
1990 warning (0, "-malign-loops is obsolete, use -falign-loops");
1991 if (align_loops == 0)
1992 {
1993 i = atoi (ix86_align_loops_string);
1994 if (i < 0 || i > MAX_CODE_ALIGN)
1995 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1996 else
1997 align_loops = 1 << i;
1998 }
1999 }
2000
2001 if (ix86_align_jumps_string)
2002 {
2003 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2004 if (align_jumps == 0)
2005 {
2006 i = atoi (ix86_align_jumps_string);
2007 if (i < 0 || i > MAX_CODE_ALIGN)
2008 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2009 else
2010 align_jumps = 1 << i;
2011 }
2012 }
2013
2014 if (ix86_align_funcs_string)
2015 {
2016 warning (0, "-malign-functions is obsolete, use -falign-functions");
2017 if (align_functions == 0)
2018 {
2019 i = atoi (ix86_align_funcs_string);
2020 if (i < 0 || i > MAX_CODE_ALIGN)
2021 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2022 else
2023 align_functions = 1 << i;
2024 }
2025 }
2026
2027 /* Default align_* from the processor table. */
2028 if (align_loops == 0)
2029 {
2030 align_loops = processor_target_table[ix86_tune].align_loop;
2031 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2032 }
2033 if (align_jumps == 0)
2034 {
2035 align_jumps = processor_target_table[ix86_tune].align_jump;
2036 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2037 }
2038 if (align_functions == 0)
2039 {
2040 align_functions = processor_target_table[ix86_tune].align_func;
2041 }
2042
2043 /* Validate -mbranch-cost= value, or provide default. */
2044 ix86_branch_cost = ix86_cost->branch_cost;
2045 if (ix86_branch_cost_string)
2046 {
2047 i = atoi (ix86_branch_cost_string);
2048 if (i < 0 || i > 5)
2049 error ("-mbranch-cost=%d is not between 0 and 5", i);
2050 else
2051 ix86_branch_cost = i;
2052 }
2053 if (ix86_section_threshold_string)
2054 {
2055 i = atoi (ix86_section_threshold_string);
2056 if (i < 0)
2057 error ("-mlarge-data-threshold=%d is negative", i);
2058 else
2059 ix86_section_threshold = i;
2060 }
2061
2062 if (ix86_tls_dialect_string)
2063 {
2064 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2065 ix86_tls_dialect = TLS_DIALECT_GNU;
2066 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2067 ix86_tls_dialect = TLS_DIALECT_GNU2;
2068 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2069 ix86_tls_dialect = TLS_DIALECT_SUN;
2070 else
2071 error ("bad value (%s) for -mtls-dialect= switch",
2072 ix86_tls_dialect_string);
2073 }
2074
2075 /* Keep nonleaf frame pointers. */
2076 if (flag_omit_frame_pointer)
2077 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2078 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2079 flag_omit_frame_pointer = 1;
2080
2081 /* If we're doing fast math, we don't care about comparison order
2082 wrt NaNs. This lets us use a shorter comparison sequence. */
2083 if (flag_finite_math_only)
2084 target_flags &= ~MASK_IEEE_FP;
2085
2086 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2087 since the insns won't need emulation. */
2088 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2089 target_flags &= ~MASK_NO_FANCY_MATH_387;
2090
2091 /* Likewise, if the target doesn't have a 387, or we've specified
2092 software floating point, don't use 387 inline intrinsics. */
2093 if (!TARGET_80387)
2094 target_flags |= MASK_NO_FANCY_MATH_387;
2095
2096 /* Turn on SSE3 builtins for -mssse3. */
2097 if (TARGET_SSSE3)
2098 target_flags |= MASK_SSE3;
2099
2100 /* Turn on SSE2 builtins for -msse3. */
2101 if (TARGET_SSE3)
2102 target_flags |= MASK_SSE2;
2103
2104 /* Turn on SSE builtins for -msse2. */
2105 if (TARGET_SSE2)
2106 target_flags |= MASK_SSE;
2107
2108 /* Turn on MMX builtins for -msse. */
2109 if (TARGET_SSE)
2110 {
2111 target_flags |= MASK_MMX & ~target_flags_explicit;
2112 x86_prefetch_sse = true;
2113 }
2114
2115 /* Turn on MMX builtins for 3Dnow. */
2116 if (TARGET_3DNOW)
2117 target_flags |= MASK_MMX;
2118
2119 if (TARGET_64BIT)
2120 {
2121 if (TARGET_ALIGN_DOUBLE)
2122 error ("-malign-double makes no sense in the 64bit mode");
2123 if (TARGET_RTD)
2124 error ("-mrtd calling convention not supported in the 64bit mode");
2125
2126 /* Enable by default the SSE and MMX builtins. Do allow the user to
2127 explicitly disable any of these. In particular, disabling SSE and
2128 MMX for kernel code is extremely useful. */
2129 target_flags
2130 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2131 & ~target_flags_explicit);
2132 }
2133 else
2134 {
2135 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
2136 when the programmer takes care to keep the stack from being destroyed. */
2137 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2138 target_flags |= MASK_NO_RED_ZONE;
2139 }
2140
2141 /* Validate -mpreferred-stack-boundary= value, or provide default.
2142 The default of 128 bits is for Pentium III's SSE __m128.  We can't
2143 lower it just because of optimize_size; otherwise, we couldn't mix object
2144 files compiled with -Os and -On. */
2145 ix86_preferred_stack_boundary = 128;
2146 if (ix86_preferred_stack_boundary_string)
2147 {
2148 i = atoi (ix86_preferred_stack_boundary_string);
2149 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2150 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2151 TARGET_64BIT ? 4 : 2);
2152 else
2153 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2154 }
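/* Worked example (informational only): -mpreferred-stack-boundary=4 gives
   ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128
   bits, i.e. 16-byte stack alignment; the minimum accepted value of 2 (or 4
   for 64-bit) corresponds to 4-byte (respectively 16-byte) alignment.  */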
2155
2156 /* Accept -msseregparm only if at least SSE support is enabled. */
2157 if (TARGET_SSEREGPARM
2158 && ! TARGET_SSE)
2159 error ("-msseregparm used without SSE enabled");
2160
2161 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2162
2163 if (ix86_fpmath_string != 0)
2164 {
2165 if (! strcmp (ix86_fpmath_string, "387"))
2166 ix86_fpmath = FPMATH_387;
2167 else if (! strcmp (ix86_fpmath_string, "sse"))
2168 {
2169 if (!TARGET_SSE)
2170 {
2171 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2172 ix86_fpmath = FPMATH_387;
2173 }
2174 else
2175 ix86_fpmath = FPMATH_SSE;
2176 }
2177 else if (! strcmp (ix86_fpmath_string, "387,sse")
2178 || ! strcmp (ix86_fpmath_string, "sse,387"))
2179 {
2180 if (!TARGET_SSE)
2181 {
2182 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2183 ix86_fpmath = FPMATH_387;
2184 }
2185 else if (!TARGET_80387)
2186 {
2187 warning (0, "387 instruction set disabled, using SSE arithmetics");
2188 ix86_fpmath = FPMATH_SSE;
2189 }
2190 else
2191 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2192 }
2193 else
2194 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2195 }
2196
2197 /* If the i387 is disabled, then do not return values in it. */
2198 if (!TARGET_80387)
2199 target_flags &= ~MASK_FLOAT_RETURNS;
2200
2201 if ((x86_accumulate_outgoing_args & TUNEMASK)
2202 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2203 && !optimize_size)
2204 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2205
2206 /* ??? Unwind info is not correct around the CFG unless either a frame
2207 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2208 unwind info generation to be aware of the CFG and propagating states
2209 around edges. */
2210 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2211 || flag_exceptions || flag_non_call_exceptions)
2212 && flag_omit_frame_pointer
2213 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2214 {
2215 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2216 warning (0, "unwind tables currently require either a frame pointer "
2217 "or -maccumulate-outgoing-args for correctness");
2218 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2219 }
2220
2221 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2222 {
2223 char *p;
2224 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2225 p = strchr (internal_label_prefix, 'X');
2226 internal_label_prefix_len = p - internal_label_prefix;
2227 *p = '\0';
2228 }
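/* Illustrative note: the generated name has the form
   <target label prefix> "LX" <number>, so everything before the literal 'X'
   is saved as the prefix.  For a hypothetical generated string ".LLX0" this
   records ".LL" with internal_label_prefix_len == 3; the exact string is
   target dependent.  */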
2229
2230 /* When the scheduling description is not available, disable the scheduler pass
2231 so it does not slow down compilation or make x87 code slower. */
2232 if (!TARGET_SCHEDULE)
2233 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2234
2235 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2236 set_param_value ("simultaneous-prefetches",
2237 ix86_cost->simultaneous_prefetches);
2238 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2239 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2240 }
2241 \f
2242 /* Switch to the appropriate section for output of DECL.
2243 DECL is either a `VAR_DECL' node or a constant of some sort.
2244 RELOC indicates whether forming the initial value of DECL requires
2245 link-time relocations. */
2246
2247 static section *
2248 x86_64_elf_select_section (tree decl, int reloc,
2249 unsigned HOST_WIDE_INT align)
2250 {
2251 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2252 && ix86_in_large_data_p (decl))
2253 {
2254 const char *sname = NULL;
2255 unsigned int flags = SECTION_WRITE;
2256 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2257 {
2258 case SECCAT_DATA:
2259 sname = ".ldata";
2260 break;
2261 case SECCAT_DATA_REL:
2262 sname = ".ldata.rel";
2263 break;
2264 case SECCAT_DATA_REL_LOCAL:
2265 sname = ".ldata.rel.local";
2266 break;
2267 case SECCAT_DATA_REL_RO:
2268 sname = ".ldata.rel.ro";
2269 break;
2270 case SECCAT_DATA_REL_RO_LOCAL:
2271 sname = ".ldata.rel.ro.local";
2272 break;
2273 case SECCAT_BSS:
2274 sname = ".lbss";
2275 flags |= SECTION_BSS;
2276 break;
2277 case SECCAT_RODATA:
2278 case SECCAT_RODATA_MERGE_STR:
2279 case SECCAT_RODATA_MERGE_STR_INIT:
2280 case SECCAT_RODATA_MERGE_CONST:
2281 sname = ".lrodata";
2282 flags = 0;
2283 break;
2284 case SECCAT_SRODATA:
2285 case SECCAT_SDATA:
2286 case SECCAT_SBSS:
2287 gcc_unreachable ();
2288 case SECCAT_TEXT:
2289 case SECCAT_TDATA:
2290 case SECCAT_TBSS:
2291 /* We don't split these for the medium model.  Place them into
2292 default sections and hope for the best. */
2293 break;
2294 }
2295 if (sname)
2296 {
2297 /* We might get called with string constants, but get_named_section
2298 doesn't like them as they are not DECLs. Also, we need to set
2299 flags in that case. */
2300 if (!DECL_P (decl))
2301 return get_section (sname, flags, NULL);
2302 return get_named_section (decl, sname, reloc);
2303 }
2304 }
2305 return default_elf_select_section (decl, reloc, align);
2306 }
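/* Illustrative note: with -mcmodel=medium, a (hypothetical) writable global
   array large enough for ix86_in_large_data_p to accept it (e.g. bigger than
   the default 65536-byte ix86_section_threshold) and categorized as
   SECCAT_DATA is placed in ".ldata" by the code above; everything else falls
   through to default_elf_select_section.  */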
2307
2308 /* Build up a unique section name, expressed as a
2309 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2310 RELOC indicates whether the initial value of EXP requires
2311 link-time relocations. */
2312
2313 static void
2314 x86_64_elf_unique_section (tree decl, int reloc)
2315 {
2316 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2317 && ix86_in_large_data_p (decl))
2318 {
2319 const char *prefix = NULL;
2320 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2321 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2322
2323 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2324 {
2325 case SECCAT_DATA:
2326 case SECCAT_DATA_REL:
2327 case SECCAT_DATA_REL_LOCAL:
2328 case SECCAT_DATA_REL_RO:
2329 case SECCAT_DATA_REL_RO_LOCAL:
2330 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2331 break;
2332 case SECCAT_BSS:
2333 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2334 break;
2335 case SECCAT_RODATA:
2336 case SECCAT_RODATA_MERGE_STR:
2337 case SECCAT_RODATA_MERGE_STR_INIT:
2338 case SECCAT_RODATA_MERGE_CONST:
2339 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2340 break;
2341 case SECCAT_SRODATA:
2342 case SECCAT_SDATA:
2343 case SECCAT_SBSS:
2344 gcc_unreachable ();
2345 case SECCAT_TEXT:
2346 case SECCAT_TDATA:
2347 case SECCAT_TBSS:
2348 /* We don't split these for the medium model.  Place them into
2349 default sections and hope for the best. */
2350 break;
2351 }
2352 if (prefix)
2353 {
2354 const char *name;
2355 size_t nlen, plen;
2356 char *string;
2357 plen = strlen (prefix);
2358
2359 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2360 name = targetm.strip_name_encoding (name);
2361 nlen = strlen (name);
2362
2363 string = alloca (nlen + plen + 1);
2364 memcpy (string, prefix, plen);
2365 memcpy (string + plen, name, nlen + 1);
2366
2367 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2368 return;
2369 }
2370 }
2371 default_unique_section (decl, reloc);
2372 }
2373
2374 #ifdef COMMON_ASM_OP
2375 /* This says how to output assembler code to declare an
2376 uninitialized external linkage data object.
2377
2378 For medium model x86-64 we need to use .largecomm opcode for
2379 large objects. */
2380 void
2381 x86_elf_aligned_common (FILE *file,
2382 const char *name, unsigned HOST_WIDE_INT size,
2383 int align)
2384 {
2385 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2386 && size > (unsigned int)ix86_section_threshold)
2387 fprintf (file, ".largecomm\t");
2388 else
2389 fprintf (file, "%s", COMMON_ASM_OP);
2390 assemble_name (file, name);
2391 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2392 size, align / BITS_PER_UNIT);
2393 }
2394 #endif
2395 /* Utility function for targets to use in implementing
2396 ASM_OUTPUT_ALIGNED_BSS. */
2397
2398 void
2399 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2400 const char *name, unsigned HOST_WIDE_INT size,
2401 int align)
2402 {
2403 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2404 && size > (unsigned int)ix86_section_threshold)
2405 switch_to_section (get_named_section (decl, ".lbss", 0));
2406 else
2407 switch_to_section (bss_section);
2408 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2409 #ifdef ASM_DECLARE_OBJECT_NAME
2410 last_assemble_variable_decl = decl;
2411 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2412 #else
2413 /* The standard thing is just to output a label for the object. */
2414 ASM_OUTPUT_LABEL (file, name);
2415 #endif /* ASM_DECLARE_OBJECT_NAME */
2416 ASM_OUTPUT_SKIP (file, size ? size : 1);
2417 }
2418 \f
2419 void
2420 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2421 {
2422 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2423 make the problem with not enough registers even worse. */
2424 #ifdef INSN_SCHEDULING
2425 if (level > 1)
2426 flag_schedule_insns = 0;
2427 #endif
2428
2429 if (TARGET_MACHO)
2430 /* The Darwin libraries never set errno, so we might as well
2431 avoid calling them when that's the only reason we would. */
2432 flag_errno_math = 0;
2433
2434 /* The default values of these switches depend on TARGET_64BIT,
2435 which is not known at this point.  Mark these values with 2 and
2436 let the user override them.  If no command line option
2437 specifies them, we will set the defaults in override_options. */
2438 if (optimize >= 1)
2439 flag_omit_frame_pointer = 2;
2440 flag_pcc_struct_return = 2;
2441 flag_asynchronous_unwind_tables = 2;
2442 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2443 SUBTARGET_OPTIMIZATION_OPTIONS;
2444 #endif
2445 }
2446 \f
2447 /* Table of valid machine attributes. */
2448 const struct attribute_spec ix86_attribute_table[] =
2449 {
2450 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2451 /* Stdcall attribute says callee is responsible for popping arguments
2452 if they are not variable. */
2453 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2454 /* Fastcall attribute says callee is responsible for popping arguments
2455 if they are not variable. */
2456 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2457 /* Cdecl attribute says the callee is a normal C declaration */
2458 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2459 /* Regparm attribute specifies how many integer arguments are to be
2460 passed in registers. */
2461 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2462 /* Sseregparm attribute says we are using x86_64 calling conventions
2463 for FP arguments. */
2464 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2465 /* force_align_arg_pointer says this function realigns the stack at entry. */
2466 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2467 false, true, true, ix86_handle_cconv_attribute },
2468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2469 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2470 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2471 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2472 #endif
2473 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2474 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2475 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2476 SUBTARGET_ATTRIBUTE_TABLE,
2477 #endif
2478 { NULL, 0, 0, false, false, false, NULL }
2479 };
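/* Illustrative, user-level view of the calling-convention attributes handled
   below (hypothetical declarations, not part of the compiler):

     int __attribute__((regparm(3))) f3 (int a, int b, int c);
     int __attribute__((fastcall))  g2 (int a, int b);
     int __attribute__((stdcall))   h1 (int a);

   regparm(3) passes the first three integer arguments in EAX, EDX and ECX;
   fastcall passes the first two in ECX and EDX and, like stdcall, makes the
   callee pop its stack arguments.  The handlers referenced in the table
   enforce which combinations are valid.  */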
2480
2481 /* Decide whether we can make a sibling call to a function. DECL is the
2482 declaration of the function being targeted by the call and EXP is the
2483 CALL_EXPR representing the call. */
2484
2485 static bool
2486 ix86_function_ok_for_sibcall (tree decl, tree exp)
2487 {
2488 tree func;
2489 rtx a, b;
2490
2491 /* If we are generating position-independent code, we cannot sibcall
2492 optimize any indirect call, or a direct call to a global function,
2493 as the PLT requires %ebx be live. */
2494 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2495 return false;
2496
2497 if (decl)
2498 func = decl;
2499 else
2500 {
2501 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2502 if (POINTER_TYPE_P (func))
2503 func = TREE_TYPE (func);
2504 }
2505
2506 /* Check that the return value locations are the same.  For example,
2507 if we are returning floats on the 80387 register stack, we cannot
2508 make a sibcall from a function that doesn't return a float to a
2509 function that does or, conversely, from a function that does return
2510 a float to a function that doesn't; the necessary stack adjustment
2511 would not be executed. This is also the place we notice
2512 differences in the return value ABI. Note that it is ok for one
2513 of the functions to have void return type as long as the return
2514 value of the other is passed in a register. */
2515 a = ix86_function_value (TREE_TYPE (exp), func, false);
2516 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2517 cfun->decl, false);
2518 if (STACK_REG_P (a) || STACK_REG_P (b))
2519 {
2520 if (!rtx_equal_p (a, b))
2521 return false;
2522 }
2523 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2524 ;
2525 else if (!rtx_equal_p (a, b))
2526 return false;
2527
2528 /* If this call is indirect, we'll need to be able to use a call-clobbered
2529 register for the address of the target function. Make sure that all
2530 such registers are not used for passing parameters. */
2531 if (!decl && !TARGET_64BIT)
2532 {
2533 tree type;
2534
2535 /* We're looking at the CALL_EXPR, we need the type of the function. */
2536 type = TREE_OPERAND (exp, 0); /* pointer expression */
2537 type = TREE_TYPE (type); /* pointer type */
2538 type = TREE_TYPE (type); /* function type */
2539
2540 if (ix86_function_regparm (type, NULL) >= 3)
2541 {
2542 /* ??? Need to count the actual number of registers to be used,
2543 not the possible number of registers. Fix later. */
2544 return false;
2545 }
2546 }
2547
2548 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2549 /* Dllimport'd functions are also called indirectly. */
2550 if (decl && DECL_DLLIMPORT_P (decl)
2551 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2552 return false;
2553 #endif
2554
2555 /* If we force-aligned the stack, then sibcalling would unalign the
2556 stack, which may break the called function. */
2557 if (cfun->machine->force_align_arg_pointer)
2558 return false;
2559
2560 /* Otherwise okay. That also includes certain types of indirect calls. */
2561 return true;
2562 }
2563
2564 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2565 calling convention attributes;
2566 arguments as in struct attribute_spec.handler. */
2567
2568 static tree
2569 ix86_handle_cconv_attribute (tree *node, tree name,
2570 tree args,
2571 int flags ATTRIBUTE_UNUSED,
2572 bool *no_add_attrs)
2573 {
2574 if (TREE_CODE (*node) != FUNCTION_TYPE
2575 && TREE_CODE (*node) != METHOD_TYPE
2576 && TREE_CODE (*node) != FIELD_DECL
2577 && TREE_CODE (*node) != TYPE_DECL)
2578 {
2579 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2580 IDENTIFIER_POINTER (name));
2581 *no_add_attrs = true;
2582 return NULL_TREE;
2583 }
2584
2585 /* Can combine regparm with all attributes but fastcall. */
2586 if (is_attribute_p ("regparm", name))
2587 {
2588 tree cst;
2589
2590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2591 {
2592 error ("fastcall and regparm attributes are not compatible");
2593 }
2594
2595 cst = TREE_VALUE (args);
2596 if (TREE_CODE (cst) != INTEGER_CST)
2597 {
2598 warning (OPT_Wattributes,
2599 "%qs attribute requires an integer constant argument",
2600 IDENTIFIER_POINTER (name));
2601 *no_add_attrs = true;
2602 }
2603 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2604 {
2605 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2606 IDENTIFIER_POINTER (name), REGPARM_MAX);
2607 *no_add_attrs = true;
2608 }
2609
2610 if (!TARGET_64BIT
2611 && lookup_attribute (ix86_force_align_arg_pointer_string,
2612 TYPE_ATTRIBUTES (*node))
2613 && compare_tree_int (cst, REGPARM_MAX-1))
2614 {
2615 error ("%s functions limited to %d register parameters",
2616 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2617 }
2618
2619 return NULL_TREE;
2620 }
2621
2622 if (TARGET_64BIT)
2623 {
2624 warning (OPT_Wattributes, "%qs attribute ignored",
2625 IDENTIFIER_POINTER (name));
2626 *no_add_attrs = true;
2627 return NULL_TREE;
2628 }
2629
2630 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2631 if (is_attribute_p ("fastcall", name))
2632 {
2633 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2634 {
2635 error ("fastcall and cdecl attributes are not compatible");
2636 }
2637 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2638 {
2639 error ("fastcall and stdcall attributes are not compatible");
2640 }
2641 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2642 {
2643 error ("fastcall and regparm attributes are not compatible");
2644 }
2645 }
2646
2647 /* Can combine stdcall with fastcall (redundant), regparm and
2648 sseregparm. */
2649 else if (is_attribute_p ("stdcall", name))
2650 {
2651 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2652 {
2653 error ("stdcall and cdecl attributes are not compatible");
2654 }
2655 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2656 {
2657 error ("stdcall and fastcall attributes are not compatible");
2658 }
2659 }
2660
2661 /* Can combine cdecl with regparm and sseregparm. */
2662 else if (is_attribute_p ("cdecl", name))
2663 {
2664 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2665 {
2666 error ("stdcall and cdecl attributes are not compatible");
2667 }
2668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2669 {
2670 error ("fastcall and cdecl attributes are not compatible");
2671 }
2672 }
2673
2674 /* Can combine sseregparm with all attributes. */
2675
2676 return NULL_TREE;
2677 }
2678
2679 /* Return 0 if the attributes for two types are incompatible, 1 if they
2680 are compatible, and 2 if they are nearly compatible (which causes a
2681 warning to be generated). */
2682
2683 static int
2684 ix86_comp_type_attributes (tree type1, tree type2)
2685 {
2686 /* Check for mismatch of non-default calling convention. */
2687 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2688
2689 if (TREE_CODE (type1) != FUNCTION_TYPE)
2690 return 1;
2691
2692 /* Check for mismatched fastcall/regparm types. */
2693 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2694 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2695 || (ix86_function_regparm (type1, NULL)
2696 != ix86_function_regparm (type2, NULL)))
2697 return 0;
2698
2699 /* Check for mismatched sseregparm types. */
2700 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2701 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2702 return 0;
2703
2704 /* Check for mismatched return types (cdecl vs stdcall). */
2705 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2706 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2707 return 0;
2708
2709 return 1;
2710 }
2711 \f
2712 /* Return the regparm value for a function with the indicated TYPE and DECL.
2713 DECL may be NULL when calling function indirectly
2714 or considering a libcall. */
2715
2716 static int
2717 ix86_function_regparm (tree type, tree decl)
2718 {
2719 tree attr;
2720 int regparm = ix86_regparm;
2721 bool user_convention = false;
2722
2723 if (!TARGET_64BIT)
2724 {
2725 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2726 if (attr)
2727 {
2728 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2729 user_convention = true;
2730 }
2731
2732 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2733 {
2734 regparm = 2;
2735 user_convention = true;
2736 }
2737
2738 /* Use register calling convention for local functions when possible. */
2739 if (!TARGET_64BIT && !user_convention && decl
2740 && flag_unit_at_a_time && !profile_flag)
2741 {
2742 struct cgraph_local_info *i = cgraph_local_info (decl);
2743 if (i && i->local)
2744 {
2745 int local_regparm, globals = 0, regno;
2746
2747 /* Make sure no regparm register is taken by a global register
2748 variable. */
2749 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2750 if (global_regs[local_regparm])
2751 break;
2752 /* We can't use regparm(3) for nested functions as these use
2753 static chain pointer in third argument. */
2754 if (local_regparm == 3
2755 && decl_function_context (decl)
2756 && !DECL_NO_STATIC_CHAIN (decl))
2757 local_regparm = 2;
2758 /* If the function realigns its stack pointer, the
2759 prologue will clobber %ecx. If we've already
2760 generated code for the callee, the callee
2761 DECL_STRUCT_FUNCTION is gone, so we fall back to
2762 scanning the attributes for the self-realigning
2763 property. */
2764 if ((DECL_STRUCT_FUNCTION (decl)
2765 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2766 || (!DECL_STRUCT_FUNCTION (decl)
2767 && lookup_attribute (ix86_force_align_arg_pointer_string,
2768 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2769 local_regparm = 2;
2770 /* Each global register variable increases register pressure,
2771 so the more global reg vars there are, the less useful the regparm
2772 optimization is, unless the user requests it explicitly. */
2773 for (regno = 0; regno < 6; regno++)
2774 if (global_regs[regno])
2775 globals++;
2776 local_regparm
2777 = globals < local_regparm ? local_regparm - globals : 0;
2778
2779 if (local_regparm > regparm)
2780 regparm = local_regparm;
2781 }
2782 }
2783 }
2784 return regparm;
2785 }
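/* Illustrative note: for a (hypothetical) file-local function compiled with
   -funit-at-a-time and without profiling, e.g.

     static int accumulate (int a, int b, int c) { return a + b + c; }

   cgraph_local_info normally reports it as local, so the logic above may
   promote it to regparm(3) even though no attribute was given, provided no
   global register variables, stack realignment or a static chain get in the
   way.  This is a sketch of the heuristic, not a guarantee of the final
   convention.  */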
2786
2787 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2788 DFmode (2) arguments in SSE registers for a function with the
2789 indicated TYPE and DECL. DECL may be NULL when calling function
2790 indirectly or considering a libcall. Otherwise return 0. */
2791
2792 static int
2793 ix86_function_sseregparm (tree type, tree decl)
2794 {
2795 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2796 by the sseregparm attribute. */
2797 if (TARGET_SSEREGPARM
2798 || (type
2799 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2800 {
2801 if (!TARGET_SSE)
2802 {
2803 if (decl)
2804 error ("Calling %qD with attribute sseregparm without "
2805 "SSE/SSE2 enabled", decl);
2806 else
2807 error ("Calling %qT with attribute sseregparm without "
2808 "SSE/SSE2 enabled", type);
2809 return 0;
2810 }
2811
2812 return 2;
2813 }
2814
2815 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2816 (and DFmode for SSE2) arguments in SSE registers,
2817 even for 32-bit targets. */
2818 if (!TARGET_64BIT && decl
2819 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2820 {
2821 struct cgraph_local_info *i = cgraph_local_info (decl);
2822 if (i && i->local)
2823 return TARGET_SSE2 ? 2 : 1;
2824 }
2825
2826 return 0;
2827 }
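/* Illustrative note: with -mfpmath=sse on a 32-bit target, a (hypothetical)
   local function such as "static double scale (double x) { return x * 2.0; }"
   can have its DFmode argument passed in an SSE register (return value 2
   above) when TARGET_SSE2 is enabled, whereas an extern function keeps the
   default stack-based convention unless the sseregparm attribute is used.  */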
2828
2829 /* Return true if EAX is live at the start of the function. Used by
2830 ix86_expand_prologue to determine if we need special help before
2831 calling allocate_stack_worker. */
2832
2833 static bool
2834 ix86_eax_live_at_start_p (void)
2835 {
2836 /* Cheat. Don't bother working forward from ix86_function_regparm
2837 to the function type to whether an actual argument is located in
2838 eax. Instead just look at cfg info, which is still close enough
2839 to correct at this point. This gives false positives for broken
2840 functions that might use uninitialized data that happens to be
2841 allocated in eax, but who cares? */
2842 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2843 }
2844
2845 /* Value is the number of bytes of arguments automatically
2846 popped when returning from a subroutine call.
2847 FUNDECL is the declaration node of the function (as a tree),
2848 FUNTYPE is the data type of the function (as a tree),
2849 or for a library call it is an identifier node for the subroutine name.
2850 SIZE is the number of bytes of arguments passed on the stack.
2851
2852 On the 80386, the RTD insn may be used to pop them if the number
2853 of args is fixed, but if the number is variable then the caller
2854 must pop them all. RTD can't be used for library calls now
2855 because the library is compiled with the Unix compiler.
2856 Use of RTD is a selectable option, since it is incompatible with
2857 standard Unix calling sequences. If the option is not selected,
2858 the caller must always pop the args.
2859
2860 The attribute stdcall is equivalent to RTD on a per module basis. */
2861
2862 int
2863 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2864 {
2865 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2866
2867 /* Cdecl functions override -mrtd, and never pop the stack. */
2868 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2869
2870 /* Stdcall and fastcall functions will pop the stack if not
2871 variable args. */
2872 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2873 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2874 rtd = 1;
2875
2876 if (rtd
2877 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2878 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2879 == void_type_node)))
2880 return size;
2881 }
2882
2883 /* Lose any fake structure return argument if it is passed on the stack. */
2884 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2885 && !TARGET_64BIT
2886 && !KEEP_AGGREGATE_RETURN_POINTER)
2887 {
2888 int nregs = ix86_function_regparm (funtype, fundecl);
2889
2890 if (!nregs)
2891 return GET_MODE_SIZE (Pmode);
2892 }
2893
2894 return 0;
2895 }
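/* Illustrative note: for a (hypothetical) prototype
   "int __attribute__((stdcall)) f (int a, int b);" the argument list is
   fixed, so the function above returns SIZE (8 bytes here) and the callee
   pops its arguments, typically via "ret $8"; a variadic or plain cdecl
   function returns 0 and the caller pops instead.  */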
2896 \f
2897 /* Argument support functions. */
2898
2899 /* Return true when register may be used to pass function parameters. */
2900 bool
2901 ix86_function_arg_regno_p (int regno)
2902 {
2903 int i;
2904 if (!TARGET_64BIT)
2905 return (regno < REGPARM_MAX
2906 || (TARGET_MMX && MMX_REGNO_P (regno)
2907 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2908 || (TARGET_SSE && SSE_REGNO_P (regno)
2909 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2910
2911 if (TARGET_SSE && SSE_REGNO_P (regno)
2912 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2913 return true;
2914 /* RAX is used as hidden argument to va_arg functions. */
2915 if (!regno)
2916 return true;
2917 for (i = 0; i < REGPARM_MAX; i++)
2918 if (regno == x86_64_int_parameter_registers[i])
2919 return true;
2920 return false;
2921 }
2922
2923 /* Return true if we do not know how to pass TYPE solely in registers. */
2924
2925 static bool
2926 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2927 {
2928 if (must_pass_in_stack_var_size_or_pad (mode, type))
2929 return true;
2930
2931 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2932 The layout_type routine is crafty and tries to trick us into passing
2933 currently unsupported vector types on the stack by using TImode. */
2934 return (!TARGET_64BIT && mode == TImode
2935 && type && TREE_CODE (type) != VECTOR_TYPE);
2936 }
2937
2938 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2939 for a call to a function whose data type is FNTYPE.
2940 For a library call, FNTYPE is 0. */
2941
2942 void
2943 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2944 tree fntype, /* tree ptr for function decl */
2945 rtx libname, /* SYMBOL_REF of library name or 0 */
2946 tree fndecl)
2947 {
2948 static CUMULATIVE_ARGS zero_cum;
2949 tree param, next_param;
2950
2951 if (TARGET_DEBUG_ARG)
2952 {
2953 fprintf (stderr, "\ninit_cumulative_args (");
2954 if (fntype)
2955 fprintf (stderr, "fntype code = %s, ret code = %s",
2956 tree_code_name[(int) TREE_CODE (fntype)],
2957 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2958 else
2959 fprintf (stderr, "no fntype");
2960
2961 if (libname)
2962 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2963 }
2964
2965 *cum = zero_cum;
2966
2967 /* Set up the number of registers to use for passing arguments. */
2968 cum->nregs = ix86_regparm;
2969 if (TARGET_SSE)
2970 cum->sse_nregs = SSE_REGPARM_MAX;
2971 if (TARGET_MMX)
2972 cum->mmx_nregs = MMX_REGPARM_MAX;
2973 cum->warn_sse = true;
2974 cum->warn_mmx = true;
2975 cum->maybe_vaarg = false;
2976
2977 /* Use ecx and edx registers if function has fastcall attribute,
2978 else look for regparm information. */
2979 if (fntype && !TARGET_64BIT)
2980 {
2981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2982 {
2983 cum->nregs = 2;
2984 cum->fastcall = 1;
2985 }
2986 else
2987 cum->nregs = ix86_function_regparm (fntype, fndecl);
2988 }
2989
2990 /* Set up the number of SSE registers used for passing SFmode
2991 and DFmode arguments. Warn for mismatching ABI. */
2992 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2993
2994 /* Determine if this function has variable arguments. This is
2995 indicated by the last argument being 'void_type_node' if there
2996 are no variable arguments. If there are variable arguments, then
2997 we won't pass anything in registers in 32-bit mode. */
2998
2999 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3000 {
3001 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3002 param != 0; param = next_param)
3003 {
3004 next_param = TREE_CHAIN (param);
3005 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3006 {
3007 if (!TARGET_64BIT)
3008 {
3009 cum->nregs = 0;
3010 cum->sse_nregs = 0;
3011 cum->mmx_nregs = 0;
3012 cum->warn_sse = 0;
3013 cum->warn_mmx = 0;
3014 cum->fastcall = 0;
3015 cum->float_in_sse = 0;
3016 }
3017 cum->maybe_vaarg = true;
3018 }
3019 }
3020 }
3021 if ((!fntype && !libname)
3022 || (fntype && !TYPE_ARG_TYPES (fntype)))
3023 cum->maybe_vaarg = true;
3024
3025 if (TARGET_DEBUG_ARG)
3026 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3027
3028 return;
3029 }
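
/* As an illustration, for a hypothetical
   `void __attribute__((fastcall)) f (int a, int b, int c)' the code above sets
   cum->nregs to 2 and cum->fastcall to 1, so function_arg later places A and B
   in ECX and EDX while C goes on the stack.  With a regparm(3) attribute the
   first three integer arguments would instead use EAX, EDX and ECX.  */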
3030
3031 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3032 But in the case of vector types, it is some vector mode.
3033
3034 When we have only some of our vector isa extensions enabled, then there
3035 are some modes for which vector_mode_supported_p is false. For these
3036 modes, the generic vector support in gcc will choose some non-vector mode
3037 in order to implement the type. By computing the natural mode, we'll
3038 select the proper ABI location for the operand and not depend on whatever
3039 the middle-end decides to do with these vector types. */
3040
3041 static enum machine_mode
3042 type_natural_mode (tree type)
3043 {
3044 enum machine_mode mode = TYPE_MODE (type);
3045
3046 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3047 {
3048 HOST_WIDE_INT size = int_size_in_bytes (type);
3049 if ((size == 8 || size == 16)
3050 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3051 && TYPE_VECTOR_SUBPARTS (type) > 1)
3052 {
3053 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3054
3055 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3056 mode = MIN_MODE_VECTOR_FLOAT;
3057 else
3058 mode = MIN_MODE_VECTOR_INT;
3059
3060 /* Get the mode which has this inner mode and number of units. */
3061 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3062 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3063 && GET_MODE_INNER (mode) == innermode)
3064 return mode;
3065
3066 gcc_unreachable ();
3067 }
3068 }
3069
3070 return mode;
3071 }
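
/* For example, with -mno-mmx the middle-end typically lays out a hypothetical
   `int __attribute__((vector_size (8)))' type in DImode because V2SImode is
   not "supported"; type_natural_mode still computes V2SImode, so the ABI
   decision for the argument does not change with the -m switches in effect.  */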
3072
3073 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3074 this may not agree with the mode that the type system has chosen for the
3075 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3076 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3077
3078 static rtx
3079 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3080 unsigned int regno)
3081 {
3082 rtx tmp;
3083
3084 if (orig_mode != BLKmode)
3085 tmp = gen_rtx_REG (orig_mode, regno);
3086 else
3087 {
3088 tmp = gen_rtx_REG (mode, regno);
3089 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3090 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3091 }
3092
3093 return tmp;
3094 }
3095
3096 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3097 The goal of this code is to classify each eightbyte of the incoming argument
3098 by register class and assign registers accordingly. */
3099
3100 /* Return the union class of CLASS1 and CLASS2.
3101 See the x86-64 PS ABI for details. */
3102
3103 static enum x86_64_reg_class
3104 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3105 {
3106 /* Rule #1: If both classes are equal, this is the resulting class. */
3107 if (class1 == class2)
3108 return class1;
3109
3110 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3111 the other class. */
3112 if (class1 == X86_64_NO_CLASS)
3113 return class2;
3114 if (class2 == X86_64_NO_CLASS)
3115 return class1;
3116
3117 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3118 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3119 return X86_64_MEMORY_CLASS;
3120
3121 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3122 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3123 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3124 return X86_64_INTEGERSI_CLASS;
3125 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3126 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3127 return X86_64_INTEGER_CLASS;
3128
3129 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3130 MEMORY is used. */
3131 if (class1 == X86_64_X87_CLASS
3132 || class1 == X86_64_X87UP_CLASS
3133 || class1 == X86_64_COMPLEX_X87_CLASS
3134 || class2 == X86_64_X87_CLASS
3135 || class2 == X86_64_X87UP_CLASS
3136 || class2 == X86_64_COMPLEX_X87_CLASS)
3137 return X86_64_MEMORY_CLASS;
3138
3139 /* Rule #6: Otherwise class SSE is used. */
3140 return X86_64_SSE_CLASS;
3141 }
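
/* For example, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), merging anything with X86_64_NO_CLASS
   yields the other class (rule #2), and merging X86_64_X87_CLASS with an SSE
   class falls through to rule #5 and yields X86_64_MEMORY_CLASS.  */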
3142
3143 /* Classify the argument of type TYPE and mode MODE.
3144 CLASSES will be filled by the register class used to pass each word
3145 of the operand. The number of words is returned. In case the parameter
3146 should be passed in memory, 0 is returned. As a special case for zero
3147 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3148
3149 BIT_OFFSET is used internally for handling records; it specifies the
3150 offset in bits, modulo 256, to avoid overflow cases.
3151
3152 See the x86-64 PS ABI for details.
3153 */
3154
3155 static int
3156 classify_argument (enum machine_mode mode, tree type,
3157 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3158 {
3159 HOST_WIDE_INT bytes =
3160 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3161 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3162
3163 /* Variable sized entities are always passed/returned in memory. */
3164 if (bytes < 0)
3165 return 0;
3166
3167 if (mode != VOIDmode
3168 && targetm.calls.must_pass_in_stack (mode, type))
3169 return 0;
3170
3171 if (type && AGGREGATE_TYPE_P (type))
3172 {
3173 int i;
3174 tree field;
3175 enum x86_64_reg_class subclasses[MAX_CLASSES];
3176
3177 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3178 if (bytes > 16)
3179 return 0;
3180
3181 for (i = 0; i < words; i++)
3182 classes[i] = X86_64_NO_CLASS;
3183
3184 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3185 signal the memory class, so handle it as a special case. */
3186 if (!words)
3187 {
3188 classes[0] = X86_64_NO_CLASS;
3189 return 1;
3190 }
3191
3192 /* Classify each field of record and merge classes. */
3193 switch (TREE_CODE (type))
3194 {
3195 case RECORD_TYPE:
3196 /* And now merge the fields of structure. */
3197 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3198 {
3199 if (TREE_CODE (field) == FIELD_DECL)
3200 {
3201 int num;
3202
3203 if (TREE_TYPE (field) == error_mark_node)
3204 continue;
3205
3206 /* Bitfields are always classified as integer. Handle them
3207 early, since later code would consider them to be
3208 misaligned integers. */
3209 if (DECL_BIT_FIELD (field))
3210 {
3211 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3212 i < ((int_bit_position (field) + (bit_offset % 64))
3213 + tree_low_cst (DECL_SIZE (field), 0)
3214 + 63) / 8 / 8; i++)
3215 classes[i] =
3216 merge_classes (X86_64_INTEGER_CLASS,
3217 classes[i]);
3218 }
3219 else
3220 {
3221 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3222 TREE_TYPE (field), subclasses,
3223 (int_bit_position (field)
3224 + bit_offset) % 256);
3225 if (!num)
3226 return 0;
3227 for (i = 0; i < num; i++)
3228 {
3229 int pos =
3230 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3231 classes[i + pos] =
3232 merge_classes (subclasses[i], classes[i + pos]);
3233 }
3234 }
3235 }
3236 }
3237 break;
3238
3239 case ARRAY_TYPE:
3240 /* Arrays are handled as small records. */
3241 {
3242 int num;
3243 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3244 TREE_TYPE (type), subclasses, bit_offset);
3245 if (!num)
3246 return 0;
3247
3248 /* The partial classes are now full classes. */
3249 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3250 subclasses[0] = X86_64_SSE_CLASS;
3251 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3252 subclasses[0] = X86_64_INTEGER_CLASS;
3253
3254 for (i = 0; i < words; i++)
3255 classes[i] = subclasses[i % num];
3256
3257 break;
3258 }
3259 case UNION_TYPE:
3260 case QUAL_UNION_TYPE:
3261 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3263 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3264 {
3265 if (TREE_CODE (field) == FIELD_DECL)
3266 {
3267 int num;
3268
3269 if (TREE_TYPE (field) == error_mark_node)
3270 continue;
3271
3272 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3273 TREE_TYPE (field), subclasses,
3274 bit_offset);
3275 if (!num)
3276 return 0;
3277 for (i = 0; i < num; i++)
3278 classes[i] = merge_classes (subclasses[i], classes[i]);
3279 }
3280 }
3281 break;
3282
3283 default:
3284 gcc_unreachable ();
3285 }
3286
3287 /* Final merger cleanup. */
3288 for (i = 0; i < words; i++)
3289 {
3290 /* If one class is MEMORY, everything should be passed in
3291 memory. */
3292 if (classes[i] == X86_64_MEMORY_CLASS)
3293 return 0;
3294
3295 /* The X86_64_SSEUP_CLASS should always be preceded by
3296 X86_64_SSE_CLASS. */
3297 if (classes[i] == X86_64_SSEUP_CLASS
3298 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3299 classes[i] = X86_64_SSE_CLASS;
3300
3301 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3302 if (classes[i] == X86_64_X87UP_CLASS
3303 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3304 classes[i] = X86_64_SSE_CLASS;
3305 }
3306 return words;
3307 }
3308
3309 /* Compute the alignment needed. We align all types to natural boundaries,
3310 with the exception of XFmode, which is aligned to 64 bits. */
3311 if (mode != VOIDmode && mode != BLKmode)
3312 {
3313 int mode_alignment = GET_MODE_BITSIZE (mode);
3314
3315 if (mode == XFmode)
3316 mode_alignment = 128;
3317 else if (mode == XCmode)
3318 mode_alignment = 256;
3319 if (COMPLEX_MODE_P (mode))
3320 mode_alignment /= 2;
3321 /* Misaligned fields are always returned in memory. */
3322 if (bit_offset % mode_alignment)
3323 return 0;
3324 }
3325
3326 /* For V1xx modes, just use the base mode. */
3327 if (VECTOR_MODE_P (mode)
3328 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3329 mode = GET_MODE_INNER (mode);
3330
3331 /* Classification of atomic types. */
3332 switch (mode)
3333 {
3334 case SDmode:
3335 case DDmode:
3336 classes[0] = X86_64_SSE_CLASS;
3337 return 1;
3338 case TDmode:
3339 classes[0] = X86_64_SSE_CLASS;
3340 classes[1] = X86_64_SSEUP_CLASS;
3341 return 2;
3342 case DImode:
3343 case SImode:
3344 case HImode:
3345 case QImode:
3346 case CSImode:
3347 case CHImode:
3348 case CQImode:
3349 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3350 classes[0] = X86_64_INTEGERSI_CLASS;
3351 else
3352 classes[0] = X86_64_INTEGER_CLASS;
3353 return 1;
3354 case CDImode:
3355 case TImode:
3356 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3357 return 2;
3358 case CTImode:
3359 return 0;
3360 case SFmode:
3361 if (!(bit_offset % 64))
3362 classes[0] = X86_64_SSESF_CLASS;
3363 else
3364 classes[0] = X86_64_SSE_CLASS;
3365 return 1;
3366 case DFmode:
3367 classes[0] = X86_64_SSEDF_CLASS;
3368 return 1;
3369 case XFmode:
3370 classes[0] = X86_64_X87_CLASS;
3371 classes[1] = X86_64_X87UP_CLASS;
3372 return 2;
3373 case TFmode:
3374 classes[0] = X86_64_SSE_CLASS;
3375 classes[1] = X86_64_SSEUP_CLASS;
3376 return 2;
3377 case SCmode:
3378 classes[0] = X86_64_SSE_CLASS;
3379 return 1;
3380 case DCmode:
3381 classes[0] = X86_64_SSEDF_CLASS;
3382 classes[1] = X86_64_SSEDF_CLASS;
3383 return 2;
3384 case XCmode:
3385 classes[0] = X86_64_COMPLEX_X87_CLASS;
3386 return 1;
3387 case TCmode:
3388 /* This mode is larger than 16 bytes. */
3389 return 0;
3390 case V4SFmode:
3391 case V4SImode:
3392 case V16QImode:
3393 case V8HImode:
3394 case V2DFmode:
3395 case V2DImode:
3396 classes[0] = X86_64_SSE_CLASS;
3397 classes[1] = X86_64_SSEUP_CLASS;
3398 return 2;
3399 case V2SFmode:
3400 case V2SImode:
3401 case V4HImode:
3402 case V8QImode:
3403 classes[0] = X86_64_SSE_CLASS;
3404 return 1;
3405 case BLKmode:
3406 case VOIDmode:
3407 return 0;
3408 default:
3409 gcc_assert (VECTOR_MODE_P (mode));
3410
3411 if (bytes > 16)
3412 return 0;
3413
3414 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3415
3416 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3417 classes[0] = X86_64_INTEGERSI_CLASS;
3418 else
3419 classes[0] = X86_64_INTEGER_CLASS;
3420 classes[1] = X86_64_INTEGER_CLASS;
3421 return 1 + (bytes > 8);
3422 }
3423 }
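
/* As an illustration of the rules above, a hypothetical 16-byte
   `struct { double d; int i; }' is split into two eightbytes: the first is
   classified X86_64_SSEDF_CLASS (the double) and the second
   X86_64_INTEGER_CLASS (the int plus padding), so 2 is returned.  A 24-byte
   aggregate fails the `bytes > 16' check and yields 0, i.e. it is passed in
   memory.  */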
3424
3425 /* Examine the argument and set the number of registers required in each
3426 class. Return 0 iff the parameter should be passed in memory. */
3427 static int
3428 examine_argument (enum machine_mode mode, tree type, int in_return,
3429 int *int_nregs, int *sse_nregs)
3430 {
3431 enum x86_64_reg_class class[MAX_CLASSES];
3432 int n = classify_argument (mode, type, class, 0);
3433
3434 *int_nregs = 0;
3435 *sse_nregs = 0;
3436 if (!n)
3437 return 0;
3438 for (n--; n >= 0; n--)
3439 switch (class[n])
3440 {
3441 case X86_64_INTEGER_CLASS:
3442 case X86_64_INTEGERSI_CLASS:
3443 (*int_nregs)++;
3444 break;
3445 case X86_64_SSE_CLASS:
3446 case X86_64_SSESF_CLASS:
3447 case X86_64_SSEDF_CLASS:
3448 (*sse_nregs)++;
3449 break;
3450 case X86_64_NO_CLASS:
3451 case X86_64_SSEUP_CLASS:
3452 break;
3453 case X86_64_X87_CLASS:
3454 case X86_64_X87UP_CLASS:
3455 if (!in_return)
3456 return 0;
3457 break;
3458 case X86_64_COMPLEX_X87_CLASS:
3459 return in_return ? 2 : 0;
3460 case X86_64_MEMORY_CLASS:
3461 gcc_unreachable ();
3462 }
3463 return 1;
3464 }
3465
3466 /* Construct container for the argument used by GCC interface. See
3467 FUNCTION_ARG for the detailed description. */
3468
3469 static rtx
3470 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3471 tree type, int in_return, int nintregs, int nsseregs,
3472 const int *intreg, int sse_regno)
3473 {
3474 /* The following variables hold the static issued_error state. */
3475 static bool issued_sse_arg_error;
3476 static bool issued_sse_ret_error;
3477 static bool issued_x87_ret_error;
3478
3479 enum machine_mode tmpmode;
3480 int bytes =
3481 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3482 enum x86_64_reg_class class[MAX_CLASSES];
3483 int n;
3484 int i;
3485 int nexps = 0;
3486 int needed_sseregs, needed_intregs;
3487 rtx exp[MAX_CLASSES];
3488 rtx ret;
3489
3490 n = classify_argument (mode, type, class, 0);
3491 if (TARGET_DEBUG_ARG)
3492 {
3493 if (!n)
3494 fprintf (stderr, "Memory class\n");
3495 else
3496 {
3497 fprintf (stderr, "Classes:");
3498 for (i = 0; i < n; i++)
3499 {
3500 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3501 }
3502 fprintf (stderr, "\n");
3503 }
3504 }
3505 if (!n)
3506 return NULL;
3507 if (!examine_argument (mode, type, in_return, &needed_intregs,
3508 &needed_sseregs))
3509 return NULL;
3510 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3511 return NULL;
3512
3513 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3514 some less clueful developer tries to use floating-point anyway. */
3515 if (needed_sseregs && !TARGET_SSE)
3516 {
3517 if (in_return)
3518 {
3519 if (!issued_sse_ret_error)
3520 {
3521 error ("SSE register return with SSE disabled");
3522 issued_sse_ret_error = true;
3523 }
3524 }
3525 else if (!issued_sse_arg_error)
3526 {
3527 error ("SSE register argument with SSE disabled");
3528 issued_sse_arg_error = true;
3529 }
3530 return NULL;
3531 }
3532
3533 /* Likewise, error if the ABI requires us to return values in the
3534 x87 registers and the user specified -mno-80387. */
3535 if (!TARGET_80387 && in_return)
3536 for (i = 0; i < n; i++)
3537 if (class[i] == X86_64_X87_CLASS
3538 || class[i] == X86_64_X87UP_CLASS
3539 || class[i] == X86_64_COMPLEX_X87_CLASS)
3540 {
3541 if (!issued_x87_ret_error)
3542 {
3543 error ("x87 register return with x87 disabled");
3544 issued_x87_ret_error = true;
3545 }
3546 return NULL;
3547 }
3548
3549 /* First construct the simple cases. Avoid SCmode, since we want to use
3550 a single register to pass this type. */
3551 if (n == 1 && mode != SCmode)
3552 switch (class[0])
3553 {
3554 case X86_64_INTEGER_CLASS:
3555 case X86_64_INTEGERSI_CLASS:
3556 return gen_rtx_REG (mode, intreg[0]);
3557 case X86_64_SSE_CLASS:
3558 case X86_64_SSESF_CLASS:
3559 case X86_64_SSEDF_CLASS:
3560 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3561 case X86_64_X87_CLASS:
3562 case X86_64_COMPLEX_X87_CLASS:
3563 return gen_rtx_REG (mode, FIRST_STACK_REG);
3564 case X86_64_NO_CLASS:
3565 /* Zero sized array, struct or class. */
3566 return NULL;
3567 default:
3568 gcc_unreachable ();
3569 }
3570 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3571 && mode != BLKmode)
3572 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3573 if (n == 2
3574 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3575 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3576 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3577 && class[1] == X86_64_INTEGER_CLASS
3578 && (mode == CDImode || mode == TImode || mode == TFmode)
3579 && intreg[0] + 1 == intreg[1])
3580 return gen_rtx_REG (mode, intreg[0]);
3581
3582 /* Otherwise figure out the entries of the PARALLEL. */
3583 for (i = 0; i < n; i++)
3584 {
3585 switch (class[i])
3586 {
3587 case X86_64_NO_CLASS:
3588 break;
3589 case X86_64_INTEGER_CLASS:
3590 case X86_64_INTEGERSI_CLASS:
3591 /* Merge TImodes on aligned occasions here too. */
3592 if (i * 8 + 8 > bytes)
3593 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3594 else if (class[i] == X86_64_INTEGERSI_CLASS)
3595 tmpmode = SImode;
3596 else
3597 tmpmode = DImode;
3598 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
3599 if (tmpmode == BLKmode)
3600 tmpmode = DImode;
3601 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3602 gen_rtx_REG (tmpmode, *intreg),
3603 GEN_INT (i*8));
3604 intreg++;
3605 break;
3606 case X86_64_SSESF_CLASS:
3607 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3608 gen_rtx_REG (SFmode,
3609 SSE_REGNO (sse_regno)),
3610 GEN_INT (i*8));
3611 sse_regno++;
3612 break;
3613 case X86_64_SSEDF_CLASS:
3614 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3615 gen_rtx_REG (DFmode,
3616 SSE_REGNO (sse_regno)),
3617 GEN_INT (i*8));
3618 sse_regno++;
3619 break;
3620 case X86_64_SSE_CLASS:
3621 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3622 tmpmode = TImode;
3623 else
3624 tmpmode = DImode;
3625 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3626 gen_rtx_REG (tmpmode,
3627 SSE_REGNO (sse_regno)),
3628 GEN_INT (i*8));
3629 if (tmpmode == TImode)
3630 i++;
3631 sse_regno++;
3632 break;
3633 default:
3634 gcc_unreachable ();
3635 }
3636 }
3637
3638 /* Empty aligned struct, union or class. */
3639 if (nexps == 0)
3640 return NULL;
3641
3642 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3643 for (i = 0; i < nexps; i++)
3644 XVECEXP (ret, 0, i) = exp [i];
3645 return ret;
3646 }
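
/* For example, a `__complex__ double' argument (DCmode) is classified as two
   X86_64_SSEDF_CLASS eightbytes, so the code above builds a PARALLEL of two
   DFmode SSE registers at byte offsets 0 and 8; a plain double, being a single
   class, takes the early `n == 1' exit and is returned as a lone SSE register
   rtx.  */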
3647
3648 /* Update the data in CUM to advance over an argument
3649 of mode MODE and data type TYPE.
3650 (TYPE is null for libcalls where that information may not be available.) */
3651
3652 void
3653 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3654 tree type, int named)
3655 {
3656 int bytes =
3657 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3658 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3659
3660 if (type)
3661 mode = type_natural_mode (type);
3662
3663 if (TARGET_DEBUG_ARG)
3664 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3665 "mode=%s, named=%d)\n\n",
3666 words, cum->words, cum->nregs, cum->sse_nregs,
3667 GET_MODE_NAME (mode), named);
3668
3669 if (TARGET_64BIT)
3670 {
3671 int int_nregs, sse_nregs;
3672 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3673 cum->words += words;
3674 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3675 {
3676 cum->nregs -= int_nregs;
3677 cum->sse_nregs -= sse_nregs;
3678 cum->regno += int_nregs;
3679 cum->sse_regno += sse_nregs;
3680 }
3681 else
3682 cum->words += words;
3683 }
3684 else
3685 {
3686 switch (mode)
3687 {
3688 default:
3689 break;
3690
3691 case BLKmode:
3692 if (bytes < 0)
3693 break;
3694 /* FALLTHRU */
3695
3696 case DImode:
3697 case SImode:
3698 case HImode:
3699 case QImode:
3700 cum->words += words;
3701 cum->nregs -= words;
3702 cum->regno += words;
3703
3704 if (cum->nregs <= 0)
3705 {
3706 cum->nregs = 0;
3707 cum->regno = 0;
3708 }
3709 break;
3710
3711 case DFmode:
3712 if (cum->float_in_sse < 2)
3713 break;
3714 case SFmode:
3715 if (cum->float_in_sse < 1)
3716 break;
3717 /* FALLTHRU */
3718
3719 case TImode:
3720 case V16QImode:
3721 case V8HImode:
3722 case V4SImode:
3723 case V2DImode:
3724 case V4SFmode:
3725 case V2DFmode:
3726 if (!type || !AGGREGATE_TYPE_P (type))
3727 {
3728 cum->sse_words += words;
3729 cum->sse_nregs -= 1;
3730 cum->sse_regno += 1;
3731 if (cum->sse_nregs <= 0)
3732 {
3733 cum->sse_nregs = 0;
3734 cum->sse_regno = 0;
3735 }
3736 }
3737 break;
3738
3739 case V8QImode:
3740 case V4HImode:
3741 case V2SImode:
3742 case V2SFmode:
3743 if (!type || !AGGREGATE_TYPE_P (type))
3744 {
3745 cum->mmx_words += words;
3746 cum->mmx_nregs -= 1;
3747 cum->mmx_regno += 1;
3748 if (cum->mmx_nregs <= 0)
3749 {
3750 cum->mmx_nregs = 0;
3751 cum->mmx_regno = 0;
3752 }
3753 }
3754 break;
3755 }
3756 }
3757 }
3758
3759 /* Define where to put the arguments to a function.
3760 Value is zero to push the argument on the stack,
3761 or a hard register in which to store the argument.
3762
3763 MODE is the argument's machine mode.
3764 TYPE is the data type of the argument (as a tree).
3765 This is null for libcalls where that information may
3766 not be available.
3767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3768 the preceding args and about the function being called.
3769 NAMED is nonzero if this argument is a named parameter
3770 (otherwise it is an extra parameter matching an ellipsis). */
3771
3772 rtx
3773 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3774 tree type, int named)
3775 {
3776 enum machine_mode mode = orig_mode;
3777 rtx ret = NULL_RTX;
3778 int bytes =
3779 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3780 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3781 static bool warnedsse, warnedmmx;
3782
3783 /* To simplify the code below, represent vector types with a vector mode
3784 even if MMX/SSE are not active. */
3785 if (type && TREE_CODE (type) == VECTOR_TYPE)
3786 mode = type_natural_mode (type);
3787
3788 /* Handle a hidden AL argument containing the number of registers for
3789 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3790 avoid any AL settings. */
3791 if (mode == VOIDmode)
3792 {
3793 if (TARGET_64BIT)
3794 return GEN_INT (cum->maybe_vaarg
3795 ? (cum->sse_nregs < 0
3796 ? SSE_REGPARM_MAX
3797 : cum->sse_regno)
3798 : -1);
3799 else
3800 return constm1_rtx;
3801 }
3802 if (TARGET_64BIT)
3803 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3804 cum->sse_nregs,
3805 &x86_64_int_parameter_registers [cum->regno],
3806 cum->sse_regno);
3807 else
3808 switch (mode)
3809 {
3810 /* For now, pass fp/complex values on the stack. */
3811 default:
3812 break;
3813
3814 case BLKmode:
3815 if (bytes < 0)
3816 break;
3817 /* FALLTHRU */
3818 case DImode:
3819 case SImode:
3820 case HImode:
3821 case QImode:
3822 if (words <= cum->nregs)
3823 {
3824 int regno = cum->regno;
3825
3826 /* Fastcall allocates the first two DWORD (SImode) or
3827 smaller arguments to ECX and EDX. */
3828 if (cum->fastcall)
3829 {
3830 if (mode == BLKmode || mode == DImode)
3831 break;
3832
3833 /* ECX, not EAX, is the first allocated register. */
3834 if (regno == 0)
3835 regno = 2;
3836 }
3837 ret = gen_rtx_REG (mode, regno);
3838 }
3839 break;
3840 case DFmode:
3841 if (cum->float_in_sse < 2)
3842 break;
3843 case SFmode:
3844 if (cum->float_in_sse < 1)
3845 break;
3846 /* FALLTHRU */
3847 case TImode:
3848 case V16QImode:
3849 case V8HImode:
3850 case V4SImode:
3851 case V2DImode:
3852 case V4SFmode:
3853 case V2DFmode:
3854 if (!type || !AGGREGATE_TYPE_P (type))
3855 {
3856 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3857 {
3858 warnedsse = true;
3859 warning (0, "SSE vector argument without SSE enabled "
3860 "changes the ABI");
3861 }
3862 if (cum->sse_nregs)
3863 ret = gen_reg_or_parallel (mode, orig_mode,
3864 cum->sse_regno + FIRST_SSE_REG);
3865 }
3866 break;
3867 case V8QImode:
3868 case V4HImode:
3869 case V2SImode:
3870 case V2SFmode:
3871 if (!type || !AGGREGATE_TYPE_P (type))
3872 {
3873 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3874 {
3875 warnedmmx = true;
3876 warning (0, "MMX vector argument without MMX enabled "
3877 "changes the ABI");
3878 }
3879 if (cum->mmx_nregs)
3880 ret = gen_reg_or_parallel (mode, orig_mode,
3881 cum->mmx_regno + FIRST_MMX_REG);
3882 }
3883 break;
3884 }
3885
3886 if (TARGET_DEBUG_ARG)
3887 {
3888 fprintf (stderr,
3889 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3890 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3891
3892 if (ret)
3893 print_simple_rtl (stderr, ret);
3894 else
3895 fprintf (stderr, ", stack");
3896
3897 fprintf (stderr, " )\n");
3898 }
3899
3900 return ret;
3901 }
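
/* Note on the VOIDmode case above: for a 64-bit call to a variadic function
   the returned GEN_INT (cum->sse_regno) is the number of SSE registers used
   so far; the caller copies it into AL so that the callee's prologue knows
   how many XMM registers it has to dump into the register save area.  */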
3902
3903 /* A C expression that indicates when an argument must be passed by
3904 reference. If nonzero for an argument, a copy of that argument is
3905 made in memory and a pointer to the argument is passed instead of
3906 the argument itself. The pointer is passed in whatever way is
3907 appropriate for passing a pointer to that type. */
3908
3909 static bool
3910 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3911 enum machine_mode mode ATTRIBUTE_UNUSED,
3912 tree type, bool named ATTRIBUTE_UNUSED)
3913 {
3914 if (!TARGET_64BIT)
3915 return 0;
3916
3917 if (type && int_size_in_bytes (type) == -1)
3918 {
3919 if (TARGET_DEBUG_ARG)
3920 fprintf (stderr, "function_arg_pass_by_reference\n");
3921 return 1;
3922 }
3923
3924 return 0;
3925 }
3926
3927 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3928 ABI. Only called if TARGET_SSE. */
3929 static bool
3930 contains_128bit_aligned_vector_p (tree type)
3931 {
3932 enum machine_mode mode = TYPE_MODE (type);
3933 if (SSE_REG_MODE_P (mode)
3934 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3935 return true;
3936 if (TYPE_ALIGN (type) < 128)
3937 return false;
3938
3939 if (AGGREGATE_TYPE_P (type))
3940 {
3941 /* Walk the aggregates recursively. */
3942 switch (TREE_CODE (type))
3943 {
3944 case RECORD_TYPE:
3945 case UNION_TYPE:
3946 case QUAL_UNION_TYPE:
3947 {
3948 tree field;
3949
3950 /* Walk all the structure fields. */
3951 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3952 {
3953 if (TREE_CODE (field) == FIELD_DECL
3954 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3955 return true;
3956 }
3957 break;
3958 }
3959
3960 case ARRAY_TYPE:
3961 /* Just in case some languages pass arrays by value. */
3962 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3963 return true;
3964 break;
3965
3966 default:
3967 gcc_unreachable ();
3968 }
3969 }
3970 return false;
3971 }
3972
3973 /* Gives the alignment boundary, in bits, of an argument with the
3974 specified mode and type. */
3975
3976 int
3977 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3978 {
3979 int align;
3980 if (type)
3981 align = TYPE_ALIGN (type);
3982 else
3983 align = GET_MODE_ALIGNMENT (mode);
3984 if (align < PARM_BOUNDARY)
3985 align = PARM_BOUNDARY;
3986 if (!TARGET_64BIT)
3987 {
3988 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
3989 make an exception for SSE modes since these require 128-bit
3990 alignment.
3991
3992 The handling here differs from field_alignment. ICC aligns MMX
3993 arguments to 4 byte boundaries, while structure fields are aligned
3994 to 8 byte boundaries. */
3995 if (!TARGET_SSE)
3996 align = PARM_BOUNDARY;
3997 else if (!type)
3998 {
3999 if (!SSE_REG_MODE_P (mode))
4000 align = PARM_BOUNDARY;
4001 }
4002 else
4003 {
4004 if (!contains_128bit_aligned_vector_p (type))
4005 align = PARM_BOUNDARY;
4006 }
4007 }
4008 if (align > 128)
4009 align = 128;
4010 return align;
4011 }
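
/* For example, in 32-bit mode with SSE enabled a hypothetical `__m128'
   argument keeps its 128-bit alignment, while a plain double is pushed back
   to PARM_BOUNDARY because contains_128bit_aligned_vector_p is false for it;
   with -mno-sse everything is forced to PARM_BOUNDARY.  */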
4012
4013 /* Return true if N is a possible register number of function value. */
4014 bool
4015 ix86_function_value_regno_p (int regno)
4016 {
4017 if (regno == 0
4018 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4019 || (regno == FIRST_SSE_REG && TARGET_SSE))
4020 return true;
4021
4022 if (!TARGET_64BIT
4023 && (regno == FIRST_MMX_REG && TARGET_MMX))
4024 return true;
4025
4026 return false;
4027 }
4028
4029 /* Define how to find the value returned by a function.
4030 VALTYPE is the data type of the value (as a tree).
4031 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4032 otherwise, FUNC is 0. */
4033 rtx
4034 ix86_function_value (tree valtype, tree fntype_or_decl,
4035 bool outgoing ATTRIBUTE_UNUSED)
4036 {
4037 enum machine_mode natmode = type_natural_mode (valtype);
4038
4039 if (TARGET_64BIT)
4040 {
4041 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4042 1, REGPARM_MAX, SSE_REGPARM_MAX,
4043 x86_64_int_return_registers, 0);
4044 /* For zero sized structures, construct_container returns NULL, but we
4045 need to keep the rest of the compiler happy by returning a meaningful value. */
4046 if (!ret)
4047 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4048 return ret;
4049 }
4050 else
4051 {
4052 tree fn = NULL_TREE, fntype;
4053 if (fntype_or_decl
4054 && DECL_P (fntype_or_decl))
4055 fn = fntype_or_decl;
4056 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4057 return gen_rtx_REG (TYPE_MODE (valtype),
4058 ix86_value_regno (natmode, fn, fntype));
4059 }
4060 }
4061
4062 /* Return true iff type is returned in memory. */
4063 int
4064 ix86_return_in_memory (tree type)
4065 {
4066 int needed_intregs, needed_sseregs, size;
4067 enum machine_mode mode = type_natural_mode (type);
4068
4069 if (TARGET_64BIT)
4070 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4071
4072 if (mode == BLKmode)
4073 return 1;
4074
4075 size = int_size_in_bytes (type);
4076
4077 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4078 return 0;
4079
4080 if (VECTOR_MODE_P (mode) || mode == TImode)
4081 {
4082 /* User-created vectors small enough to fit in EAX. */
4083 if (size < 8)
4084 return 0;
4085
4086 /* MMX/3dNow values are returned in MM0,
4087 except when it doesn't exist. */
4088 if (size == 8)
4089 return (TARGET_MMX ? 0 : 1);
4090
4091 /* SSE values are returned in XMM0, except when it doesn't exist. */
4092 if (size == 16)
4093 return (TARGET_SSE ? 0 : 1);
4094 }
4095
4096 if (mode == XFmode)
4097 return 0;
4098
4099 if (mode == TDmode)
4100 return 1;
4101
4102 if (size > 12)
4103 return 1;
4104 return 0;
4105 }
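
/* For example, in 32-bit mode a BLKmode struct of four ints is returned in
   memory, a long double (XFmode) is returned in a register, and an `__m128'
   value is returned in XMM0 only when SSE is enabled and in memory
   otherwise.  */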
4106
4107 /* When returning SSE vector types, we have a choice of either
4108 (1) being abi incompatible with a -march switch, or
4109 (2) generating an error.
4110 Given no good solution, I think the safest thing is one warning.
4111 The user won't be able to use -Werror, but....
4112
4113 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4114 called in response to actually generating a caller or callee that
4115 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4116 via aggregate_value_p for general type probing from tree-ssa. */
4117
4118 static rtx
4119 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4120 {
4121 static bool warnedsse, warnedmmx;
4122
4123 if (type)
4124 {
4125 /* Look at the return type of the function, not the function type. */
4126 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4127
4128 if (!TARGET_SSE && !warnedsse)
4129 {
4130 if (mode == TImode
4131 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4132 {
4133 warnedsse = true;
4134 warning (0, "SSE vector return without SSE enabled "
4135 "changes the ABI");
4136 }
4137 }
4138
4139 if (!TARGET_MMX && !warnedmmx)
4140 {
4141 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4142 {
4143 warnedmmx = true;
4144 warning (0, "MMX vector return without MMX enabled "
4145 "changes the ABI");
4146 }
4147 }
4148 }
4149
4150 return NULL;
4151 }
4152
4153 /* Define how to find the value returned by a library function
4154 assuming the value has mode MODE. */
4155 rtx
4156 ix86_libcall_value (enum machine_mode mode)
4157 {
4158 if (TARGET_64BIT)
4159 {
4160 switch (mode)
4161 {
4162 case SFmode:
4163 case SCmode:
4164 case DFmode:
4165 case DCmode:
4166 case TFmode:
4167 case SDmode:
4168 case DDmode:
4169 case TDmode:
4170 return gen_rtx_REG (mode, FIRST_SSE_REG);
4171 case XFmode:
4172 case XCmode:
4173 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4174 case TCmode:
4175 return NULL;
4176 default:
4177 return gen_rtx_REG (mode, 0);
4178 }
4179 }
4180 else
4181 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4182 }
4183
4184 /* Given a mode, return the register to use for a return value. */
4185
4186 static int
4187 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4188 {
4189 gcc_assert (!TARGET_64BIT);
4190
4191 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4192 we normally prevent this case when mmx is not available. However
4193 some ABIs may require the result to be returned like DImode. */
4194 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4195 return TARGET_MMX ? FIRST_MMX_REG : 0;
4196
4197 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4198 we prevent this case when sse is not available. However some ABIs
4199 may require the result to be returned like integer TImode. */
4200 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4201 return TARGET_SSE ? FIRST_SSE_REG : 0;
4202
4203 /* Decimal floating point values can go in %eax, unlike other float modes. */
4204 if (DECIMAL_FLOAT_MODE_P (mode))
4205 return 0;
4206
4207 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4208 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4209 return 0;
4210
4211 /* Floating point return values in %st(0), except for local functions when
4212 SSE math is enabled or for functions with sseregparm attribute. */
4213 if ((func || fntype)
4214 && (mode == SFmode || mode == DFmode))
4215 {
4216 int sse_level = ix86_function_sseregparm (fntype, func);
4217 if ((sse_level >= 1 && mode == SFmode)
4218 || (sse_level == 2 && mode == DFmode))
4219 return FIRST_SSE_REG;
4220 }
4221
4222 return FIRST_FLOAT_REG;
4223 }
4224 \f
4225 /* Create the va_list data type. */
4226
4227 static tree
4228 ix86_build_builtin_va_list (void)
4229 {
4230 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4231
4232 /* For i386 we use a plain pointer to the argument area. */
4233 if (!TARGET_64BIT)
4234 return build_pointer_type (char_type_node);
4235
4236 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4237 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4238
4239 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4240 unsigned_type_node);
4241 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4242 unsigned_type_node);
4243 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4244 ptr_type_node);
4245 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4246 ptr_type_node);
4247
4248 va_list_gpr_counter_field = f_gpr;
4249 va_list_fpr_counter_field = f_fpr;
4250
4251 DECL_FIELD_CONTEXT (f_gpr) = record;
4252 DECL_FIELD_CONTEXT (f_fpr) = record;
4253 DECL_FIELD_CONTEXT (f_ovf) = record;
4254 DECL_FIELD_CONTEXT (f_sav) = record;
4255
4256 TREE_CHAIN (record) = type_decl;
4257 TYPE_NAME (record) = type_decl;
4258 TYPE_FIELDS (record) = f_gpr;
4259 TREE_CHAIN (f_gpr) = f_fpr;
4260 TREE_CHAIN (f_fpr) = f_ovf;
4261 TREE_CHAIN (f_ovf) = f_sav;
4262
4263 layout_type (record);
4264
4265 /* The correct type is an array type of one element. */
4266 return build_array_type (record, build_index_type (size_zero_node));
4267 }
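
/* On 64-bit targets the record built above corresponds roughly to the ABI's

      typedef struct {
        unsigned int gp_offset;
        unsigned int fp_offset;
        void *overflow_arg_area;
        void *reg_save_area;
      } __va_list_tag;
      typedef __va_list_tag va_list[1];

   which is why the final return value is a one-element array type.  */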
4268
4269 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4270
4271 static void
4272 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4273 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4274 int no_rtl)
4275 {
4276 CUMULATIVE_ARGS next_cum;
4277 rtx save_area = NULL_RTX, mem;
4278 rtx label;
4279 rtx label_ref;
4280 rtx tmp_reg;
4281 rtx nsse_reg;
4282 int set;
4283 tree fntype;
4284 int stdarg_p;
4285 int i;
4286
4287 if (!TARGET_64BIT)
4288 return;
4289
4290 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4291 return;
4292
4293 /* Indicate that we need to allocate stack space for the varargs save area. */
4294 ix86_save_varrargs_registers = 1;
4295
4296 cfun->stack_alignment_needed = 128;
4297
4298 fntype = TREE_TYPE (current_function_decl);
4299 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4300 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4301 != void_type_node));
4302
4303 /* For varargs, we do not want to skip the dummy va_dcl argument.
4304 For stdargs, we do want to skip the last named argument. */
4305 next_cum = *cum;
4306 if (stdarg_p)
4307 function_arg_advance (&next_cum, mode, type, 1);
4308
4309 if (!no_rtl)
4310 save_area = frame_pointer_rtx;
4311
4312 set = get_varargs_alias_set ();
4313
4314 for (i = next_cum.regno;
4315 i < ix86_regparm
4316 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4317 i++)
4318 {
4319 mem = gen_rtx_MEM (Pmode,
4320 plus_constant (save_area, i * UNITS_PER_WORD));
4321 MEM_NOTRAP_P (mem) = 1;
4322 set_mem_alias_set (mem, set);
4323 emit_move_insn (mem, gen_rtx_REG (Pmode,
4324 x86_64_int_parameter_registers[i]));
4325 }
4326
4327 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4328 {
4329 /* Now emit code to save the SSE registers. The AX parameter contains the
4330 number of SSE parameter registers used to call this function. We use the
4331 sse_prologue_save insn template, which produces a computed jump across the
4332 SSE saves. We need some preparation work to get this working. */
4333
4334 label = gen_label_rtx ();
4335 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4336
4337 /* Compute address to jump to :
4338 label - 5*eax + nnamed_sse_arguments*5 */
4339 tmp_reg = gen_reg_rtx (Pmode);
4340 nsse_reg = gen_reg_rtx (Pmode);
4341 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4342 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4343 gen_rtx_MULT (Pmode, nsse_reg,
4344 GEN_INT (4))));
4345 if (next_cum.sse_regno)
4346 emit_move_insn
4347 (nsse_reg,
4348 gen_rtx_CONST (DImode,
4349 gen_rtx_PLUS (DImode,
4350 label_ref,
4351 GEN_INT (next_cum.sse_regno * 4))));
4352 else
4353 emit_move_insn (nsse_reg, label_ref);
4354 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4355
4356 /* Compute the address of the memory block we save into. We always use a
4357 pointer pointing 127 bytes after the first byte to store; this is needed to
4358 keep the instruction size limited to 4 bytes. */
4359 tmp_reg = gen_reg_rtx (Pmode);
4360 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4361 plus_constant (save_area,
4362 8 * REGPARM_MAX + 127)));
4363 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4364 MEM_NOTRAP_P (mem) = 1;
4365 set_mem_alias_set (mem, set);
4366 set_mem_align (mem, BITS_PER_WORD);
4367
4368 /* And finally do the dirty job! */
4369 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4370 GEN_INT (next_cum.sse_regno), label));
4371 }
4372
4373 }
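
/* The register save area filled above is laid out, roughly, as REGPARM_MAX (6)
   eightbyte slots for RDI, RSI, RDX, RCX, R8 and R9 followed by
   SSE_REGPARM_MAX (8) sixteen-byte slots for XMM0-XMM7, i.e. 48 + 128 bytes in
   total; the gp_offset and fp_offset fields of the va_list index into this
   block.  */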
4374
4375 /* Implement va_start. */
4376
4377 void
4378 ix86_va_start (tree valist, rtx nextarg)
4379 {
4380 HOST_WIDE_INT words, n_gpr, n_fpr;
4381 tree f_gpr, f_fpr, f_ovf, f_sav;
4382 tree gpr, fpr, ovf, sav, t;
4383 tree type;
4384
4385 /* Only the 64-bit target needs something special. */
4386 if (!TARGET_64BIT)
4387 {
4388 std_expand_builtin_va_start (valist, nextarg);
4389 return;
4390 }
4391
4392 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4393 f_fpr = TREE_CHAIN (f_gpr);
4394 f_ovf = TREE_CHAIN (f_fpr);
4395 f_sav = TREE_CHAIN (f_ovf);
4396
4397 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4398 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4399 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4400 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4401 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4402
4403 /* Count number of gp and fp argument registers used. */
4404 words = current_function_args_info.words;
4405 n_gpr = current_function_args_info.regno;
4406 n_fpr = current_function_args_info.sse_regno;
4407
4408 if (TARGET_DEBUG_ARG)
4409 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4410 (int) words, (int) n_gpr, (int) n_fpr);
4411
4412 if (cfun->va_list_gpr_size)
4413 {
4414 type = TREE_TYPE (gpr);
4415 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4416 build_int_cst (type, n_gpr * 8));
4417 TREE_SIDE_EFFECTS (t) = 1;
4418 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4419 }
4420
4421 if (cfun->va_list_fpr_size)
4422 {
4423 type = TREE_TYPE (fpr);
4424 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4425 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4426 TREE_SIDE_EFFECTS (t) = 1;
4427 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4428 }
4429
4430 /* Find the overflow area. */
4431 type = TREE_TYPE (ovf);
4432 t = make_tree (type, virtual_incoming_args_rtx);
4433 if (words != 0)
4434 t = build2 (PLUS_EXPR, type, t,
4435 build_int_cst (type, words * UNITS_PER_WORD));
4436 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4437 TREE_SIDE_EFFECTS (t) = 1;
4438 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4439
4440 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4441 {
4442 /* Find the register save area.
4443 The function prologue saves it right above the stack frame. */
4444 type = TREE_TYPE (sav);
4445 t = make_tree (type, frame_pointer_rtx);
4446 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4447 TREE_SIDE_EFFECTS (t) = 1;
4448 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4449 }
4450 }
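
/* For a hypothetical `void f (int a, ...)' one GPR is consumed by the named
   argument, so the code above stores gp_offset = 8 and, with no SSE registers
   used, fp_offset = 8 * REGPARM_MAX = 48; overflow_arg_area points just past
   any named arguments that were passed on the stack.  */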
4451
4452 /* Implement va_arg. */
4453
4454 tree
4455 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4456 {
4457 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4458 tree f_gpr, f_fpr, f_ovf, f_sav;
4459 tree gpr, fpr, ovf, sav, t;
4460 int size, rsize;
4461 tree lab_false, lab_over = NULL_TREE;
4462 tree addr, t2;
4463 rtx container;
4464 int indirect_p = 0;
4465 tree ptrtype;
4466 enum machine_mode nat_mode;
4467
4468 /* Only the 64-bit target needs something special. */
4469 if (!TARGET_64BIT)
4470 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4471
4472 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4473 f_fpr = TREE_CHAIN (f_gpr);
4474 f_ovf = TREE_CHAIN (f_fpr);
4475 f_sav = TREE_CHAIN (f_ovf);
4476
4477 valist = build_va_arg_indirect_ref (valist);
4478 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4479 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4480 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4481 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4482
4483 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4484 if (indirect_p)
4485 type = build_pointer_type (type);
4486 size = int_size_in_bytes (type);
4487 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4488
4489 nat_mode = type_natural_mode (type);
4490 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4491 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4492
4493 /* Pull the value out of the saved registers. */
4494
4495 addr = create_tmp_var (ptr_type_node, "addr");
4496 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4497
4498 if (container)
4499 {
4500 int needed_intregs, needed_sseregs;
4501 bool need_temp;
4502 tree int_addr, sse_addr;
4503
4504 lab_false = create_artificial_label ();
4505 lab_over = create_artificial_label ();
4506
4507 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4508
4509 need_temp = (!REG_P (container)
4510 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4511 || TYPE_ALIGN (type) > 128));
4512
4513 /* If we are passing a structure, verify that it is a consecutive block
4514 in the register save area. If not, we need to do moves. */
4515 if (!need_temp && !REG_P (container))
4516 {
4517 /* Verify that all registers are strictly consecutive */
4518 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4519 {
4520 int i;
4521
4522 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4523 {
4524 rtx slot = XVECEXP (container, 0, i);
4525 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4526 || INTVAL (XEXP (slot, 1)) != i * 16)
4527 need_temp = 1;
4528 }
4529 }
4530 else
4531 {
4532 int i;
4533
4534 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4535 {
4536 rtx slot = XVECEXP (container, 0, i);
4537 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4538 || INTVAL (XEXP (slot, 1)) != i * 8)
4539 need_temp = 1;
4540 }
4541 }
4542 }
4543 if (!need_temp)
4544 {
4545 int_addr = addr;
4546 sse_addr = addr;
4547 }
4548 else
4549 {
4550 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4551 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4552 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4553 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4554 }
4555
4556 /* First ensure that we fit completely in registers. */
4557 if (needed_intregs)
4558 {
4559 t = build_int_cst (TREE_TYPE (gpr),
4560 (REGPARM_MAX - needed_intregs + 1) * 8);
4561 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4562 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4563 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4564 gimplify_and_add (t, pre_p);
4565 }
4566 if (needed_sseregs)
4567 {
4568 t = build_int_cst (TREE_TYPE (fpr),
4569 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4570 + REGPARM_MAX * 8);
4571 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4572 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4573 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4574 gimplify_and_add (t, pre_p);
4575 }
4576
4577 /* Compute index to start of area used for integer regs. */
4578 if (needed_intregs)
4579 {
4580 /* int_addr = gpr + sav; */
4581 t = fold_convert (ptr_type_node, gpr);
4582 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4583 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4584 gimplify_and_add (t, pre_p);
4585 }
4586 if (needed_sseregs)
4587 {
4588 /* sse_addr = fpr + sav; */
4589 t = fold_convert (ptr_type_node, fpr);
4590 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4591 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4592 gimplify_and_add (t, pre_p);
4593 }
4594 if (need_temp)
4595 {
4596 int i;
4597 tree temp = create_tmp_var (type, "va_arg_tmp");
4598
4599 /* addr = &temp; */
4600 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4601 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4602 gimplify_and_add (t, pre_p);
4603
4604 for (i = 0; i < XVECLEN (container, 0); i++)
4605 {
4606 rtx slot = XVECEXP (container, 0, i);
4607 rtx reg = XEXP (slot, 0);
4608 enum machine_mode mode = GET_MODE (reg);
4609 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4610 tree addr_type = build_pointer_type (piece_type);
4611 tree src_addr, src;
4612 int src_offset;
4613 tree dest_addr, dest;
4614
4615 if (SSE_REGNO_P (REGNO (reg)))
4616 {
4617 src_addr = sse_addr;
4618 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4619 }
4620 else
4621 {
4622 src_addr = int_addr;
4623 src_offset = REGNO (reg) * 8;
4624 }
4625 src_addr = fold_convert (addr_type, src_addr);
4626 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4627 size_int (src_offset)));
4628 src = build_va_arg_indirect_ref (src_addr);
4629
4630 dest_addr = fold_convert (addr_type, addr);
4631 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4632 size_int (INTVAL (XEXP (slot, 1)))));
4633 dest = build_va_arg_indirect_ref (dest_addr);
4634
4635 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4636 gimplify_and_add (t, pre_p);
4637 }
4638 }
4639
4640 if (needed_intregs)
4641 {
4642 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4643 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4644 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4645 gimplify_and_add (t, pre_p);
4646 }
4647 if (needed_sseregs)
4648 {
4649 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4650 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4651 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4652 gimplify_and_add (t, pre_p);
4653 }
4654
4655 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4656 gimplify_and_add (t, pre_p);
4657
4658 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4659 append_to_statement_list (t, pre_p);
4660 }
4661
4662 /* ... otherwise out of the overflow area. */
4663
4664 /* Care for on-stack alignment if needed. */
4665 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4666 || integer_zerop (TYPE_SIZE (type)))
4667 t = ovf;
4668 else
4669 {
4670 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4671 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4672 build_int_cst (TREE_TYPE (ovf), align - 1));
4673 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4674 build_int_cst (TREE_TYPE (t), -align));
4675 }
4676 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4677
4678 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4679 gimplify_and_add (t2, pre_p);
4680
4681 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4682 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4683 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4684 gimplify_and_add (t, pre_p);
4685
4686 if (container)
4687 {
4688 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4689 append_to_statement_list (t, pre_p);
4690 }
4691
4692 ptrtype = build_pointer_type (type);
4693 addr = fold_convert (ptrtype, addr);
4694
4695 if (indirect_p)
4696 addr = build_va_arg_indirect_ref (addr);
4697 return build_va_arg_indirect_ref (addr);
4698 }
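
/* The GIMPLE emitted above for a simple case such as `va_arg (ap, int)'
   behaves roughly like this C sketch (48 == REGPARM_MAX * 8):

      if (ap->gp_offset >= 48)
        goto overflow;
      addr = ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += 8;
      goto done;
    overflow:
      addr = ap->overflow_arg_area;       (aligned first if required)
      ap->overflow_arg_area += 8;
    done:
      result = *(int *) addr;  */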
4699 \f
4700 /* Return nonzero if OPNUM's MEM should be matched
4701 in movabs* patterns. */
4702
4703 int
4704 ix86_check_movabs (rtx insn, int opnum)
4705 {
4706 rtx set, mem;
4707
4708 set = PATTERN (insn);
4709 if (GET_CODE (set) == PARALLEL)
4710 set = XVECEXP (set, 0, 0);
4711 gcc_assert (GET_CODE (set) == SET);
4712 mem = XEXP (set, opnum);
4713 while (GET_CODE (mem) == SUBREG)
4714 mem = SUBREG_REG (mem);
4715 gcc_assert (MEM_P (mem));
4716 return (volatile_ok || !MEM_VOLATILE_P (mem));
4717 }
4718 \f
4719 /* Initialize the table of extra 80387 mathematical constants. */
4720
4721 static void
4722 init_ext_80387_constants (void)
4723 {
4724 static const char * cst[5] =
4725 {
4726 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4727 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4728 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4729 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4730 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4731 };
4732 int i;
4733
4734 for (i = 0; i < 5; i++)
4735 {
4736 real_from_string (&ext_80387_constants_table[i], cst[i]);
4737 /* Ensure each constant is rounded to XFmode precision. */
4738 real_convert (&ext_80387_constants_table[i],
4739 XFmode, &ext_80387_constants_table[i]);
4740 }
4741
4742 ext_80387_constants_init = 1;
4743 }
4744
4745 /* Return a nonzero code if the constant is something that can be loaded
4746 with a special instruction. */
4747
4748 int
4749 standard_80387_constant_p (rtx x)
4750 {
4751 REAL_VALUE_TYPE r;
4752
4753 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4754 return -1;
4755
4756 if (x == CONST0_RTX (GET_MODE (x)))
4757 return 1;
4758 if (x == CONST1_RTX (GET_MODE (x)))
4759 return 2;
4760
4761 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4762
4763 /* For XFmode constants, try to find a special 80387 instruction when
4764 optimizing for size or on those CPUs that benefit from them. */
4765 if (GET_MODE (x) == XFmode
4766 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4767 {
4768 int i;
4769
4770 if (! ext_80387_constants_init)
4771 init_ext_80387_constants ();
4772
4773 for (i = 0; i < 5; i++)
4774 if (real_identical (&r, &ext_80387_constants_table[i]))
4775 return i + 3;
4776 }
4777
4778 /* A load of the constant -0.0 or -1.0 will be split into an
4779 fldz;fchs or fld1;fchs sequence. */
4780 if (real_isnegzero (&r))
4781 return 8;
4782 if (real_identical (&r, &dconstm1))
4783 return 9;
4784
4785 return 0;
4786 }
4787
4788 /* Return the opcode of the special instruction to be used to load
4789 the constant X. */
4790
4791 const char *
4792 standard_80387_constant_opcode (rtx x)
4793 {
4794 switch (standard_80387_constant_p (x))
4795 {
4796 case 1:
4797 return "fldz";
4798 case 2:
4799 return "fld1";
4800 case 3:
4801 return "fldlg2";
4802 case 4:
4803 return "fldln2";
4804 case 5:
4805 return "fldl2e";
4806 case 6:
4807 return "fldl2t";
4808 case 7:
4809 return "fldpi";
4810 case 8:
4811 case 9:
4812 return "#";
4813 default:
4814 gcc_unreachable ();
4815 }
4816 }
4817
4818 /* Return the CONST_DOUBLE representing the 80387 constant that is
4819 loaded by the specified special instruction. The argument IDX
4820 matches the return value from standard_80387_constant_p. */
4821
4822 rtx
4823 standard_80387_constant_rtx (int idx)
4824 {
4825 int i;
4826
4827 if (! ext_80387_constants_init)
4828 init_ext_80387_constants ();
4829
4830 switch (idx)
4831 {
4832 case 3:
4833 case 4:
4834 case 5:
4835 case 6:
4836 case 7:
4837 i = idx - 3;
4838 break;
4839
4840 default:
4841 gcc_unreachable ();
4842 }
4843
4844 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4845 XFmode);
4846 }
4847
4848 /* Return 1 if MODE is a valid mode for SSE.  */
4849 static int
4850 standard_sse_mode_p (enum machine_mode mode)
4851 {
4852 switch (mode)
4853 {
4854 case V16QImode:
4855 case V8HImode:
4856 case V4SImode:
4857 case V2DImode:
4858 case V4SFmode:
4859 case V2DFmode:
4860 return 1;
4861
4862 default:
4863 return 0;
4864 }
4865 }
4866
4867 /* Return 1 if X is an FP constant that we can load into an SSE register
4868 without using memory.  */
4869 int
4870 standard_sse_constant_p (rtx x)
4871 {
4872 enum machine_mode mode = GET_MODE (x);
4873
4874 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4875 return 1;
4876 if (vector_all_ones_operand (x, mode)
4877 && standard_sse_mode_p (mode))
4878 return TARGET_SSE2 ? 2 : -1;
4879
4880 return 0;
4881 }
4882
4883 /* Return the opcode of the special instruction to be used to load
4884 the constant X. */
4885
4886 const char *
4887 standard_sse_constant_opcode (rtx insn, rtx x)
4888 {
4889 switch (standard_sse_constant_p (x))
4890 {
4891 case 1:
4892 if (get_attr_mode (insn) == MODE_V4SF)
4893 return "xorps\t%0, %0";
4894 else if (get_attr_mode (insn) == MODE_V2DF)
4895 return "xorpd\t%0, %0";
4896 else
4897 return "pxor\t%0, %0";
4898 case 2:
4899 return "pcmpeqd\t%0, %0";
4900 }
4901 gcc_unreachable ();
4902 }
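/* Editor's example (illustrative only): together the two functions above
   let an all-zero V4SFmode constant be materialized as "xorps %xmm0, %xmm0"
   (or xorpd/pxor depending on the insn's mode attribute), and an all-ones
   integer vector on an SSE2 target as "pcmpeqd %xmm0, %xmm0", avoiding a
   constant-pool load in both cases.  The %xmm0 operand is just a stand-in
   for whatever destination register the insn actually uses.  */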
4903
4904 /* Return 1 if OP contains a symbol reference.  */
4905
4906 int
4907 symbolic_reference_mentioned_p (rtx op)
4908 {
4909 const char *fmt;
4910 int i;
4911
4912 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4913 return 1;
4914
4915 fmt = GET_RTX_FORMAT (GET_CODE (op));
4916 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4917 {
4918 if (fmt[i] == 'E')
4919 {
4920 int j;
4921
4922 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4923 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4924 return 1;
4925 }
4926
4927 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4928 return 1;
4929 }
4930
4931 return 0;
4932 }
4933
4934 /* Return 1 if it is appropriate to emit `ret' instructions in the
4935 body of a function. Do this only if the epilogue is simple, needing a
4936 couple of insns. Prior to reloading, we can't tell how many registers
4937 must be saved, so return 0 then. Return 0 if there is no frame
4938 marker to de-allocate. */
4939
4940 int
4941 ix86_can_use_return_insn_p (void)
4942 {
4943 struct ix86_frame frame;
4944
4945 if (! reload_completed || frame_pointer_needed)
4946 return 0;
4947
4948 /* Don't allow 32K or more bytes of popped arguments, since that's all
4949 we can do with one instruction.  */
4950 if (current_function_pops_args
4951 && current_function_args_size >= 32768)
4952 return 0;
4953
4954 ix86_compute_frame_layout (&frame);
4955 return frame.to_allocate == 0 && frame.nregs == 0;
4956 }
4957 \f
4958 /* Value should be nonzero if functions must have frame pointers.
4959 Zero means the frame pointer need not be set up (and parms may
4960 be accessed via the stack pointer) in functions that seem suitable. */
4961
4962 int
4963 ix86_frame_pointer_required (void)
4964 {
4965 /* If we accessed previous frames, then the generated code expects
4966 to be able to access the saved ebp value in our frame. */
4967 if (cfun->machine->accesses_prev_frame)
4968 return 1;
4969
4970 /* Several x86 OSes need a frame pointer for other reasons,
4971 usually pertaining to setjmp.  */
4972 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4973 return 1;
4974
4975 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4976 the frame pointer by default. Turn it back on now if we've not
4977 got a leaf function. */
4978 if (TARGET_OMIT_LEAF_FRAME_POINTER
4979 && (!current_function_is_leaf
4980 || ix86_current_function_calls_tls_descriptor))
4981 return 1;
4982
4983 if (current_function_profile)
4984 return 1;
4985
4986 return 0;
4987 }
4988
4989 /* Record that the current function accesses previous call frames. */
4990
4991 void
4992 ix86_setup_frame_addresses (void)
4993 {
4994 cfun->machine->accesses_prev_frame = 1;
4995 }
4996 \f
4997 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4998 # define USE_HIDDEN_LINKONCE 1
4999 #else
5000 # define USE_HIDDEN_LINKONCE 0
5001 #endif
5002
5003 static int pic_labels_used;
5004
5005 /* Fills in the label name that should be used for a pc thunk for
5006 the given register. */
5007
5008 static void
5009 get_pc_thunk_name (char name[32], unsigned int regno)
5010 {
5011 gcc_assert (!TARGET_64BIT);
5012
5013 if (USE_HIDDEN_LINKONCE)
5014 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5015 else
5016 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5017 }
5018
5019
5020 /* Emit the PC thunks used by -fpic code: each thunk loads its register
5021 with the return address of the caller and then returns.  */
5022
5023 void
5024 ix86_file_end (void)
5025 {
5026 rtx xops[2];
5027 int regno;
5028
5029 for (regno = 0; regno < 8; ++regno)
5030 {
5031 char name[32];
5032
5033 if (! ((pic_labels_used >> regno) & 1))
5034 continue;
5035
5036 get_pc_thunk_name (name, regno);
5037
5038 #if TARGET_MACHO
5039 if (TARGET_MACHO)
5040 {
5041 switch_to_section (darwin_sections[text_coal_section]);
5042 fputs ("\t.weak_definition\t", asm_out_file);
5043 assemble_name (asm_out_file, name);
5044 fputs ("\n\t.private_extern\t", asm_out_file);
5045 assemble_name (asm_out_file, name);
5046 fputs ("\n", asm_out_file);
5047 ASM_OUTPUT_LABEL (asm_out_file, name);
5048 }
5049 else
5050 #endif
5051 if (USE_HIDDEN_LINKONCE)
5052 {
5053 tree decl;
5054
5055 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5056 error_mark_node);
5057 TREE_PUBLIC (decl) = 1;
5058 TREE_STATIC (decl) = 1;
5059 DECL_ONE_ONLY (decl) = 1;
5060
5061 (*targetm.asm_out.unique_section) (decl, 0);
5062 switch_to_section (get_named_section (decl, NULL, 0));
5063
5064 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5065 fputs ("\t.hidden\t", asm_out_file);
5066 assemble_name (asm_out_file, name);
5067 fputc ('\n', asm_out_file);
5068 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5069 }
5070 else
5071 {
5072 switch_to_section (text_section);
5073 ASM_OUTPUT_LABEL (asm_out_file, name);
5074 }
5075
5076 xops[0] = gen_rtx_REG (SImode, regno);
5077 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5078 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5079 output_asm_insn ("ret", xops);
5080 }
5081
5082 if (NEED_INDICATE_EXEC_STACK)
5083 file_end_indicate_exec_stack ();
5084 }
5085
5086 /* Emit code for the SET_GOT patterns. */
5087
5088 const char *
5089 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5090 {
5091 rtx xops[3];
5092
5093 xops[0] = dest;
5094 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5095
5096 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5097 {
5098 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5099
5100 if (!flag_pic)
5101 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5102 else
5103 output_asm_insn ("call\t%a2", xops);
5104
5105 #if TARGET_MACHO
5106 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5107 is what will be referenced by the Mach-O PIC subsystem. */
5108 if (!label)
5109 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5110 #endif
5111
5112 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5113 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5114
5115 if (flag_pic)
5116 output_asm_insn ("pop{l}\t%0", xops);
5117 }
5118 else
5119 {
5120 char name[32];
5121 get_pc_thunk_name (name, REGNO (dest));
5122 pic_labels_used |= 1 << REGNO (dest);
5123
5124 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5125 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5126 output_asm_insn ("call\t%X2", xops);
5127 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5128 is what will be referenced by the Mach-O PIC subsystem. */
5129 #if TARGET_MACHO
5130 if (!label)
5131 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5132 else
5133 targetm.asm_out.internal_label (asm_out_file, "L",
5134 CODE_LABEL_NUMBER (label));
5135 #endif
5136 }
5137
5138 if (TARGET_MACHO)
5139 return "";
5140
5141 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5142 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5143 else
5144 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5145
5146 return "";
5147 }
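/* Editor's sketch of the code produced above (illustrative; the exact
   output depends on the target flags).  With deep branch prediction
   enabled, setting up the PIC register %ebx results in

        call    __i686.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_file_end is simply

   __i686.get_pc_thunk.bx:
        movl    (%esp), %ebx
        ret

   Without deep branch prediction, a call to a local label followed by a
   pop of %ebx is used instead.  */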
5148
5149 /* Generate a "push" pattern for input ARG.  */
5150
5151 static rtx
5152 gen_push (rtx arg)
5153 {
5154 return gen_rtx_SET (VOIDmode,
5155 gen_rtx_MEM (Pmode,
5156 gen_rtx_PRE_DEC (Pmode,
5157 stack_pointer_rtx)),
5158 arg);
5159 }
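/* For example (editor's note), on ia32 gen_push (gen_rtx_REG (SImode, 0))
   yields the RTL (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax)),
   which matches the ordinary "pushl %eax" pattern.  */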
5160
5161 /* Return the number of an unused call-clobbered register that is available
5162 for the entire function, or INVALID_REGNUM if there is none.  */
5163
5164 static unsigned int
5165 ix86_select_alt_pic_regnum (void)
5166 {
5167 if (current_function_is_leaf && !current_function_profile
5168 && !ix86_current_function_calls_tls_descriptor)
5169 {
5170 int i;
5171 for (i = 2; i >= 0; --i)
5172 if (!regs_ever_live[i])
5173 return i;
5174 }
5175
5176 return INVALID_REGNUM;
5177 }
5178
5179 /* Return 1 if we need to save REGNO. */
5180 static int
5181 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5182 {
5183 if (pic_offset_table_rtx
5184 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5185 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5186 || current_function_profile
5187 || current_function_calls_eh_return
5188 || current_function_uses_const_pool))
5189 {
5190 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5191 return 0;
5192 return 1;
5193 }
5194
5195 if (current_function_calls_eh_return && maybe_eh_return)
5196 {
5197 unsigned i;
5198 for (i = 0; ; i++)
5199 {
5200 unsigned test = EH_RETURN_DATA_REGNO (i);
5201 if (test == INVALID_REGNUM)
5202 break;
5203 if (test == regno)
5204 return 1;
5205 }
5206 }
5207
5208 if (cfun->machine->force_align_arg_pointer
5209 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5210 return 1;
5211
5212 return (regs_ever_live[regno]
5213 && !call_used_regs[regno]
5214 && !fixed_regs[regno]
5215 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5216 }
5217
5218 /* Return number of registers to be saved on the stack. */
5219
5220 static int
5221 ix86_nsaved_regs (void)
5222 {
5223 int nregs = 0;
5224 int regno;
5225
5226 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5227 if (ix86_save_reg (regno, true))
5228 nregs++;
5229 return nregs;
5230 }
5231
5232 /* Return the offset between two registers, one to be eliminated, and the other
5233 its replacement, at the start of a routine. */
5234
5235 HOST_WIDE_INT
5236 ix86_initial_elimination_offset (int from, int to)
5237 {
5238 struct ix86_frame frame;
5239 ix86_compute_frame_layout (&frame);
5240
5241 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5242 return frame.hard_frame_pointer_offset;
5243 else if (from == FRAME_POINTER_REGNUM
5244 && to == HARD_FRAME_POINTER_REGNUM)
5245 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5246 else
5247 {
5248 gcc_assert (to == STACK_POINTER_REGNUM);
5249
5250 if (from == ARG_POINTER_REGNUM)
5251 return frame.stack_pointer_offset;
5252
5253 gcc_assert (from == FRAME_POINTER_REGNUM);
5254 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5255 }
5256 }
5257
5258 /* Fill in FRAME, the ix86_frame structure describing the current function's frame.  */
5259
5260 static void
5261 ix86_compute_frame_layout (struct ix86_frame *frame)
5262 {
5263 HOST_WIDE_INT total_size;
5264 unsigned int stack_alignment_needed;
5265 HOST_WIDE_INT offset;
5266 unsigned int preferred_alignment;
5267 HOST_WIDE_INT size = get_frame_size ();
5268
5269 frame->nregs = ix86_nsaved_regs ();
5270 total_size = size;
5271
5272 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5273 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5274
5275 /* During a reload iteration the number of registers saved can change.
5276 Recompute the value as needed.  Do not recompute it when the number of
5277 registers didn't change, as reload makes multiple calls to this function
5278 and does not expect the decision to change within a single iteration.  */
5279 if (!optimize_size
5280 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5281 {
5282 int count = frame->nregs;
5283
5284 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5285 /* The fast prologue uses moves instead of pushes to save registers.  This
5286 is significantly longer, but it also executes faster, as modern hardware
5287 can execute the moves in parallel but can't do that for push/pop.
5288
5289 Be careful about choosing which prologue to emit: when a function takes
5290 many instructions to execute, we may as well use the slow version, and
5291 likewise when the function is known to be outside a hot spot (this is
5292 known with feedback only).  Weight the size of the function by the number
5293 of registers to save, as it is cheap to use one or two push instructions
5294 but very slow to use many of them.  */
5295 if (count)
5296 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5297 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5298 || (flag_branch_probabilities
5299 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5300 cfun->machine->use_fast_prologue_epilogue = false;
5301 else
5302 cfun->machine->use_fast_prologue_epilogue
5303 = !expensive_function_p (count);
5304 }
5305 if (TARGET_PROLOGUE_USING_MOVE
5306 && cfun->machine->use_fast_prologue_epilogue)
5307 frame->save_regs_using_mov = true;
5308 else
5309 frame->save_regs_using_mov = false;
5310
5311
5312 /* Skip return address and saved base pointer. */
5313 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5314
5315 frame->hard_frame_pointer_offset = offset;
5316
5317 /* Do some sanity checking of stack_alignment_needed and
5318 preferred_alignment, since the i386 port is the only one using these
5319 features, which may break easily.  */
5320
5321 gcc_assert (!size || stack_alignment_needed);
5322 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5323 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5324 gcc_assert (stack_alignment_needed
5325 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5326
5327 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5328 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5329
5330 /* Register save area */
5331 offset += frame->nregs * UNITS_PER_WORD;
5332
5333 /* Va-arg area */
5334 if (ix86_save_varrargs_registers)
5335 {
5336 offset += X86_64_VARARGS_SIZE;
5337 frame->va_arg_size = X86_64_VARARGS_SIZE;
5338 }
5339 else
5340 frame->va_arg_size = 0;
5341
5342 /* Align start of frame for local function. */
5343 frame->padding1 = ((offset + stack_alignment_needed - 1)
5344 & -stack_alignment_needed) - offset;
5345
5346 offset += frame->padding1;
5347
5348 /* Frame pointer points here. */
5349 frame->frame_pointer_offset = offset;
5350
5351 offset += size;
5352
5353 /* Add the outgoing arguments area.  This can be skipped if we eliminated
5354 all the function calls as dead code.
5355 Skipping is, however, impossible when the function calls alloca: the
5356 alloca expander assumes that the last current_function_outgoing_args_size
5357 bytes of the stack frame are unused.  */
5358 if (ACCUMULATE_OUTGOING_ARGS
5359 && (!current_function_is_leaf || current_function_calls_alloca
5360 || ix86_current_function_calls_tls_descriptor))
5361 {
5362 offset += current_function_outgoing_args_size;
5363 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5364 }
5365 else
5366 frame->outgoing_arguments_size = 0;
5367
5368 /* Align stack boundary. Only needed if we're calling another function
5369 or using alloca. */
5370 if (!current_function_is_leaf || current_function_calls_alloca
5371 || ix86_current_function_calls_tls_descriptor)
5372 frame->padding2 = ((offset + preferred_alignment - 1)
5373 & -preferred_alignment) - offset;
5374 else
5375 frame->padding2 = 0;
5376
5377 offset += frame->padding2;
5378
5379 /* We've reached end of stack frame. */
5380 frame->stack_pointer_offset = offset;
5381
5382 /* Size prologue needs to allocate. */
5383 frame->to_allocate =
5384 (size + frame->padding1 + frame->padding2
5385 + frame->outgoing_arguments_size + frame->va_arg_size);
5386
5387 if ((!frame->to_allocate && frame->nregs <= 1)
5388 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5389 frame->save_regs_using_mov = false;
5390
5391 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5392 && current_function_is_leaf
5393 && !ix86_current_function_calls_tls_descriptor)
5394 {
5395 frame->red_zone_size = frame->to_allocate;
5396 if (frame->save_regs_using_mov)
5397 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5398 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5399 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5400 }
5401 else
5402 frame->red_zone_size = 0;
5403 frame->to_allocate -= frame->red_zone_size;
5404 frame->stack_pointer_offset -= frame->red_zone_size;
5405 #if 0
5406 fprintf (stderr, "\n");
5407 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5408 fprintf (stderr, "size: %ld\n", (long)size);
5409 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5410 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5411 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5412 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5413 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5414 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5415 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5416 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5417 (long)frame->hard_frame_pointer_offset);
5418 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5419 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5420 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5421 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5422 #endif
5423 }
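/* Editor's sketch of the layout computed above (offsets are measured
   downward from the incoming argument pointer; red zone adjustment
   omitted):

       return address
       saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
       register save area (nregs words)
       va-arg save area (if any)
       padding1
       local variables (size bytes)           <- frame_pointer_offset
       outgoing argument area                    points at their top
       padding2                               <- stack_pointer_offset

   to_allocate covers everything below the register save area (including
   the va-arg area), i.e. what the prologue subtracts from the stack
   pointer after pushing the saved registers.  */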
5424
5425 /* Emit code to save registers in the prologue. */
5426
5427 static void
5428 ix86_emit_save_regs (void)
5429 {
5430 unsigned int regno;
5431 rtx insn;
5432
5433 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5434 if (ix86_save_reg (regno, true))
5435 {
5436 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5437 RTX_FRAME_RELATED_P (insn) = 1;
5438 }
5439 }
5440
5441 /* Emit code to save registers using MOV insns.  The first register
5442 is saved at POINTER + OFFSET.  */
5443 static void
5444 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5445 {
5446 unsigned int regno;
5447 rtx insn;
5448
5449 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5450 if (ix86_save_reg (regno, true))
5451 {
5452 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5453 Pmode, offset),
5454 gen_rtx_REG (Pmode, regno));
5455 RTX_FRAME_RELATED_P (insn) = 1;
5456 offset += UNITS_PER_WORD;
5457 }
5458 }
5459
5460 /* Expand a prologue or epilogue stack adjustment.
5461 The pattern exists to put a dependency on all ebp-based memory accesses.
5462 STYLE should be negative if the instructions should be marked as frame
5463 related, zero if the %r11 register is live and cannot be freely used,
5464 and positive otherwise.  */
5465
5466 static void
5467 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5468 {
5469 rtx insn;
5470
5471 if (! TARGET_64BIT)
5472 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5473 else if (x86_64_immediate_operand (offset, DImode))
5474 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5475 else
5476 {
5477 rtx r11;
5478 /* r11 is used by indirect sibcall return as well, set before the
5479 epilogue and used after the epilogue. ATM indirect sibcall
5480 shouldn't be used together with huge frame sizes in one
5481 function because of the frame_size check in sibcall.c. */
5482 gcc_assert (style);
5483 r11 = gen_rtx_REG (DImode, R11_REG);
5484 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5485 if (style < 0)
5486 RTX_FRAME_RELATED_P (insn) = 1;
5487 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5488 offset));
5489 }
5490 if (style < 0)
5491 RTX_FRAME_RELATED_P (insn) = 1;
5492 }
5493
5494 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5495
5496 static rtx
5497 ix86_internal_arg_pointer (void)
5498 {
5499 bool has_force_align_arg_pointer =
5500 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5501 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5502 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5503 && DECL_NAME (current_function_decl)
5504 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5505 && DECL_FILE_SCOPE_P (current_function_decl))
5506 || ix86_force_align_arg_pointer
5507 || has_force_align_arg_pointer)
5508 {
5509 /* Nested functions can't realign the stack due to a register
5510 conflict. */
5511 if (DECL_CONTEXT (current_function_decl)
5512 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5513 {
5514 if (ix86_force_align_arg_pointer)
5515 warning (0, "-mstackrealign ignored for nested functions");
5516 if (has_force_align_arg_pointer)
5517 error ("%s not supported for nested functions",
5518 ix86_force_align_arg_pointer_string);
5519 return virtual_incoming_args_rtx;
5520 }
5521 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5522 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5523 }
5524 else
5525 return virtual_incoming_args_rtx;
5526 }
5527
5528 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5529 This is called from dwarf2out.c to emit call frame instructions
5530 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5531 static void
5532 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5533 {
5534 rtx unspec = SET_SRC (pattern);
5535 gcc_assert (GET_CODE (unspec) == UNSPEC);
5536
5537 switch (index)
5538 {
5539 case UNSPEC_REG_SAVE:
5540 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5541 SET_DEST (pattern));
5542 break;
5543 case UNSPEC_DEF_CFA:
5544 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5545 INTVAL (XVECEXP (unspec, 0, 0)));
5546 break;
5547 default:
5548 gcc_unreachable ();
5549 }
5550 }
5551
5552 /* Expand the prologue into a bunch of separate insns. */
5553
5554 void
5555 ix86_expand_prologue (void)
5556 {
5557 rtx insn;
5558 bool pic_reg_used;
5559 struct ix86_frame frame;
5560 HOST_WIDE_INT allocate;
5561
5562 ix86_compute_frame_layout (&frame);
5563
5564 if (cfun->machine->force_align_arg_pointer)
5565 {
5566 rtx x, y;
5567
5568 /* Grab the argument pointer. */
5569 x = plus_constant (stack_pointer_rtx, 4);
5570 y = cfun->machine->force_align_arg_pointer;
5571 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5572 RTX_FRAME_RELATED_P (insn) = 1;
5573
5574 /* The unwind info consists of two parts: install the fafp as the cfa,
5575 and record the fafp as the "save register" of the stack pointer.
5576 The latter is there so that the unwinder can see where it should
5577 restore the stack pointer across the "and" insn below.  */
5578 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5579 x = gen_rtx_SET (VOIDmode, y, x);
5580 RTX_FRAME_RELATED_P (x) = 1;
5581 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5582 UNSPEC_REG_SAVE);
5583 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5584 RTX_FRAME_RELATED_P (y) = 1;
5585 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5586 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5587 REG_NOTES (insn) = x;
5588
5589 /* Align the stack. */
5590 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5591 GEN_INT (-16)));
5592
5593 /* And here we cheat like madmen with the unwind info. We force the
5594 cfa register back to sp+4, which is exactly what it was at the
5595 start of the function. Re-pushing the return address results in
5596 the return at the same spot relative to the cfa, and thus is
5597 correct wrt the unwind info. */
5598 x = cfun->machine->force_align_arg_pointer;
5599 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5600 insn = emit_insn (gen_push (x));
5601 RTX_FRAME_RELATED_P (insn) = 1;
5602
5603 x = GEN_INT (4);
5604 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5605 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5606 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5607 REG_NOTES (insn) = x;
5608 }
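/* Editor's note: put together, the realignment sequence above expands to
   roughly the following (illustrative, on ia32):

        leal    4(%esp), %ecx      # remember the incoming argument pointer
        andl    $-16, %esp         # align the stack
        pushl   -4(%ecx)           # re-push the return address

   after which the ordinary frame setup below continues.  */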
5609
5610 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5611 slower on all targets. Also sdb doesn't like it. */
5612
5613 if (frame_pointer_needed)
5614 {
5615 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5616 RTX_FRAME_RELATED_P (insn) = 1;
5617
5618 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5619 RTX_FRAME_RELATED_P (insn) = 1;
5620 }
5621
5622 allocate = frame.to_allocate;
5623
5624 if (!frame.save_regs_using_mov)
5625 ix86_emit_save_regs ();
5626 else
5627 allocate += frame.nregs * UNITS_PER_WORD;
5628
5629 /* When using the red zone we may start saving registers before allocating
5630 the stack frame, saving one cycle of the prologue.  */
5631 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5632 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5633 : stack_pointer_rtx,
5634 -frame.nregs * UNITS_PER_WORD);
5635
5636 if (allocate == 0)
5637 ;
5638 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5639 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5640 GEN_INT (-allocate), -1);
5641 else
5642 {
5643 /* Only valid for Win32. */
5644 rtx eax = gen_rtx_REG (SImode, 0);
5645 bool eax_live = ix86_eax_live_at_start_p ();
5646 rtx t;
5647
5648 gcc_assert (!TARGET_64BIT);
5649
5650 if (eax_live)
5651 {
5652 emit_insn (gen_push (eax));
5653 allocate -= 4;
5654 }
5655
5656 emit_move_insn (eax, GEN_INT (allocate));
5657
5658 insn = emit_insn (gen_allocate_stack_worker (eax));
5659 RTX_FRAME_RELATED_P (insn) = 1;
5660 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5661 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5662 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5663 t, REG_NOTES (insn));
5664
5665 if (eax_live)
5666 {
5667 if (frame_pointer_needed)
5668 t = plus_constant (hard_frame_pointer_rtx,
5669 allocate
5670 - frame.to_allocate
5671 - frame.nregs * UNITS_PER_WORD);
5672 else
5673 t = plus_constant (stack_pointer_rtx, allocate);
5674 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5675 }
5676 }
5677
5678 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5679 {
5680 if (!frame_pointer_needed || !frame.to_allocate)
5681 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5682 else
5683 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5684 -frame.nregs * UNITS_PER_WORD);
5685 }
5686
5687 pic_reg_used = false;
5688 if (pic_offset_table_rtx
5689 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5690 || current_function_profile))
5691 {
5692 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5693
5694 if (alt_pic_reg_used != INVALID_REGNUM)
5695 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5696
5697 pic_reg_used = true;
5698 }
5699
5700 if (pic_reg_used)
5701 {
5702 if (TARGET_64BIT)
5703 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5704 else
5705 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5706
5707 /* Even with accurate pre-reload life analysis, we can wind up
5708 deleting all references to the pic register after reload.
5709 Consider if cross-jumping unifies two sides of a branch
5710 controlled by a comparison vs the only read from a global.
5711 In which case, allow the set_got to be deleted, though we're
5712 too late to do anything about the ebx save in the prologue. */
5713 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5714 }
5715
5716 /* Prevent function calls from being scheduled before the call to mcount.
5717 In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5718 if (current_function_profile)
5719 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5720 }
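/* Editor's sketch of a typical expansion of the prologue above for a small
   ia32 function with a frame pointer and one call-saved register
   (illustrative; the real sequence depends on the tuning flags):

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx                    # or a movl when the fast prologue
        subl    $to_allocate, %esp      # saves registers with moves

   plus the PIC register setup when the GOT pointer is needed.  */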
5721
5722 /* Emit code to restore saved registers using MOV insns. First register
5723 is restored from POINTER + OFFSET. */
5724 static void
5725 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5726 int maybe_eh_return)
5727 {
5728 int regno;
5729 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5730
5731 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5732 if (ix86_save_reg (regno, maybe_eh_return))
5733 {
5734 /* Ensure that adjust_address won't be forced to produce pointer
5735 out of range allowed by x86-64 instruction set. */
5736 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5737 {
5738 rtx r11;
5739
5740 r11 = gen_rtx_REG (DImode, R11_REG);
5741 emit_move_insn (r11, GEN_INT (offset));
5742 emit_insn (gen_adddi3 (r11, r11, pointer));
5743 base_address = gen_rtx_MEM (Pmode, r11);
5744 offset = 0;
5745 }
5746 emit_move_insn (gen_rtx_REG (Pmode, regno),
5747 adjust_address (base_address, Pmode, offset));
5748 offset += UNITS_PER_WORD;
5749 }
5750 }
5751
5752 /* Restore function stack, frame, and registers. */
5753
5754 void
5755 ix86_expand_epilogue (int style)
5756 {
5757 int regno;
5758 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5759 struct ix86_frame frame;
5760 HOST_WIDE_INT offset;
5761
5762 ix86_compute_frame_layout (&frame);
5763
5764 /* Calculate start of saved registers relative to ebp. Special care
5765 must be taken for the normal return case of a function using
5766 eh_return: the eax and edx registers are marked as saved, but not
5767 restored along this path. */
5768 offset = frame.nregs;
5769 if (current_function_calls_eh_return && style != 2)
5770 offset -= 2;
5771 offset *= -UNITS_PER_WORD;
5772
5773 /* If we're only restoring one register and sp is not valid, then
5774 use a move instruction to restore the register, since it's
5775 less work than reloading sp and popping the register.
5776
5777 The default code results in a stack adjustment using an add/lea insn,
5778 while this code results in a LEAVE instruction (or discrete equivalent),
5779 so it is profitable in some other cases as well, especially when there
5780 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5781 is set and there is exactly one register to pop.  This heuristic may need
5782 some tuning in the future.  */
5783 if ((!sp_valid && frame.nregs <= 1)
5784 || (TARGET_EPILOGUE_USING_MOVE
5785 && cfun->machine->use_fast_prologue_epilogue
5786 && (frame.nregs > 1 || frame.to_allocate))
5787 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5788 || (frame_pointer_needed && TARGET_USE_LEAVE
5789 && cfun->machine->use_fast_prologue_epilogue
5790 && frame.nregs == 1)
5791 || current_function_calls_eh_return)
5792 {
5793 /* Restore registers.  We can use ebp or esp to address the memory
5794 locations.  If both are available, default to ebp, since offsets
5795 are known to be small.  The only exception is esp pointing directly
5796 to the end of the block of saved registers, where we may simplify
5797 the addressing mode.  */
5798
5799 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5800 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5801 frame.to_allocate, style == 2);
5802 else
5803 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5804 offset, style == 2);
5805
5806 /* eh_return epilogues need %ecx added to the stack pointer. */
5807 if (style == 2)
5808 {
5809 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5810
5811 if (frame_pointer_needed)
5812 {
5813 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5814 tmp = plus_constant (tmp, UNITS_PER_WORD);
5815 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5816
5817 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5818 emit_move_insn (hard_frame_pointer_rtx, tmp);
5819
5820 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5821 const0_rtx, style);
5822 }
5823 else
5824 {
5825 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5826 tmp = plus_constant (tmp, (frame.to_allocate
5827 + frame.nregs * UNITS_PER_WORD));
5828 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5829 }
5830 }
5831 else if (!frame_pointer_needed)
5832 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5833 GEN_INT (frame.to_allocate
5834 + frame.nregs * UNITS_PER_WORD),
5835 style);
5836 /* If not an i386, mov & pop is faster than "leave". */
5837 else if (TARGET_USE_LEAVE || optimize_size
5838 || !cfun->machine->use_fast_prologue_epilogue)
5839 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5840 else
5841 {
5842 pro_epilogue_adjust_stack (stack_pointer_rtx,
5843 hard_frame_pointer_rtx,
5844 const0_rtx, style);
5845 if (TARGET_64BIT)
5846 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5847 else
5848 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5849 }
5850 }
5851 else
5852 {
5853 /* First step is to deallocate the stack frame so that we can
5854 pop the registers. */
5855 if (!sp_valid)
5856 {
5857 gcc_assert (frame_pointer_needed);
5858 pro_epilogue_adjust_stack (stack_pointer_rtx,
5859 hard_frame_pointer_rtx,
5860 GEN_INT (offset), style);
5861 }
5862 else if (frame.to_allocate)
5863 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5864 GEN_INT (frame.to_allocate), style);
5865
5866 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5867 if (ix86_save_reg (regno, false))
5868 {
5869 if (TARGET_64BIT)
5870 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5871 else
5872 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5873 }
5874 if (frame_pointer_needed)
5875 {
5876 /* Leave results in shorter dependency chains on CPUs that are
5877 able to grok it fast. */
5878 if (TARGET_USE_LEAVE)
5879 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5880 else if (TARGET_64BIT)
5881 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5882 else
5883 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5884 }
5885 }
5886
5887 if (cfun->machine->force_align_arg_pointer)
5888 {
5889 emit_insn (gen_addsi3 (stack_pointer_rtx,
5890 cfun->machine->force_align_arg_pointer,
5891 GEN_INT (-4)));
5892 }
5893
5894 /* Sibcall epilogues don't want a return instruction. */
5895 if (style == 0)
5896 return;
5897
5898 if (current_function_pops_args && current_function_args_size)
5899 {
5900 rtx popc = GEN_INT (current_function_pops_args);
5901
5902 /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
5903 return address, do an explicit add, and jump indirectly to the
5904 caller.  */
5905
5906 if (current_function_pops_args >= 65536)
5907 {
5908 rtx ecx = gen_rtx_REG (SImode, 2);
5909
5910 /* There is no "pascal" calling convention in 64bit ABI. */
5911 gcc_assert (!TARGET_64BIT);
5912
5913 emit_insn (gen_popsi1 (ecx));
5914 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5915 emit_jump_insn (gen_return_indirect_internal (ecx));
5916 }
5917 else
5918 emit_jump_insn (gen_return_pop_internal (popc));
5919 }
5920 else
5921 emit_jump_insn (gen_return_internal ());
5922 }
5923
5924 /* Reset state that compilation of the current function may have modified.  */
5925
5926 static void
5927 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5928 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5929 {
5930 if (pic_offset_table_rtx)
5931 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5932 #if TARGET_MACHO
5933 /* Mach-O doesn't support labels at the end of objects, so if
5934 it looks like we might want one, insert a NOP. */
5935 {
5936 rtx insn = get_last_insn ();
5937 while (insn
5938 && NOTE_P (insn)
5939 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5940 insn = PREV_INSN (insn);
5941 if (insn
5942 && (LABEL_P (insn)
5943 || (NOTE_P (insn)
5944 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5945 fputs ("\tnop\n", file);
5946 }
5947 #endif
5948
5949 }
5950 \f
5951 /* Extract the parts of an RTL expression that is a valid memory address
5952 for an instruction. Return 0 if the structure of the address is
5953 grossly off. Return -1 if the address contains ASHIFT, so it is not
5954 strictly valid, but is still used for computing the length of a lea insn.  */
5955
5956 int
5957 ix86_decompose_address (rtx addr, struct ix86_address *out)
5958 {
5959 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5960 rtx base_reg, index_reg;
5961 HOST_WIDE_INT scale = 1;
5962 rtx scale_rtx = NULL_RTX;
5963 int retval = 1;
5964 enum ix86_address_seg seg = SEG_DEFAULT;
5965
5966 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5967 base = addr;
5968 else if (GET_CODE (addr) == PLUS)
5969 {
5970 rtx addends[4], op;
5971 int n = 0, i;
5972
5973 op = addr;
5974 do
5975 {
5976 if (n >= 4)
5977 return 0;
5978 addends[n++] = XEXP (op, 1);
5979 op = XEXP (op, 0);
5980 }
5981 while (GET_CODE (op) == PLUS);
5982 if (n >= 4)
5983 return 0;
5984 addends[n] = op;
5985
5986 for (i = n; i >= 0; --i)
5987 {
5988 op = addends[i];
5989 switch (GET_CODE (op))
5990 {
5991 case MULT:
5992 if (index)
5993 return 0;
5994 index = XEXP (op, 0);
5995 scale_rtx = XEXP (op, 1);
5996 break;
5997
5998 case UNSPEC:
5999 if (XINT (op, 1) == UNSPEC_TP
6000 && TARGET_TLS_DIRECT_SEG_REFS
6001 && seg == SEG_DEFAULT)
6002 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6003 else
6004 return 0;
6005 break;
6006
6007 case REG:
6008 case SUBREG:
6009 if (!base)
6010 base = op;
6011 else if (!index)
6012 index = op;
6013 else
6014 return 0;
6015 break;
6016
6017 case CONST:
6018 case CONST_INT:
6019 case SYMBOL_REF:
6020 case LABEL_REF:
6021 if (disp)
6022 return 0;
6023 disp = op;
6024 break;
6025
6026 default:
6027 return 0;
6028 }
6029 }
6030 }
6031 else if (GET_CODE (addr) == MULT)
6032 {
6033 index = XEXP (addr, 0); /* index*scale */
6034 scale_rtx = XEXP (addr, 1);
6035 }
6036 else if (GET_CODE (addr) == ASHIFT)
6037 {
6038 rtx tmp;
6039
6040 /* We're called for lea too, which implements ashift on occasion. */
6041 index = XEXP (addr, 0);
6042 tmp = XEXP (addr, 1);
6043 if (!CONST_INT_P (tmp))
6044 return 0;
6045 scale = INTVAL (tmp);
6046 if ((unsigned HOST_WIDE_INT) scale > 3)
6047 return 0;
6048 scale = 1 << scale;
6049 retval = -1;
6050 }
6051 else
6052 disp = addr; /* displacement */
6053
6054 /* Extract the integral value of scale. */
6055 if (scale_rtx)
6056 {
6057 if (!CONST_INT_P (scale_rtx))
6058 return 0;
6059 scale = INTVAL (scale_rtx);
6060 }
6061
6062 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6063 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6064
6065 /* Allow the arg pointer and stack pointer as the index if there is no scaling.  */
6066 if (base_reg && index_reg && scale == 1
6067 && (index_reg == arg_pointer_rtx
6068 || index_reg == frame_pointer_rtx
6069 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6070 {
6071 rtx tmp;
6072 tmp = base, base = index, index = tmp;
6073 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6074 }
6075
6076 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6077 if ((base_reg == hard_frame_pointer_rtx
6078 || base_reg == frame_pointer_rtx
6079 || base_reg == arg_pointer_rtx) && !disp)
6080 disp = const0_rtx;
6081
6082 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6083 Avoid this by transforming to [%esi+0]. */
6084 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6085 && base_reg && !index_reg && !disp
6086 && REG_P (base_reg)
6087 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6088 disp = const0_rtx;
6089
6090 /* Special case: encode reg+reg instead of reg*2. */
6091 if (!base && index && scale && scale == 2)
6092 base = index, base_reg = index_reg, scale = 1;
6093
6094 /* Special case: scaling cannot be encoded without base or displacement. */
6095 if (!base && !disp && index && scale != 1)
6096 disp = const0_rtx;
6097
6098 out->base = base;
6099 out->index = index;
6100 out->disp = disp;
6101 out->scale = scale;
6102 out->seg = seg;
6103
6104 return retval;
6105 }
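/* Editor's example (illustrative): given the canonical address

       (plus:SI (plus:SI (mult:SI (reg:SI ax) (const_int 4))
                         (reg:SI bx))
                (const_int 12))

   the routine above fills OUT with base = bx, index = ax, scale = 4,
   disp = (const_int 12), seg = SEG_DEFAULT and returns 1, i.e. the
   operand 12(%ebx,%eax,4).  */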
6106 \f
6107 /* Return the cost of the memory address X.
6108 For i386, it is better to use a complex address than let gcc copy
6109 the address into a reg and make a new pseudo.  But not if the address
6110 requires two regs - that would mean more pseudos with longer
6111 lifetimes.  */
6112 static int
6113 ix86_address_cost (rtx x)
6114 {
6115 struct ix86_address parts;
6116 int cost = 1;
6117 int ok = ix86_decompose_address (x, &parts);
6118
6119 gcc_assert (ok);
6120
6121 if (parts.base && GET_CODE (parts.base) == SUBREG)
6122 parts.base = SUBREG_REG (parts.base);
6123 if (parts.index && GET_CODE (parts.index) == SUBREG)
6124 parts.index = SUBREG_REG (parts.index);
6125
6126 /* More complex memory references are better. */
6127 if (parts.disp && parts.disp != const0_rtx)
6128 cost--;
6129 if (parts.seg != SEG_DEFAULT)
6130 cost--;
6131
6132 /* Attempt to minimize number of registers in the address. */
6133 if ((parts.base
6134 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6135 || (parts.index
6136 && (!REG_P (parts.index)
6137 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6138 cost++;
6139
6140 if (parts.base
6141 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6142 && parts.index
6143 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6144 && parts.base != parts.index)
6145 cost++;
6146
6147 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6148 since its predecode logic can't detect the length of such instructions
6149 and they degenerate to vector decoding.  Increase the cost of such
6150 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6151 to split such addresses or even to refuse them entirely.
6152
6153 The following addressing modes are affected:
6154 [base+scale*index]
6155 [scale*index+disp]
6156 [base+index]
6157
6158 The first and last cases may be avoidable by explicitly coding a zero
6159 displacement in the memory address, but I don't have an AMD K6 machine
6160 handy to check this theory.  */
6161
6162 if (TARGET_K6
6163 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6164 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6165 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6166 cost += 10;
6167
6168 return cost;
6169 }
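/* Editor's note (illustrative): with the heuristics above, (%ebp) costs 1,
   8(%ebp) costs 0 (a displacement makes the address "more complex" and
   therefore preferable), and an address formed from two different pseudo
   registers costs 3 before register allocation; K6 tuning adds a further
   penalty of 10 for the problematic forms listed above.  */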
6170 \f
6171 /* If X is a machine specific address (i.e. a symbol or label being
6172 referenced as a displacement from the GOT implemented using an
6173 UNSPEC), then return the base term. Otherwise return X. */
6174
6175 rtx
6176 ix86_find_base_term (rtx x)
6177 {
6178 rtx term;
6179
6180 if (TARGET_64BIT)
6181 {
6182 if (GET_CODE (x) != CONST)
6183 return x;
6184 term = XEXP (x, 0);
6185 if (GET_CODE (term) == PLUS
6186 && (CONST_INT_P (XEXP (term, 1))
6187 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6188 term = XEXP (term, 0);
6189 if (GET_CODE (term) != UNSPEC
6190 || XINT (term, 1) != UNSPEC_GOTPCREL)
6191 return x;
6192
6193 term = XVECEXP (term, 0, 0);
6194
6195 if (GET_CODE (term) != SYMBOL_REF
6196 && GET_CODE (term) != LABEL_REF)
6197 return x;
6198
6199 return term;
6200 }
6201
6202 term = ix86_delegitimize_address (x);
6203
6204 if (GET_CODE (term) != SYMBOL_REF
6205 && GET_CODE (term) != LABEL_REF)
6206 return x;
6207
6208 return term;
6209 }
6210
6211 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6212 this is used to form addresses of local data when -fPIC is in
6213 use.  */
6214
6215 static bool
6216 darwin_local_data_pic (rtx disp)
6217 {
6218 if (GET_CODE (disp) == MINUS)
6219 {
6220 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6221 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6222 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6223 {
6224 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6225 if (! strcmp (sym_name, "<pic base>"))
6226 return true;
6227 }
6228 }
6229
6230 return false;
6231 }
6232 \f
6233 /* Determine if a given RTX is a valid constant. We already know this
6234 satisfies CONSTANT_P. */
6235
6236 bool
6237 legitimate_constant_p (rtx x)
6238 {
6239 switch (GET_CODE (x))
6240 {
6241 case CONST:
6242 x = XEXP (x, 0);
6243
6244 if (GET_CODE (x) == PLUS)
6245 {
6246 if (!CONST_INT_P (XEXP (x, 1)))
6247 return false;
6248 x = XEXP (x, 0);
6249 }
6250
6251 if (TARGET_MACHO && darwin_local_data_pic (x))
6252 return true;
6253
6254 /* Only some unspecs are valid as "constants". */
6255 if (GET_CODE (x) == UNSPEC)
6256 switch (XINT (x, 1))
6257 {
6258 case UNSPEC_GOTOFF:
6259 return TARGET_64BIT;
6260 case UNSPEC_TPOFF:
6261 case UNSPEC_NTPOFF:
6262 x = XVECEXP (x, 0, 0);
6263 return (GET_CODE (x) == SYMBOL_REF
6264 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6265 case UNSPEC_DTPOFF:
6266 x = XVECEXP (x, 0, 0);
6267 return (GET_CODE (x) == SYMBOL_REF
6268 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6269 default:
6270 return false;
6271 }
6272
6273 /* We must have drilled down to a symbol. */
6274 if (GET_CODE (x) == LABEL_REF)
6275 return true;
6276 if (GET_CODE (x) != SYMBOL_REF)
6277 return false;
6278 /* FALLTHRU */
6279
6280 case SYMBOL_REF:
6281 /* TLS symbols are never valid. */
6282 if (SYMBOL_REF_TLS_MODEL (x))
6283 return false;
6284 break;
6285
6286 case CONST_DOUBLE:
6287 if (GET_MODE (x) == TImode
6288 && x != CONST0_RTX (TImode)
6289 && !TARGET_64BIT)
6290 return false;
6291 break;
6292
6293 case CONST_VECTOR:
6294 if (x == CONST0_RTX (GET_MODE (x)))
6295 return true;
6296 return false;
6297
6298 default:
6299 break;
6300 }
6301
6302 /* Otherwise we handle everything else in the move patterns. */
6303 return true;
6304 }
6305
6306 /* Determine if it's legal to put X into the constant pool. This
6307 is not possible for the address of thread-local symbols, which
6308 is checked above. */
6309
6310 static bool
6311 ix86_cannot_force_const_mem (rtx x)
6312 {
6313 /* We can always put integral constants and vectors in memory. */
6314 switch (GET_CODE (x))
6315 {
6316 case CONST_INT:
6317 case CONST_DOUBLE:
6318 case CONST_VECTOR:
6319 return false;
6320
6321 default:
6322 break;
6323 }
6324 return !legitimate_constant_p (x);
6325 }
6326
6327 /* Determine if a given RTX is a valid constant address. */
6328
6329 bool
6330 constant_address_p (rtx x)
6331 {
6332 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6333 }
6334
6335 /* Nonzero if the constant value X is a legitimate general operand
6336 when generating PIC code. It is given that flag_pic is on and
6337 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6338
6339 bool
6340 legitimate_pic_operand_p (rtx x)
6341 {
6342 rtx inner;
6343
6344 switch (GET_CODE (x))
6345 {
6346 case CONST:
6347 inner = XEXP (x, 0);
6348 if (GET_CODE (inner) == PLUS
6349 && CONST_INT_P (XEXP (inner, 1)))
6350 inner = XEXP (inner, 0);
6351
6352 /* Only some unspecs are valid as "constants". */
6353 if (GET_CODE (inner) == UNSPEC)
6354 switch (XINT (inner, 1))
6355 {
6356 case UNSPEC_GOTOFF:
6357 return TARGET_64BIT;
6358 case UNSPEC_TPOFF:
6359 x = XVECEXP (inner, 0, 0);
6360 return (GET_CODE (x) == SYMBOL_REF
6361 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6362 default:
6363 return false;
6364 }
6365 /* FALLTHRU */
6366
6367 case SYMBOL_REF:
6368 case LABEL_REF:
6369 return legitimate_pic_address_disp_p (x);
6370
6371 default:
6372 return true;
6373 }
6374 }
6375
6376 /* Determine if a given CONST RTX is a valid memory displacement
6377 in PIC mode. */
6378
6379 int
6380 legitimate_pic_address_disp_p (rtx disp)
6381 {
6382 bool saw_plus;
6383
6384 /* In 64bit mode we can allow direct addresses of symbols and labels
6385 when they are not dynamic symbols. */
6386 if (TARGET_64BIT)
6387 {
6388 rtx op0 = disp, op1;
6389
6390 switch (GET_CODE (disp))
6391 {
6392 case LABEL_REF:
6393 return true;
6394
6395 case CONST:
6396 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6397 break;
6398 op0 = XEXP (XEXP (disp, 0), 0);
6399 op1 = XEXP (XEXP (disp, 0), 1);
6400 if (!CONST_INT_P (op1)
6401 || INTVAL (op1) >= 16*1024*1024
6402 || INTVAL (op1) < -16*1024*1024)
6403 break;
6404 if (GET_CODE (op0) == LABEL_REF)
6405 return true;
6406 if (GET_CODE (op0) != SYMBOL_REF)
6407 break;
6408 /* FALLTHRU */
6409
6410 case SYMBOL_REF:
6411 /* TLS references should always be enclosed in UNSPEC. */
6412 if (SYMBOL_REF_TLS_MODEL (op0))
6413 return false;
6414 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6415 return true;
6416 break;
6417
6418 default:
6419 break;
6420 }
6421 }
6422 if (GET_CODE (disp) != CONST)
6423 return 0;
6424 disp = XEXP (disp, 0);
6425
6426 if (TARGET_64BIT)
6427 {
6428 /* It is unsafe to allow PLUS expressions.  This limits the allowed
6429 distance of GOT references.  We should not need these anyway.  */
6430 if (GET_CODE (disp) != UNSPEC
6431 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6432 && XINT (disp, 1) != UNSPEC_GOTOFF))
6433 return 0;
6434
6435 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6436 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6437 return 0;
6438 return 1;
6439 }
6440
6441 saw_plus = false;
6442 if (GET_CODE (disp) == PLUS)
6443 {
6444 if (!CONST_INT_P (XEXP (disp, 1)))
6445 return 0;
6446 disp = XEXP (disp, 0);
6447 saw_plus = true;
6448 }
6449
6450 if (TARGET_MACHO && darwin_local_data_pic (disp))
6451 return 1;
6452
6453 if (GET_CODE (disp) != UNSPEC)
6454 return 0;
6455
6456 switch (XINT (disp, 1))
6457 {
6458 case UNSPEC_GOT:
6459 if (saw_plus)
6460 return false;
6461 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6462 case UNSPEC_GOTOFF:
6463 /* Refuse GOTOFF in 64-bit mode since it is always 64 bits wide when used.
6464 While the ABI also specifies a 32-bit relocation, we don't produce it
6465 in the small PIC model at all.  */
6466 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6467 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6468 && !TARGET_64BIT)
6469 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6470 return false;
6471 case UNSPEC_GOTTPOFF:
6472 case UNSPEC_GOTNTPOFF:
6473 case UNSPEC_INDNTPOFF:
6474 if (saw_plus)
6475 return false;
6476 disp = XVECEXP (disp, 0, 0);
6477 return (GET_CODE (disp) == SYMBOL_REF
6478 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6479 case UNSPEC_NTPOFF:
6480 disp = XVECEXP (disp, 0, 0);
6481 return (GET_CODE (disp) == SYMBOL_REF
6482 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6483 case UNSPEC_DTPOFF:
6484 disp = XVECEXP (disp, 0, 0);
6485 return (GET_CODE (disp) == SYMBOL_REF
6486 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6487 }
6488
6489 return 0;
6490 }
6491
6492 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6493 memory address for an instruction. The MODE argument is the machine mode
6494 for the MEM expression that wants to use this address.
6495
6496 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6497 convert common non-canonical forms to canonical form so that they will
6498 be recognized. */
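/* A few examples of what the checks below accept and reject (editor's
   illustration): 8(%ebp,%ecx,4) and (%esp,%eax,2) are valid; (,%esp,4)
   is rejected because %esp cannot be used as an index; scale factors
   other than 1, 2, 4 and 8 are rejected; and a bare symbolic displacement
   is rejected under -fpic unless it is wrapped in one of the PIC/TLS
   unspecs recognized below.  */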
6499
6500 int
6501 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6502 {
6503 struct ix86_address parts;
6504 rtx base, index, disp;
6505 HOST_WIDE_INT scale;
6506 const char *reason = NULL;
6507 rtx reason_rtx = NULL_RTX;
6508
6509 if (TARGET_DEBUG_ADDR)
6510 {
6511 fprintf (stderr,
6512 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6513 GET_MODE_NAME (mode), strict);
6514 debug_rtx (addr);
6515 }
6516
6517 if (ix86_decompose_address (addr, &parts) <= 0)
6518 {
6519 reason = "decomposition failed";
6520 goto report_error;
6521 }
6522
6523 base = parts.base;
6524 index = parts.index;
6525 disp = parts.disp;
6526 scale = parts.scale;
6527
6528 /* Validate base register.
6529
6530 Don't allow SUBREG's that span more than a word here. It can lead to spill
6531 failures when the base is one word out of a two word structure, which is
6532 represented internally as a DImode int. */
6533
6534 if (base)
6535 {
6536 rtx reg;
6537 reason_rtx = base;
6538
6539 if (REG_P (base))
6540 reg = base;
6541 else if (GET_CODE (base) == SUBREG
6542 && REG_P (SUBREG_REG (base))
6543 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6544 <= UNITS_PER_WORD)
6545 reg = SUBREG_REG (base);
6546 else
6547 {
6548 reason = "base is not a register";
6549 goto report_error;
6550 }
6551
6552 if (GET_MODE (base) != Pmode)
6553 {
6554 reason = "base is not in Pmode";
6555 goto report_error;
6556 }
6557
6558 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6559 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6560 {
6561 reason = "base is not valid";
6562 goto report_error;
6563 }
6564 }
6565
6566 /* Validate index register.
6567
6568 Don't allow SUBREG's that span more than a word here -- same as above. */
6569
6570 if (index)
6571 {
6572 rtx reg;
6573 reason_rtx = index;
6574
6575 if (REG_P (index))
6576 reg = index;
6577 else if (GET_CODE (index) == SUBREG
6578 && REG_P (SUBREG_REG (index))
6579 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6580 <= UNITS_PER_WORD)
6581 reg = SUBREG_REG (index);
6582 else
6583 {
6584 reason = "index is not a register";
6585 goto report_error;
6586 }
6587
6588 if (GET_MODE (index) != Pmode)
6589 {
6590 reason = "index is not in Pmode";
6591 goto report_error;
6592 }
6593
6594 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6595 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6596 {
6597 reason = "index is not valid";
6598 goto report_error;
6599 }
6600 }
6601
6602 /* Validate scale factor. */
6603 if (scale != 1)
6604 {
6605 reason_rtx = GEN_INT (scale);
6606 if (!index)
6607 {
6608 reason = "scale without index";
6609 goto report_error;
6610 }
6611
6612 if (scale != 2 && scale != 4 && scale != 8)
6613 {
6614 reason = "scale is not a valid multiplier";
6615 goto report_error;
6616 }
6617 }
6618
6619 /* Validate displacement. */
6620 if (disp)
6621 {
6622 reason_rtx = disp;
6623
6624 if (GET_CODE (disp) == CONST
6625 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6626 switch (XINT (XEXP (disp, 0), 1))
6627 {
6628 /* Refuse GOTOFF and GOT in 64-bit mode since they are always 64 bits
6629 wide when used.  While the ABI also specifies 32-bit relocations, we
6630 don't produce them at all and use IP-relative addressing instead.  */
6631 case UNSPEC_GOT:
6632 case UNSPEC_GOTOFF:
6633 gcc_assert (flag_pic);
6634 if (!TARGET_64BIT)
6635 goto is_legitimate_pic;
6636 reason = "64bit address unspec";
6637 goto report_error;
6638
6639 case UNSPEC_GOTPCREL:
6640 gcc_assert (flag_pic);
6641 goto is_legitimate_pic;
6642
6643 case UNSPEC_GOTTPOFF:
6644 case UNSPEC_GOTNTPOFF:
6645 case UNSPEC_INDNTPOFF:
6646 case UNSPEC_NTPOFF:
6647 case UNSPEC_DTPOFF:
6648 break;
6649
6650 default:
6651 reason = "invalid address unspec";
6652 goto report_error;
6653 }
6654
6655 else if (SYMBOLIC_CONST (disp)
6656 && (flag_pic
6657 || (TARGET_MACHO
6658 #if TARGET_MACHO
6659 && MACHOPIC_INDIRECT
6660 && !machopic_operand_p (disp)
6661 #endif
6662 )))
6663 {
6664
6665 is_legitimate_pic:
6666 if (TARGET_64BIT && (index || base))
6667 {
6668 /* foo@dtpoff(%rX) is ok. */
6669 if (GET_CODE (disp) != CONST
6670 || GET_CODE (XEXP (disp, 0)) != PLUS
6671 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6672 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6673 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6674 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6675 {
6676 reason = "non-constant pic memory reference";
6677 goto report_error;
6678 }
6679 }
6680 else if (! legitimate_pic_address_disp_p (disp))
6681 {
6682 reason = "displacement is an invalid pic construct";
6683 goto report_error;
6684 }
6685
6686 /* This code used to verify that a symbolic pic displacement
6687 includes the pic_offset_table_rtx register.
6688
6689 While this is a good idea, unfortunately these constructs may
6690 be created by the "adds using lea" optimization for incorrect
6691 code like:
6692
6693 int a;
6694 int foo(int i)
6695 {
6696 return *(&a+i);
6697 }
6698
6699 This code is nonsensical, but results in addressing the
6700 GOT table with pic_offset_table_rtx as the base.  We can't
6701 just refuse it easily, since it gets matched by the
6702 "addsi3" pattern, which later gets split to lea in case
6703 the output register differs from the input.  While this
6704 could be handled by a separate addsi pattern for this case
6705 that never results in lea, disabling this test seems to be
6706 the easier and correct fix for the crash.  */
6707 }
6708 else if (GET_CODE (disp) != LABEL_REF
6709 && !CONST_INT_P (disp)
6710 && (GET_CODE (disp) != CONST
6711 || !legitimate_constant_p (disp))
6712 && (GET_CODE (disp) != SYMBOL_REF
6713 || !legitimate_constant_p (disp)))
6714 {
6715 reason = "displacement is not constant";
6716 goto report_error;
6717 }
6718 else if (TARGET_64BIT
6719 && !x86_64_immediate_operand (disp, VOIDmode))
6720 {
6721 reason = "displacement is out of range";
6722 goto report_error;
6723 }
6724 }
6725
6726 /* Everything looks valid. */
6727 if (TARGET_DEBUG_ADDR)
6728 fprintf (stderr, "Success.\n");
6729 return TRUE;
6730
6731 report_error:
6732 if (TARGET_DEBUG_ADDR)
6733 {
6734 fprintf (stderr, "Error: %s\n", reason);
6735 debug_rtx (reason_rtx);
6736 }
6737 return FALSE;
6738 }
6739 \f
6740 /* Return a unique alias set for the GOT. */
6741
6742 static HOST_WIDE_INT
6743 ix86_GOT_alias_set (void)
6744 {
6745 static HOST_WIDE_INT set = -1;
6746 if (set == -1)
6747 set = new_alias_set ();
6748 return set;
6749 }
6750
6751 /* Return a legitimate reference for ORIG (an address) using the
6752 register REG. If REG is 0, a new pseudo is generated.
6753
6754 There are two types of references that must be handled:
6755
6756 1. Global data references must load the address from the GOT, via
6757 the PIC reg. An insn is emitted to do this load, and the reg is
6758 returned.
6759
6760 2. Static data references, constant pool addresses, and code labels
6761 compute the address as an offset from the GOT, whose base is in
6762 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6763 differentiate them from global data objects. The returned
6764 address is the PIC reg + an unspec constant.
6765
6766 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6767 reg also appears in the address. */
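   /* Illustrative sketch (not taken from the original sources): on a 32-bit
      ELF target with %ebx holding the GOT pointer, the two cases above
      roughly correspond to

        extern int glob;   ->  movl  glob@GOT(%ebx), %eax       (case 1)
        static int local;  ->  leal  local@GOTOFF(%ebx), %eax   (case 2)

      i.e. a load through the GOT versus an offset from the GOT base.  */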
6768
6769 static rtx
6770 legitimize_pic_address (rtx orig, rtx reg)
6771 {
6772 rtx addr = orig;
6773 rtx new = orig;
6774 rtx base;
6775
6776 #if TARGET_MACHO
6777 if (TARGET_MACHO && !TARGET_64BIT)
6778 {
6779 if (reg == 0)
6780 reg = gen_reg_rtx (Pmode);
6781 /* Use the generic Mach-O PIC machinery. */
6782 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6783 }
6784 #endif
6785
6786 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6787 new = addr;
6788 else if (TARGET_64BIT
6789 && ix86_cmodel != CM_SMALL_PIC
6790 && local_symbolic_operand (addr, Pmode))
6791 {
6792 rtx tmpreg;
6793 /* This symbol may be referenced via a displacement from the PIC
6794 base address (@GOTOFF). */
6795
6796 if (reload_in_progress)
6797 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6798 if (GET_CODE (addr) == CONST)
6799 addr = XEXP (addr, 0);
6800 if (GET_CODE (addr) == PLUS)
6801 {
6802 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6803 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6804 }
6805 else
6806 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6807 new = gen_rtx_CONST (Pmode, new);
6808 if (!reg)
6809 tmpreg = gen_reg_rtx (Pmode);
6810 else
6811 tmpreg = reg;
6812 emit_move_insn (tmpreg, new);
6813
6814 if (reg != 0)
6815 {
6816 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6817 tmpreg, 1, OPTAB_DIRECT);
6818 new = reg;
6819 }
6820 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6821 }
6822 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6823 {
6824 /* This symbol may be referenced via a displacement from the PIC
6825 base address (@GOTOFF). */
6826
6827 if (reload_in_progress)
6828 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6829 if (GET_CODE (addr) == CONST)
6830 addr = XEXP (addr, 0);
6831 if (GET_CODE (addr) == PLUS)
6832 {
6833 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6834 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6835 }
6836 else
6837 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6838 new = gen_rtx_CONST (Pmode, new);
6839 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6840
6841 if (reg != 0)
6842 {
6843 emit_move_insn (reg, new);
6844 new = reg;
6845 }
6846 }
6847 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6848 {
6849 if (TARGET_64BIT)
6850 {
6851 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6852 new = gen_rtx_CONST (Pmode, new);
6853 new = gen_const_mem (Pmode, new);
6854 set_mem_alias_set (new, ix86_GOT_alias_set ());
6855
6856 if (reg == 0)
6857 reg = gen_reg_rtx (Pmode);
 6858 	  /* Use gen_movsi directly, otherwise the address is loaded
 6859 	     into a register for CSE.  We don't want to CSE these addresses;
 6860 	     instead we CSE addresses loaded from the GOT table, so skip this.  */
6861 emit_insn (gen_movsi (reg, new));
6862 new = reg;
6863 }
6864 else
6865 {
6866 /* This symbol must be referenced via a load from the
6867 Global Offset Table (@GOT). */
6868
6869 if (reload_in_progress)
6870 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6871 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6872 new = gen_rtx_CONST (Pmode, new);
6873 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6874 new = gen_const_mem (Pmode, new);
6875 set_mem_alias_set (new, ix86_GOT_alias_set ());
6876
6877 if (reg == 0)
6878 reg = gen_reg_rtx (Pmode);
6879 emit_move_insn (reg, new);
6880 new = reg;
6881 }
6882 }
6883 else
6884 {
6885 if (CONST_INT_P (addr)
6886 && !x86_64_immediate_operand (addr, VOIDmode))
6887 {
6888 if (reg)
6889 {
6890 emit_move_insn (reg, addr);
6891 new = reg;
6892 }
6893 else
6894 new = force_reg (Pmode, addr);
6895 }
6896 else if (GET_CODE (addr) == CONST)
6897 {
6898 addr = XEXP (addr, 0);
6899
 6900 	  /* We must match what we generated earlier.  Assume the only
 6901 	     unspecs that can get here are ours.  Not that we could do
 6902 	     anything with them anyway....  */
6903 if (GET_CODE (addr) == UNSPEC
6904 || (GET_CODE (addr) == PLUS
6905 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6906 return orig;
6907 gcc_assert (GET_CODE (addr) == PLUS);
6908 }
6909 if (GET_CODE (addr) == PLUS)
6910 {
6911 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6912
6913 /* Check first to see if this is a constant offset from a @GOTOFF
6914 symbol reference. */
6915 if (local_symbolic_operand (op0, Pmode)
6916 && CONST_INT_P (op1))
6917 {
6918 if (!TARGET_64BIT)
6919 {
6920 if (reload_in_progress)
6921 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6922 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6923 UNSPEC_GOTOFF);
6924 new = gen_rtx_PLUS (Pmode, new, op1);
6925 new = gen_rtx_CONST (Pmode, new);
6926 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6927
6928 if (reg != 0)
6929 {
6930 emit_move_insn (reg, new);
6931 new = reg;
6932 }
6933 }
6934 else
6935 {
6936 if (INTVAL (op1) < -16*1024*1024
6937 || INTVAL (op1) >= 16*1024*1024)
6938 {
6939 if (!x86_64_immediate_operand (op1, Pmode))
6940 op1 = force_reg (Pmode, op1);
6941 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6942 }
6943 }
6944 }
6945 else
6946 {
6947 base = legitimize_pic_address (XEXP (addr, 0), reg);
6948 new = legitimize_pic_address (XEXP (addr, 1),
6949 base == reg ? NULL_RTX : reg);
6950
6951 if (CONST_INT_P (new))
6952 new = plus_constant (base, INTVAL (new));
6953 else
6954 {
6955 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6956 {
6957 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6958 new = XEXP (new, 1);
6959 }
6960 new = gen_rtx_PLUS (Pmode, base, new);
6961 }
6962 }
6963 }
6964 }
6965 return new;
6966 }
6967 \f
6968 /* Load the thread pointer. If TO_REG is true, force it into a register. */
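/* As a rough note: UNSPEC_TP represents the thread-pointer base, which on
   GNU/Linux targets is typically addressed through the %gs segment in
   32-bit mode and %fs in 64-bit mode.  */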
6969
6970 static rtx
6971 get_thread_pointer (int to_reg)
6972 {
6973 rtx tp, reg, insn;
6974
6975 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6976 if (!to_reg)
6977 return tp;
6978
6979 reg = gen_reg_rtx (Pmode);
6980 insn = gen_rtx_SET (VOIDmode, reg, tp);
6981 insn = emit_insn (insn);
6982
6983 return reg;
6984 }
6985
6986 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6987 false if we expect this to be used for a memory address and true if
6988 we expect to load the address into a register. */
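/* A rough, non-authoritative sketch of what the models below produce:
   the dynamic models materialize the address through a call into the TLS
   runtime (the gen_tls_global_dynamic and gen_tls_local_dynamic_base
   patterns, plus an @DTPOFF offset in the local-dynamic case), while the
   exec models read an offset (@GOTTPOFF, @NTPOFF or @TPOFF) and combine
   it with the thread pointer -- added for 64-bit and the GNU TLS
   variants, subtracted otherwise, exactly as the code below does.  */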
6989
6990 static rtx
6991 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6992 {
6993 rtx dest, base, off, pic, tp;
6994 int type;
6995
6996 switch (model)
6997 {
6998 case TLS_MODEL_GLOBAL_DYNAMIC:
6999 dest = gen_reg_rtx (Pmode);
7000 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7001
7002 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7003 {
7004 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7005
7006 start_sequence ();
7007 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7008 insns = get_insns ();
7009 end_sequence ();
7010
7011 emit_libcall_block (insns, dest, rax, x);
7012 }
7013 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7014 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7015 else
7016 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7017
7018 if (TARGET_GNU2_TLS)
7019 {
7020 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7021
7022 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7023 }
7024 break;
7025
7026 case TLS_MODEL_LOCAL_DYNAMIC:
7027 base = gen_reg_rtx (Pmode);
7028 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7029
7030 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7031 {
7032 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7033
7034 start_sequence ();
7035 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7036 insns = get_insns ();
7037 end_sequence ();
7038
7039 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7040 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7041 emit_libcall_block (insns, base, rax, note);
7042 }
7043 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7044 emit_insn (gen_tls_local_dynamic_base_64 (base));
7045 else
7046 emit_insn (gen_tls_local_dynamic_base_32 (base));
7047
7048 if (TARGET_GNU2_TLS)
7049 {
7050 rtx x = ix86_tls_module_base ();
7051
7052 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7053 gen_rtx_MINUS (Pmode, x, tp));
7054 }
7055
7056 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7057 off = gen_rtx_CONST (Pmode, off);
7058
7059 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7060
7061 if (TARGET_GNU2_TLS)
7062 {
7063 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7064
7065 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7066 }
7067
7068 break;
7069
7070 case TLS_MODEL_INITIAL_EXEC:
7071 if (TARGET_64BIT)
7072 {
7073 pic = NULL;
7074 type = UNSPEC_GOTNTPOFF;
7075 }
7076 else if (flag_pic)
7077 {
7078 if (reload_in_progress)
7079 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7080 pic = pic_offset_table_rtx;
7081 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7082 }
7083 else if (!TARGET_ANY_GNU_TLS)
7084 {
7085 pic = gen_reg_rtx (Pmode);
7086 emit_insn (gen_set_got (pic));
7087 type = UNSPEC_GOTTPOFF;
7088 }
7089 else
7090 {
7091 pic = NULL;
7092 type = UNSPEC_INDNTPOFF;
7093 }
7094
7095 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7096 off = gen_rtx_CONST (Pmode, off);
7097 if (pic)
7098 off = gen_rtx_PLUS (Pmode, pic, off);
7099 off = gen_const_mem (Pmode, off);
7100 set_mem_alias_set (off, ix86_GOT_alias_set ());
7101
7102 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7103 {
7104 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7105 off = force_reg (Pmode, off);
7106 return gen_rtx_PLUS (Pmode, base, off);
7107 }
7108 else
7109 {
7110 base = get_thread_pointer (true);
7111 dest = gen_reg_rtx (Pmode);
7112 emit_insn (gen_subsi3 (dest, base, off));
7113 }
7114 break;
7115
7116 case TLS_MODEL_LOCAL_EXEC:
7117 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7118 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7119 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7120 off = gen_rtx_CONST (Pmode, off);
7121
7122 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7123 {
7124 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7125 return gen_rtx_PLUS (Pmode, base, off);
7126 }
7127 else
7128 {
7129 base = get_thread_pointer (true);
7130 dest = gen_reg_rtx (Pmode);
7131 emit_insn (gen_subsi3 (dest, base, off));
7132 }
7133 break;
7134
7135 default:
7136 gcc_unreachable ();
7137 }
7138
7139 return dest;
7140 }
7141
7142 /* Try machine-dependent ways of modifying an illegitimate address
7143 to be legitimate. If we find one, return the new, valid address.
7144 This macro is used in only one place: `memory_address' in explow.c.
7145
7146 OLDX is the address as it was before break_out_memory_refs was called.
7147 In some cases it is useful to look at this to decide what needs to be done.
7148
7149 MODE and WIN are passed so that this macro can use
7150 GO_IF_LEGITIMATE_ADDRESS.
7151
7152 It is always safe for this macro to do nothing. It exists to recognize
7153 opportunities to optimize the output.
7154
7155 For the 80386, we handle X+REG by loading X into a register R and
7156 using R+REG. R will go in a general reg and indexing will be used.
7157 However, if REG is a broken-out memory address or multiplication,
7158 nothing needs to be done because REG can certainly go in a general reg.
7159
7160 When -fpic is used, special handling is needed for symbolic references.
7161 See comments by legitimize_pic_address in i386.c for details. */
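/* For instance (a sketch of the canonicalizations below), an address such
   as (plus:SI (ashift:SI (reg) (const_int 2)) (reg)) is rewritten as
   (plus:SI (mult:SI (reg) (const_int 4)) (reg)) so that it matches the
   base + index*scale addressing forms the machine actually provides.  */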
7162
7163 rtx
7164 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7165 {
7166 int changed = 0;
7167 unsigned log;
7168
7169 if (TARGET_DEBUG_ADDR)
7170 {
7171 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7172 GET_MODE_NAME (mode));
7173 debug_rtx (x);
7174 }
7175
7176 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7177 if (log)
7178 return legitimize_tls_address (x, log, false);
7179 if (GET_CODE (x) == CONST
7180 && GET_CODE (XEXP (x, 0)) == PLUS
7181 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7182 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7183 {
7184 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7185 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7186 }
7187
7188 if (flag_pic && SYMBOLIC_CONST (x))
7189 return legitimize_pic_address (x, 0);
7190
7191 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7192 if (GET_CODE (x) == ASHIFT
7193 && CONST_INT_P (XEXP (x, 1))
7194 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7195 {
7196 changed = 1;
7197 log = INTVAL (XEXP (x, 1));
7198 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7199 GEN_INT (1 << log));
7200 }
7201
7202 if (GET_CODE (x) == PLUS)
7203 {
7204 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7205
7206 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7207 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7208 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7209 {
7210 changed = 1;
7211 log = INTVAL (XEXP (XEXP (x, 0), 1));
7212 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7213 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7214 GEN_INT (1 << log));
7215 }
7216
7217 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7218 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7219 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7220 {
7221 changed = 1;
7222 log = INTVAL (XEXP (XEXP (x, 1), 1));
7223 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7224 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7225 GEN_INT (1 << log));
7226 }
7227
7228 /* Put multiply first if it isn't already. */
7229 if (GET_CODE (XEXP (x, 1)) == MULT)
7230 {
7231 rtx tmp = XEXP (x, 0);
7232 XEXP (x, 0) = XEXP (x, 1);
7233 XEXP (x, 1) = tmp;
7234 changed = 1;
7235 }
7236
7237 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7238 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7239 created by virtual register instantiation, register elimination, and
7240 similar optimizations. */
7241 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7242 {
7243 changed = 1;
7244 x = gen_rtx_PLUS (Pmode,
7245 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7246 XEXP (XEXP (x, 1), 0)),
7247 XEXP (XEXP (x, 1), 1));
7248 }
7249
7250 /* Canonicalize
7251 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7252 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7253 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7254 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7255 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7256 && CONSTANT_P (XEXP (x, 1)))
7257 {
7258 rtx constant;
7259 rtx other = NULL_RTX;
7260
7261 if (CONST_INT_P (XEXP (x, 1)))
7262 {
7263 constant = XEXP (x, 1);
7264 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7265 }
7266 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7267 {
7268 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7269 other = XEXP (x, 1);
7270 }
7271 else
7272 constant = 0;
7273
7274 if (constant)
7275 {
7276 changed = 1;
7277 x = gen_rtx_PLUS (Pmode,
7278 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7279 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7280 plus_constant (other, INTVAL (constant)));
7281 }
7282 }
7283
7284 if (changed && legitimate_address_p (mode, x, FALSE))
7285 return x;
7286
7287 if (GET_CODE (XEXP (x, 0)) == MULT)
7288 {
7289 changed = 1;
7290 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7291 }
7292
7293 if (GET_CODE (XEXP (x, 1)) == MULT)
7294 {
7295 changed = 1;
7296 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7297 }
7298
7299 if (changed
7300 && REG_P (XEXP (x, 1))
7301 && REG_P (XEXP (x, 0)))
7302 return x;
7303
7304 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7305 {
7306 changed = 1;
7307 x = legitimize_pic_address (x, 0);
7308 }
7309
7310 if (changed && legitimate_address_p (mode, x, FALSE))
7311 return x;
7312
7313 if (REG_P (XEXP (x, 0)))
7314 {
7315 rtx temp = gen_reg_rtx (Pmode);
7316 rtx val = force_operand (XEXP (x, 1), temp);
7317 if (val != temp)
7318 emit_move_insn (temp, val);
7319
7320 XEXP (x, 1) = temp;
7321 return x;
7322 }
7323
7324 else if (REG_P (XEXP (x, 1)))
7325 {
7326 rtx temp = gen_reg_rtx (Pmode);
7327 rtx val = force_operand (XEXP (x, 0), temp);
7328 if (val != temp)
7329 emit_move_insn (temp, val);
7330
7331 XEXP (x, 0) = temp;
7332 return x;
7333 }
7334 }
7335
7336 return x;
7337 }
7338 \f
7339 /* Print an integer constant expression in assembler syntax. Addition
7340 and subtraction are the only arithmetic that may appear in these
7341 expressions. FILE is the stdio stream to write to, X is the rtx, and
7342 CODE is the operand print code from the output string. */
7343
7344 static void
7345 output_pic_addr_const (FILE *file, rtx x, int code)
7346 {
7347 char buf[256];
7348
7349 switch (GET_CODE (x))
7350 {
7351 case PC:
7352 gcc_assert (flag_pic);
7353 putc ('.', file);
7354 break;
7355
7356 case SYMBOL_REF:
7357 output_addr_const (file, x);
7358 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7359 fputs ("@PLT", file);
7360 break;
7361
7362 case LABEL_REF:
7363 x = XEXP (x, 0);
7364 /* FALLTHRU */
7365 case CODE_LABEL:
7366 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7367 assemble_name (asm_out_file, buf);
7368 break;
7369
7370 case CONST_INT:
7371 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7372 break;
7373
7374 case CONST:
7375 /* This used to output parentheses around the expression,
7376 but that does not work on the 386 (either ATT or BSD assembler). */
7377 output_pic_addr_const (file, XEXP (x, 0), code);
7378 break;
7379
7380 case CONST_DOUBLE:
7381 if (GET_MODE (x) == VOIDmode)
7382 {
7383 /* We can use %d if the number is <32 bits and positive. */
7384 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7385 fprintf (file, "0x%lx%08lx",
7386 (unsigned long) CONST_DOUBLE_HIGH (x),
7387 (unsigned long) CONST_DOUBLE_LOW (x));
7388 else
7389 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7390 }
7391 else
7392 /* We can't handle floating point constants;
7393 PRINT_OPERAND must handle them. */
7394 output_operand_lossage ("floating constant misused");
7395 break;
7396
7397 case PLUS:
7398 /* Some assemblers need integer constants to appear first. */
7399 if (CONST_INT_P (XEXP (x, 0)))
7400 {
7401 output_pic_addr_const (file, XEXP (x, 0), code);
7402 putc ('+', file);
7403 output_pic_addr_const (file, XEXP (x, 1), code);
7404 }
7405 else
7406 {
7407 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7408 output_pic_addr_const (file, XEXP (x, 1), code);
7409 putc ('+', file);
7410 output_pic_addr_const (file, XEXP (x, 0), code);
7411 }
7412 break;
7413
7414 case MINUS:
7415 if (!TARGET_MACHO)
7416 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7417 output_pic_addr_const (file, XEXP (x, 0), code);
7418 putc ('-', file);
7419 output_pic_addr_const (file, XEXP (x, 1), code);
7420 if (!TARGET_MACHO)
7421 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7422 break;
7423
7424 case UNSPEC:
7425 gcc_assert (XVECLEN (x, 0) == 1);
7426 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7427 switch (XINT (x, 1))
7428 {
7429 case UNSPEC_GOT:
7430 fputs ("@GOT", file);
7431 break;
7432 case UNSPEC_GOTOFF:
7433 fputs ("@GOTOFF", file);
7434 break;
7435 case UNSPEC_GOTPCREL:
7436 fputs ("@GOTPCREL(%rip)", file);
7437 break;
7438 case UNSPEC_GOTTPOFF:
7439 /* FIXME: This might be @TPOFF in Sun ld too. */
7440 fputs ("@GOTTPOFF", file);
7441 break;
7442 case UNSPEC_TPOFF:
7443 fputs ("@TPOFF", file);
7444 break;
7445 case UNSPEC_NTPOFF:
7446 if (TARGET_64BIT)
7447 fputs ("@TPOFF", file);
7448 else
7449 fputs ("@NTPOFF", file);
7450 break;
7451 case UNSPEC_DTPOFF:
7452 fputs ("@DTPOFF", file);
7453 break;
7454 case UNSPEC_GOTNTPOFF:
7455 if (TARGET_64BIT)
7456 fputs ("@GOTTPOFF(%rip)", file);
7457 else
7458 fputs ("@GOTNTPOFF", file);
7459 break;
7460 case UNSPEC_INDNTPOFF:
7461 fputs ("@INDNTPOFF", file);
7462 break;
7463 default:
7464 output_operand_lossage ("invalid UNSPEC as operand");
7465 break;
7466 }
7467 break;
7468
7469 default:
7470 output_operand_lossage ("invalid expression as operand");
7471 }
7472 }
7473
7474 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7475 We need to emit DTP-relative relocations. */
7476
7477 static void
7478 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7479 {
7480 fputs (ASM_LONG, file);
7481 output_addr_const (file, x);
7482 fputs ("@DTPOFF", file);
7483 switch (size)
7484 {
7485 case 4:
7486 break;
7487 case 8:
7488 fputs (", 0", file);
7489 break;
7490 default:
7491 gcc_unreachable ();
7492 }
7493 }
7494
7495 /* In the name of slightly smaller debug output, and to cater to
7496 general assembler lossage, recognize PIC+GOTOFF and turn it back
7497 into a direct symbol reference.
7498
7499 On Darwin, this is necessary to avoid a crash, because Darwin
7500 has a different PIC label for each routine but the DWARF debugging
7501 information is not associated with any particular routine, so it's
7502 necessary to remove references to the PIC label from RTL stored by
7503 the DWARF output code. */
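/* For example (a sketch): an address of the form
     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "sym"), with any constant or
   register addends re-applied to the result afterwards.  */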
7504
7505 static rtx
7506 ix86_delegitimize_address (rtx orig_x)
7507 {
7508 rtx x = orig_x;
7509 /* reg_addend is NULL or a multiple of some register. */
7510 rtx reg_addend = NULL_RTX;
7511 /* const_addend is NULL or a const_int. */
7512 rtx const_addend = NULL_RTX;
7513 /* This is the result, or NULL. */
7514 rtx result = NULL_RTX;
7515
7516 if (MEM_P (x))
7517 x = XEXP (x, 0);
7518
7519 if (TARGET_64BIT)
7520 {
7521 if (GET_CODE (x) != CONST
7522 || GET_CODE (XEXP (x, 0)) != UNSPEC
7523 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7524 || !MEM_P (orig_x))
7525 return orig_x;
7526 return XVECEXP (XEXP (x, 0), 0, 0);
7527 }
7528
7529 if (GET_CODE (x) != PLUS
7530 || GET_CODE (XEXP (x, 1)) != CONST)
7531 return orig_x;
7532
7533 if (REG_P (XEXP (x, 0))
7534 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7535 /* %ebx + GOT/GOTOFF */
7536 ;
7537 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7538 {
7539 /* %ebx + %reg * scale + GOT/GOTOFF */
7540 reg_addend = XEXP (x, 0);
7541 if (REG_P (XEXP (reg_addend, 0))
7542 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7543 reg_addend = XEXP (reg_addend, 1);
7544 else if (REG_P (XEXP (reg_addend, 1))
7545 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7546 reg_addend = XEXP (reg_addend, 0);
7547 else
7548 return orig_x;
7549 if (!REG_P (reg_addend)
7550 && GET_CODE (reg_addend) != MULT
7551 && GET_CODE (reg_addend) != ASHIFT)
7552 return orig_x;
7553 }
7554 else
7555 return orig_x;
7556
7557 x = XEXP (XEXP (x, 1), 0);
7558 if (GET_CODE (x) == PLUS
7559 && CONST_INT_P (XEXP (x, 1)))
7560 {
7561 const_addend = XEXP (x, 1);
7562 x = XEXP (x, 0);
7563 }
7564
7565 if (GET_CODE (x) == UNSPEC
7566 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7567 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7568 result = XVECEXP (x, 0, 0);
7569
7570 if (TARGET_MACHO && darwin_local_data_pic (x)
7571 && !MEM_P (orig_x))
7572 result = XEXP (x, 0);
7573
7574 if (! result)
7575 return orig_x;
7576
7577 if (const_addend)
7578 result = gen_rtx_PLUS (Pmode, result, const_addend);
7579 if (reg_addend)
7580 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7581 return result;
7582 }
7583 \f
7584 static void
7585 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7586 int fp, FILE *file)
7587 {
7588 const char *suffix;
7589
7590 if (mode == CCFPmode || mode == CCFPUmode)
7591 {
7592 enum rtx_code second_code, bypass_code;
7593 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7594 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7595 code = ix86_fp_compare_code_to_integer (code);
7596 mode = CCmode;
7597 }
7598 if (reverse)
7599 code = reverse_condition (code);
7600
7601 switch (code)
7602 {
7603 case EQ:
7604 suffix = "e";
7605 break;
7606 case NE:
7607 suffix = "ne";
7608 break;
7609 case GT:
7610 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7611 suffix = "g";
7612 break;
7613 case GTU:
7614 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7615 Those same assemblers have the same but opposite lossage on cmov. */
7616 gcc_assert (mode == CCmode);
7617 suffix = fp ? "nbe" : "a";
7618 break;
7619 case LT:
7620 switch (mode)
7621 {
7622 case CCNOmode:
7623 case CCGOCmode:
7624 suffix = "s";
7625 break;
7626
7627 case CCmode:
7628 case CCGCmode:
7629 suffix = "l";
7630 break;
7631
7632 default:
7633 gcc_unreachable ();
7634 }
7635 break;
7636 case LTU:
7637 gcc_assert (mode == CCmode);
7638 suffix = "b";
7639 break;
7640 case GE:
7641 switch (mode)
7642 {
7643 case CCNOmode:
7644 case CCGOCmode:
7645 suffix = "ns";
7646 break;
7647
7648 case CCmode:
7649 case CCGCmode:
7650 suffix = "ge";
7651 break;
7652
7653 default:
7654 gcc_unreachable ();
7655 }
7656 break;
7657 case GEU:
7658 /* ??? As above. */
7659 gcc_assert (mode == CCmode);
7660 suffix = fp ? "nb" : "ae";
7661 break;
7662 case LE:
7663 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7664 suffix = "le";
7665 break;
7666 case LEU:
7667 gcc_assert (mode == CCmode);
7668 suffix = "be";
7669 break;
7670 case UNORDERED:
7671 suffix = fp ? "u" : "p";
7672 break;
7673 case ORDERED:
7674 suffix = fp ? "nu" : "np";
7675 break;
7676 default:
7677 gcc_unreachable ();
7678 }
7679 fputs (suffix, file);
7680 }
7681
7682 /* Print the name of register X to FILE based on its machine mode and number.
7683 If CODE is 'w', pretend the mode is HImode.
7684 If CODE is 'b', pretend the mode is QImode.
7685 If CODE is 'k', pretend the mode is SImode.
7686 If CODE is 'q', pretend the mode is DImode.
7687 If CODE is 'h', pretend the reg is the 'high' byte register.
7688 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
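/* For example, for the %eax register this prints "al" with code 'b',
   "ax" with 'w', "eax" with 'k', "rax" with 'q' and "ah" with 'h';
   whether a '%' prefix is emitted depends on the assembler dialect and
   USER_LABEL_PREFIX, as handled below.  */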
7689
7690 void
7691 print_reg (rtx x, int code, FILE *file)
7692 {
7693 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7694 && REGNO (x) != FRAME_POINTER_REGNUM
7695 && REGNO (x) != FLAGS_REG
7696 && REGNO (x) != FPSR_REG
7697 && REGNO (x) != FPCR_REG);
7698
7699 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7700 putc ('%', file);
7701
7702 if (code == 'w' || MMX_REG_P (x))
7703 code = 2;
7704 else if (code == 'b')
7705 code = 1;
7706 else if (code == 'k')
7707 code = 4;
7708 else if (code == 'q')
7709 code = 8;
7710 else if (code == 'y')
7711 code = 3;
7712 else if (code == 'h')
7713 code = 0;
7714 else
7715 code = GET_MODE_SIZE (GET_MODE (x));
7716
 7717   /* Irritatingly, AMD extended registers use a different naming convention
 7718      from the normal registers.  */
7719 if (REX_INT_REG_P (x))
7720 {
7721 gcc_assert (TARGET_64BIT);
7722 switch (code)
7723 {
7724 case 0:
7725 error ("extended registers have no high halves");
7726 break;
7727 case 1:
7728 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7729 break;
7730 case 2:
7731 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7732 break;
7733 case 4:
7734 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7735 break;
7736 case 8:
7737 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7738 break;
7739 default:
7740 error ("unsupported operand size for extended register");
7741 break;
7742 }
7743 return;
7744 }
7745 switch (code)
7746 {
7747 case 3:
7748 if (STACK_TOP_P (x))
7749 {
7750 fputs ("st(0)", file);
7751 break;
7752 }
7753 /* FALLTHRU */
7754 case 8:
7755 case 4:
7756 case 12:
7757 if (! ANY_FP_REG_P (x))
7758 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7759 /* FALLTHRU */
7760 case 16:
7761 case 2:
7762 normal:
7763 fputs (hi_reg_name[REGNO (x)], file);
7764 break;
7765 case 1:
7766 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7767 goto normal;
7768 fputs (qi_reg_name[REGNO (x)], file);
7769 break;
7770 case 0:
7771 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7772 goto normal;
7773 fputs (qi_high_reg_name[REGNO (x)], file);
7774 break;
7775 default:
7776 gcc_unreachable ();
7777 }
7778 }
7779
7780 /* Locate some local-dynamic symbol still in use by this function
7781 so that we can print its name in some tls_local_dynamic_base
7782 pattern. */
7783
7784 static const char *
7785 get_some_local_dynamic_name (void)
7786 {
7787 rtx insn;
7788
7789 if (cfun->machine->some_ld_name)
7790 return cfun->machine->some_ld_name;
7791
7792 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7793 if (INSN_P (insn)
7794 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7795 return cfun->machine->some_ld_name;
7796
7797 gcc_unreachable ();
7798 }
7799
7800 static int
7801 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7802 {
7803 rtx x = *px;
7804
7805 if (GET_CODE (x) == SYMBOL_REF
7806 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7807 {
7808 cfun->machine->some_ld_name = XSTR (x, 0);
7809 return 1;
7810 }
7811
7812 return 0;
7813 }
7814
7815 /* Meaning of CODE:
7816 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7817 C -- print opcode suffix for set/cmov insn.
7818 c -- like C, but print reversed condition
7819 F,f -- likewise, but for floating-point.
7820 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7821 otherwise nothing
7822 R -- print the prefix for register names.
7823 z -- print the opcode suffix for the size of the current operand.
7824 * -- print a star (in certain assembler syntax)
7825 A -- print an absolute memory reference.
7826 w -- print the operand as if it's a "word" (HImode) even if it isn't.
 7827    s -- print a shift double count, followed by the assembler's argument
 7828 	delimiter.
7829 b -- print the QImode name of the register for the indicated operand.
7830 %b0 would print %al if operands[0] is reg 0.
7831 w -- likewise, print the HImode name of the register.
7832 k -- likewise, print the SImode name of the register.
7833 q -- likewise, print the DImode name of the register.
7834 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7835 y -- print "st(0)" instead of "st" as a register.
7836 D -- print condition for SSE cmp instruction.
7837 P -- if PIC, print an @PLT suffix.
7838 X -- don't print any sort of PIC '@' suffix for a symbol.
7839 & -- print some in-use local-dynamic symbol name.
7840 H -- print a memory address offset by 8; used for sse high-parts
7841 */
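/* As a hedged illustration (the template below is made up for this comment,
   not quoted from i386.md): in an output template such as
   "cmov%O2%C2\t{%2, %0|%0, %2}", %C2 prints the condition suffix for
   operand 2, %O2 prints the optional Sun-syntax size suffix, and the
   "{att|intel}" braces select the operand order for the current dialect.  */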
7842
7843 void
7844 print_operand (FILE *file, rtx x, int code)
7845 {
7846 if (code)
7847 {
7848 switch (code)
7849 {
7850 case '*':
7851 if (ASSEMBLER_DIALECT == ASM_ATT)
7852 putc ('*', file);
7853 return;
7854
7855 case '&':
7856 assemble_name (file, get_some_local_dynamic_name ());
7857 return;
7858
7859 case 'A':
7860 switch (ASSEMBLER_DIALECT)
7861 {
7862 case ASM_ATT:
7863 putc ('*', file);
7864 break;
7865
7866 case ASM_INTEL:
 7867 	      /* Intel syntax.  For absolute addresses, registers should not
 7868 		 be surrounded by brackets.  */
7869 if (!REG_P (x))
7870 {
7871 putc ('[', file);
7872 PRINT_OPERAND (file, x, 0);
7873 putc (']', file);
7874 return;
7875 }
7876 break;
7877
7878 default:
7879 gcc_unreachable ();
7880 }
7881
7882 PRINT_OPERAND (file, x, 0);
7883 return;
7884
7885
7886 case 'L':
7887 if (ASSEMBLER_DIALECT == ASM_ATT)
7888 putc ('l', file);
7889 return;
7890
7891 case 'W':
7892 if (ASSEMBLER_DIALECT == ASM_ATT)
7893 putc ('w', file);
7894 return;
7895
7896 case 'B':
7897 if (ASSEMBLER_DIALECT == ASM_ATT)
7898 putc ('b', file);
7899 return;
7900
7901 case 'Q':
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7903 putc ('l', file);
7904 return;
7905
7906 case 'S':
7907 if (ASSEMBLER_DIALECT == ASM_ATT)
7908 putc ('s', file);
7909 return;
7910
7911 case 'T':
7912 if (ASSEMBLER_DIALECT == ASM_ATT)
7913 putc ('t', file);
7914 return;
7915
7916 case 'z':
7917 /* 387 opcodes don't get size suffixes if the operands are
7918 registers. */
7919 if (STACK_REG_P (x))
7920 return;
7921
7922 /* Likewise if using Intel opcodes. */
7923 if (ASSEMBLER_DIALECT == ASM_INTEL)
7924 return;
7925
 7926 	  /* Derive the opcode suffix from the size of the operand.  */
7927 switch (GET_MODE_SIZE (GET_MODE (x)))
7928 {
7929 case 1:
7930 putc ('b', file);
7931 return;
7932
7933 case 2:
7934 #ifdef HAVE_GAS_FILDS_FISTS
7935 putc ('s', file);
7936 #endif
7937 return;
7938
7939 case 4:
7940 if (GET_MODE (x) == SFmode)
7941 {
7942 putc ('s', file);
7943 return;
7944 }
7945 else
7946 putc ('l', file);
7947 return;
7948
7949 case 12:
7950 case 16:
7951 putc ('t', file);
7952 return;
7953
7954 case 8:
7955 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7956 {
7957 #ifdef GAS_MNEMONICS
7958 putc ('q', file);
7959 #else
7960 putc ('l', file);
7961 putc ('l', file);
7962 #endif
7963 }
7964 else
7965 putc ('l', file);
7966 return;
7967
7968 default:
7969 gcc_unreachable ();
7970 }
7971
7972 case 'b':
7973 case 'w':
7974 case 'k':
7975 case 'q':
7976 case 'h':
7977 case 'y':
7978 case 'X':
7979 case 'P':
7980 break;
7981
7982 case 's':
7983 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
7984 {
7985 PRINT_OPERAND (file, x, 0);
7986 putc (',', file);
7987 }
7988 return;
7989
7990 case 'D':
 7991 	  /* A little bit of brain damage here.  The SSE compare instructions
 7992 	     use completely different names for the comparisons than the
 7993 	     fp conditional moves do.  */
7994 switch (GET_CODE (x))
7995 {
7996 case EQ:
7997 case UNEQ:
7998 fputs ("eq", file);
7999 break;
8000 case LT:
8001 case UNLT:
8002 fputs ("lt", file);
8003 break;
8004 case LE:
8005 case UNLE:
8006 fputs ("le", file);
8007 break;
8008 case UNORDERED:
8009 fputs ("unord", file);
8010 break;
8011 case NE:
8012 case LTGT:
8013 fputs ("neq", file);
8014 break;
8015 case UNGE:
8016 case GE:
8017 fputs ("nlt", file);
8018 break;
8019 case UNGT:
8020 case GT:
8021 fputs ("nle", file);
8022 break;
8023 case ORDERED:
8024 fputs ("ord", file);
8025 break;
8026 default:
8027 gcc_unreachable ();
8028 }
8029 return;
8030 case 'O':
8031 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8032 if (ASSEMBLER_DIALECT == ASM_ATT)
8033 {
8034 switch (GET_MODE (x))
8035 {
8036 case HImode: putc ('w', file); break;
8037 case SImode:
8038 case SFmode: putc ('l', file); break;
8039 case DImode:
8040 case DFmode: putc ('q', file); break;
8041 default: gcc_unreachable ();
8042 }
8043 putc ('.', file);
8044 }
8045 #endif
8046 return;
8047 case 'C':
8048 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8049 return;
8050 case 'F':
8051 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8052 if (ASSEMBLER_DIALECT == ASM_ATT)
8053 putc ('.', file);
8054 #endif
8055 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8056 return;
8057
8058 /* Like above, but reverse condition */
8059 case 'c':
8060 /* Check to see if argument to %c is really a constant
8061 and not a condition code which needs to be reversed. */
8062 if (!COMPARISON_P (x))
8063 {
8064 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8065 return;
8066 }
8067 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8068 return;
8069 case 'f':
8070 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8071 if (ASSEMBLER_DIALECT == ASM_ATT)
8072 putc ('.', file);
8073 #endif
8074 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8075 return;
8076
8077 case 'H':
8078 /* It doesn't actually matter what mode we use here, as we're
8079 only going to use this for printing. */
8080 x = adjust_address_nv (x, DImode, 8);
8081 break;
8082
8083 case '+':
8084 {
8085 rtx x;
8086
8087 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8088 return;
8089
8090 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8091 if (x)
8092 {
8093 int pred_val = INTVAL (XEXP (x, 0));
8094
8095 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8096 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8097 {
8098 int taken = pred_val > REG_BR_PROB_BASE / 2;
8099 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8100
 8101 		  /* Emit hints only in the case the default branch prediction
 8102 		     heuristics would fail.  */
8103 if (taken != cputaken)
8104 {
8105 /* We use 3e (DS) prefix for taken branches and
8106 2e (CS) prefix for not taken branches. */
8107 if (taken)
8108 fputs ("ds ; ", file);
8109 else
8110 fputs ("cs ; ", file);
8111 }
8112 }
8113 }
8114 return;
8115 }
8116 default:
8117 output_operand_lossage ("invalid operand code '%c'", code);
8118 }
8119 }
8120
8121 if (REG_P (x))
8122 print_reg (x, code, file);
8123
8124 else if (MEM_P (x))
8125 {
8126 /* No `byte ptr' prefix for call instructions. */
8127 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8128 {
8129 const char * size;
8130 switch (GET_MODE_SIZE (GET_MODE (x)))
8131 {
8132 case 1: size = "BYTE"; break;
8133 case 2: size = "WORD"; break;
8134 case 4: size = "DWORD"; break;
8135 case 8: size = "QWORD"; break;
8136 case 12: size = "XWORD"; break;
8137 case 16: size = "XMMWORD"; break;
8138 default:
8139 gcc_unreachable ();
8140 }
8141
8142 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8143 if (code == 'b')
8144 size = "BYTE";
8145 else if (code == 'w')
8146 size = "WORD";
8147 else if (code == 'k')
8148 size = "DWORD";
8149
8150 fputs (size, file);
8151 fputs (" PTR ", file);
8152 }
8153
8154 x = XEXP (x, 0);
8155 /* Avoid (%rip) for call operands. */
8156 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8157 && !CONST_INT_P (x))
8158 output_addr_const (file, x);
8159 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8160 output_operand_lossage ("invalid constraints for operand");
8161 else
8162 output_address (x);
8163 }
8164
8165 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8166 {
8167 REAL_VALUE_TYPE r;
8168 long l;
8169
8170 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8171 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8172
8173 if (ASSEMBLER_DIALECT == ASM_ATT)
8174 putc ('$', file);
8175 fprintf (file, "0x%08lx", l);
8176 }
8177
8178 /* These float cases don't actually occur as immediate operands. */
8179 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8180 {
8181 char dstr[30];
8182
8183 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8184 fprintf (file, "%s", dstr);
8185 }
8186
8187 else if (GET_CODE (x) == CONST_DOUBLE
8188 && GET_MODE (x) == XFmode)
8189 {
8190 char dstr[30];
8191
8192 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8193 fprintf (file, "%s", dstr);
8194 }
8195
8196 else
8197 {
8198 /* We have patterns that allow zero sets of memory, for instance.
8199 In 64-bit mode, we should probably support all 8-byte vectors,
8200 since we can in fact encode that into an immediate. */
8201 if (GET_CODE (x) == CONST_VECTOR)
8202 {
8203 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8204 x = const0_rtx;
8205 }
8206
8207 if (code != 'P')
8208 {
8209 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8210 {
8211 if (ASSEMBLER_DIALECT == ASM_ATT)
8212 putc ('$', file);
8213 }
8214 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8215 || GET_CODE (x) == LABEL_REF)
8216 {
8217 if (ASSEMBLER_DIALECT == ASM_ATT)
8218 putc ('$', file);
8219 else
8220 fputs ("OFFSET FLAT:", file);
8221 }
8222 }
8223 if (CONST_INT_P (x))
8224 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8225 else if (flag_pic)
8226 output_pic_addr_const (file, x, code);
8227 else
8228 output_addr_const (file, x);
8229 }
8230 }
8231 \f
8232 /* Print a memory operand whose address is ADDR. */
8233
8234 void
8235 print_operand_address (FILE *file, rtx addr)
8236 {
8237 struct ix86_address parts;
8238 rtx base, index, disp;
8239 int scale;
8240 int ok = ix86_decompose_address (addr, &parts);
8241
8242 gcc_assert (ok);
8243
8244 base = parts.base;
8245 index = parts.index;
8246 disp = parts.disp;
8247 scale = parts.scale;
8248
8249 switch (parts.seg)
8250 {
8251 case SEG_DEFAULT:
8252 break;
8253 case SEG_FS:
8254 case SEG_GS:
8255 if (USER_LABEL_PREFIX[0] == 0)
8256 putc ('%', file);
8257 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8258 break;
8259 default:
8260 gcc_unreachable ();
8261 }
8262
8263 if (!base && !index)
8264 {
8265 /* Displacement only requires special attention. */
8266
8267 if (CONST_INT_P (disp))
8268 {
8269 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8270 {
8271 if (USER_LABEL_PREFIX[0] == 0)
8272 putc ('%', file);
8273 fputs ("ds:", file);
8274 }
8275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8276 }
8277 else if (flag_pic)
8278 output_pic_addr_const (file, disp, 0);
8279 else
8280 output_addr_const (file, disp);
8281
 8282       /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
8283 if (TARGET_64BIT)
8284 {
8285 if (GET_CODE (disp) == CONST
8286 && GET_CODE (XEXP (disp, 0)) == PLUS
8287 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8288 disp = XEXP (XEXP (disp, 0), 0);
8289 if (GET_CODE (disp) == LABEL_REF
8290 || (GET_CODE (disp) == SYMBOL_REF
8291 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8292 fputs ("(%rip)", file);
8293 }
8294 }
8295 else
8296 {
8297 if (ASSEMBLER_DIALECT == ASM_ATT)
8298 {
8299 if (disp)
8300 {
8301 if (flag_pic)
8302 output_pic_addr_const (file, disp, 0);
8303 else if (GET_CODE (disp) == LABEL_REF)
8304 output_asm_label (disp);
8305 else
8306 output_addr_const (file, disp);
8307 }
8308
8309 putc ('(', file);
8310 if (base)
8311 print_reg (base, 0, file);
8312 if (index)
8313 {
8314 putc (',', file);
8315 print_reg (index, 0, file);
8316 if (scale != 1)
8317 fprintf (file, ",%d", scale);
8318 }
8319 putc (')', file);
8320 }
8321 else
8322 {
8323 rtx offset = NULL_RTX;
8324
8325 if (disp)
8326 {
8327 /* Pull out the offset of a symbol; print any symbol itself. */
8328 if (GET_CODE (disp) == CONST
8329 && GET_CODE (XEXP (disp, 0)) == PLUS
8330 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8331 {
8332 offset = XEXP (XEXP (disp, 0), 1);
8333 disp = gen_rtx_CONST (VOIDmode,
8334 XEXP (XEXP (disp, 0), 0));
8335 }
8336
8337 if (flag_pic)
8338 output_pic_addr_const (file, disp, 0);
8339 else if (GET_CODE (disp) == LABEL_REF)
8340 output_asm_label (disp);
8341 else if (CONST_INT_P (disp))
8342 offset = disp;
8343 else
8344 output_addr_const (file, disp);
8345 }
8346
8347 putc ('[', file);
8348 if (base)
8349 {
8350 print_reg (base, 0, file);
8351 if (offset)
8352 {
8353 if (INTVAL (offset) >= 0)
8354 putc ('+', file);
8355 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8356 }
8357 }
8358 else if (offset)
8359 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8360 else
8361 putc ('0', file);
8362
8363 if (index)
8364 {
8365 putc ('+', file);
8366 print_reg (index, 0, file);
8367 if (scale != 1)
8368 fprintf (file, "*%d", scale);
8369 }
8370 putc (']', file);
8371 }
8372 }
8373 }
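/* As a sketch of the two dialects handled above: an address with base
   %ebp, index %eax, scale 4 and displacement -4 comes out roughly as
   "-4(%ebp,%eax,4)" in AT&T syntax and "[ebp-4+eax*4]" in Intel syntax.  */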
8374
8375 bool
8376 output_addr_const_extra (FILE *file, rtx x)
8377 {
8378 rtx op;
8379
8380 if (GET_CODE (x) != UNSPEC)
8381 return false;
8382
8383 op = XVECEXP (x, 0, 0);
8384 switch (XINT (x, 1))
8385 {
8386 case UNSPEC_GOTTPOFF:
8387 output_addr_const (file, op);
8388 /* FIXME: This might be @TPOFF in Sun ld. */
8389 fputs ("@GOTTPOFF", file);
8390 break;
8391 case UNSPEC_TPOFF:
8392 output_addr_const (file, op);
8393 fputs ("@TPOFF", file);
8394 break;
8395 case UNSPEC_NTPOFF:
8396 output_addr_const (file, op);
8397 if (TARGET_64BIT)
8398 fputs ("@TPOFF", file);
8399 else
8400 fputs ("@NTPOFF", file);
8401 break;
8402 case UNSPEC_DTPOFF:
8403 output_addr_const (file, op);
8404 fputs ("@DTPOFF", file);
8405 break;
8406 case UNSPEC_GOTNTPOFF:
8407 output_addr_const (file, op);
8408 if (TARGET_64BIT)
8409 fputs ("@GOTTPOFF(%rip)", file);
8410 else
8411 fputs ("@GOTNTPOFF", file);
8412 break;
8413 case UNSPEC_INDNTPOFF:
8414 output_addr_const (file, op);
8415 fputs ("@INDNTPOFF", file);
8416 break;
8417
8418 default:
8419 return false;
8420 }
8421
8422 return true;
8423 }
8424 \f
8425 /* Split one or more DImode RTL references into pairs of SImode
8426 references. The RTL can be REG, offsettable MEM, integer constant, or
8427 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8428 split and "num" is its length. lo_half and hi_half are output arrays
8429 that parallel "operands". */
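/* For instance (a sketch), a DImode MEM at address (reg) is split into
   SImode MEMs at offsets 0 and 4, and a DImode REG into the two SImode
   subregs at byte offsets 0 and 4 -- the low and high halves on a
   little-endian target.  */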
8430
8431 void
8432 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8433 {
8434 while (num--)
8435 {
8436 rtx op = operands[num];
8437
 8438       /* simplify_subreg refuses to split volatile memory references,
 8439 	 but we still have to handle them.  */
8440 if (MEM_P (op))
8441 {
8442 lo_half[num] = adjust_address (op, SImode, 0);
8443 hi_half[num] = adjust_address (op, SImode, 4);
8444 }
8445 else
8446 {
8447 lo_half[num] = simplify_gen_subreg (SImode, op,
8448 GET_MODE (op) == VOIDmode
8449 ? DImode : GET_MODE (op), 0);
8450 hi_half[num] = simplify_gen_subreg (SImode, op,
8451 GET_MODE (op) == VOIDmode
8452 ? DImode : GET_MODE (op), 4);
8453 }
8454 }
8455 }
8456 /* Split one or more TImode RTL references into pairs of DImode
8457 references. The RTL can be REG, offsettable MEM, integer constant, or
 8458    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8459 split and "num" is its length. lo_half and hi_half are output arrays
8460 that parallel "operands". */
8461
8462 void
8463 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8464 {
8465 while (num--)
8466 {
8467 rtx op = operands[num];
8468
 8469       /* simplify_subreg refuses to split volatile memory references, but we
 8470 	 still have to handle them.  */
8471 if (MEM_P (op))
8472 {
8473 lo_half[num] = adjust_address (op, DImode, 0);
8474 hi_half[num] = adjust_address (op, DImode, 8);
8475 }
8476 else
8477 {
8478 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8479 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8480 }
8481 }
8482 }
8483 \f
8484 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8485 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8486 is the expression of the binary operation. The output may either be
8487 emitted here, or returned to the caller, like all output_* functions.
8488
8489 There is no guarantee that the operands are the same mode, as they
8490 might be within FLOAT or FLOAT_EXTEND expressions. */
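/* As a rough example of the returned templates: for st(0) = st(0) + st(2)
   the code below builds something like "fadd\t{%y2, %0|%0, %y2}", which
   prints as "fadd %st(2), %st" in AT&T syntax, while the SSE path returns
   "addss" or "addsd" templates instead.  */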
8491
8492 #ifndef SYSV386_COMPAT
8493 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8494 wants to fix the assemblers because that causes incompatibility
8495 with gcc. No-one wants to fix gcc because that causes
8496 incompatibility with assemblers... You can use the option of
8497 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8498 #define SYSV386_COMPAT 1
8499 #endif
8500
8501 const char *
8502 output_387_binary_op (rtx insn, rtx *operands)
8503 {
8504 static char buf[30];
8505 const char *p;
8506 const char *ssep;
8507 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8508
8509 #ifdef ENABLE_CHECKING
 8510   /* Even if we do not want to check the inputs, this documents the input
 8511      constraints, which helps in understanding the following code.  */
8512 if (STACK_REG_P (operands[0])
8513 && ((REG_P (operands[1])
8514 && REGNO (operands[0]) == REGNO (operands[1])
8515 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8516 || (REG_P (operands[2])
8517 && REGNO (operands[0]) == REGNO (operands[2])
8518 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8519 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8520 ; /* ok */
8521 else
8522 gcc_assert (is_sse);
8523 #endif
8524
8525 switch (GET_CODE (operands[3]))
8526 {
8527 case PLUS:
8528 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8529 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8530 p = "fiadd";
8531 else
8532 p = "fadd";
8533 ssep = "add";
8534 break;
8535
8536 case MINUS:
8537 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8538 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8539 p = "fisub";
8540 else
8541 p = "fsub";
8542 ssep = "sub";
8543 break;
8544
8545 case MULT:
8546 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8547 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8548 p = "fimul";
8549 else
8550 p = "fmul";
8551 ssep = "mul";
8552 break;
8553
8554 case DIV:
8555 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8556 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8557 p = "fidiv";
8558 else
8559 p = "fdiv";
8560 ssep = "div";
8561 break;
8562
8563 default:
8564 gcc_unreachable ();
8565 }
8566
8567 if (is_sse)
8568 {
8569 strcpy (buf, ssep);
8570 if (GET_MODE (operands[0]) == SFmode)
8571 strcat (buf, "ss\t{%2, %0|%0, %2}");
8572 else
8573 strcat (buf, "sd\t{%2, %0|%0, %2}");
8574 return buf;
8575 }
8576 strcpy (buf, p);
8577
8578 switch (GET_CODE (operands[3]))
8579 {
8580 case MULT:
8581 case PLUS:
8582 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8583 {
8584 rtx temp = operands[2];
8585 operands[2] = operands[1];
8586 operands[1] = temp;
8587 }
8588
 8589       /* We know operands[0] == operands[1].  */
8590
8591 if (MEM_P (operands[2]))
8592 {
8593 p = "%z2\t%2";
8594 break;
8595 }
8596
8597 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8598 {
8599 if (STACK_TOP_P (operands[0]))
8600 /* How is it that we are storing to a dead operand[2]?
8601 Well, presumably operands[1] is dead too. We can't
8602 store the result to st(0) as st(0) gets popped on this
8603 instruction. Instead store to operands[2] (which I
8604 think has to be st(1)). st(1) will be popped later.
8605 gcc <= 2.8.1 didn't have this check and generated
8606 assembly code that the Unixware assembler rejected. */
8607 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8608 else
8609 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8610 break;
8611 }
8612
8613 if (STACK_TOP_P (operands[0]))
8614 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8615 else
8616 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8617 break;
8618
8619 case MINUS:
8620 case DIV:
8621 if (MEM_P (operands[1]))
8622 {
8623 p = "r%z1\t%1";
8624 break;
8625 }
8626
8627 if (MEM_P (operands[2]))
8628 {
8629 p = "%z2\t%2";
8630 break;
8631 }
8632
8633 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8634 {
8635 #if SYSV386_COMPAT
8636 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8637 derived assemblers, confusingly reverse the direction of
8638 the operation for fsub{r} and fdiv{r} when the
8639 destination register is not st(0). The Intel assembler
8640 doesn't have this brain damage. Read !SYSV386_COMPAT to
8641 figure out what the hardware really does. */
8642 if (STACK_TOP_P (operands[0]))
8643 p = "{p\t%0, %2|rp\t%2, %0}";
8644 else
8645 p = "{rp\t%2, %0|p\t%0, %2}";
8646 #else
8647 if (STACK_TOP_P (operands[0]))
8648 /* As above for fmul/fadd, we can't store to st(0). */
8649 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8650 else
8651 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8652 #endif
8653 break;
8654 }
8655
8656 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8657 {
8658 #if SYSV386_COMPAT
8659 if (STACK_TOP_P (operands[0]))
8660 p = "{rp\t%0, %1|p\t%1, %0}";
8661 else
8662 p = "{p\t%1, %0|rp\t%0, %1}";
8663 #else
8664 if (STACK_TOP_P (operands[0]))
8665 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8666 else
8667 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8668 #endif
8669 break;
8670 }
8671
8672 if (STACK_TOP_P (operands[0]))
8673 {
8674 if (STACK_TOP_P (operands[1]))
8675 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8676 else
8677 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8678 break;
8679 }
8680 else if (STACK_TOP_P (operands[1]))
8681 {
8682 #if SYSV386_COMPAT
8683 p = "{\t%1, %0|r\t%0, %1}";
8684 #else
8685 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8686 #endif
8687 }
8688 else
8689 {
8690 #if SYSV386_COMPAT
8691 p = "{r\t%2, %0|\t%0, %2}";
8692 #else
8693 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8694 #endif
8695 }
8696 break;
8697
8698 default:
8699 gcc_unreachable ();
8700 }
8701
8702 strcat (buf, p);
8703 return buf;
8704 }
8705
8706 /* Return needed mode for entity in optimize_mode_switching pass. */
8707
8708 int
8709 ix86_mode_needed (int entity, rtx insn)
8710 {
8711 enum attr_i387_cw mode;
8712
 8713   /* The mode UNINITIALIZED is used to store the control word after a
 8714      function call or ASM pattern.  The mode ANY specifies that the function
 8715      has no requirements on the control word and makes no changes to the
 8716      bits we are interested in.  */
8717
8718 if (CALL_P (insn)
8719 || (NONJUMP_INSN_P (insn)
8720 && (asm_noperands (PATTERN (insn)) >= 0
8721 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8722 return I387_CW_UNINITIALIZED;
8723
8724 if (recog_memoized (insn) < 0)
8725 return I387_CW_ANY;
8726
8727 mode = get_attr_i387_cw (insn);
8728
8729 switch (entity)
8730 {
8731 case I387_TRUNC:
8732 if (mode == I387_CW_TRUNC)
8733 return mode;
8734 break;
8735
8736 case I387_FLOOR:
8737 if (mode == I387_CW_FLOOR)
8738 return mode;
8739 break;
8740
8741 case I387_CEIL:
8742 if (mode == I387_CW_CEIL)
8743 return mode;
8744 break;
8745
8746 case I387_MASK_PM:
8747 if (mode == I387_CW_MASK_PM)
8748 return mode;
8749 break;
8750
8751 default:
8752 gcc_unreachable ();
8753 }
8754
8755 return I387_CW_ANY;
8756 }
8757
 8758 /* Output code to initialize the control word copies used by the trunc?f?i
 8759    and rounding patterns.  CURRENT_MODE is set to the current control word,
 8760    while NEW_MODE is set to the new control word.  */
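/* For reference (standard x87 control word layout, stated here as an aid to
   the magic constants below): bits 10-11 hold the rounding control --
   0x0400 rounds down, 0x0800 rounds up, 0x0c00 truncates -- and bit 5
   (0x0020) is the precision-exception mask used for nearbyint().  */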
8761
8762 void
8763 emit_i387_cw_initialization (int mode)
8764 {
8765 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8766 rtx new_mode;
8767
8768 int slot;
8769
8770 rtx reg = gen_reg_rtx (HImode);
8771
8772 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8773 emit_move_insn (reg, copy_rtx (stored_mode));
8774
8775 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8776 {
8777 switch (mode)
8778 {
8779 case I387_CW_TRUNC:
8780 /* round toward zero (truncate) */
8781 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8782 slot = SLOT_CW_TRUNC;
8783 break;
8784
8785 case I387_CW_FLOOR:
8786 /* round down toward -oo */
8787 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8788 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8789 slot = SLOT_CW_FLOOR;
8790 break;
8791
8792 case I387_CW_CEIL:
8793 /* round up toward +oo */
8794 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8795 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8796 slot = SLOT_CW_CEIL;
8797 break;
8798
8799 case I387_CW_MASK_PM:
8800 /* mask precision exception for nearbyint() */
8801 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8802 slot = SLOT_CW_MASK_PM;
8803 break;
8804
8805 default:
8806 gcc_unreachable ();
8807 }
8808 }
8809 else
8810 {
8811 switch (mode)
8812 {
8813 case I387_CW_TRUNC:
8814 /* round toward zero (truncate) */
8815 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8816 slot = SLOT_CW_TRUNC;
8817 break;
8818
8819 case I387_CW_FLOOR:
8820 /* round down toward -oo */
8821 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8822 slot = SLOT_CW_FLOOR;
8823 break;
8824
8825 case I387_CW_CEIL:
8826 /* round up toward +oo */
8827 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8828 slot = SLOT_CW_CEIL;
8829 break;
8830
8831 case I387_CW_MASK_PM:
8832 /* mask precision exception for nearbyint() */
8833 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8834 slot = SLOT_CW_MASK_PM;
8835 break;
8836
8837 default:
8838 gcc_unreachable ();
8839 }
8840 }
8841
8842 gcc_assert (slot < MAX_386_STACK_LOCALS);
8843
8844 new_mode = assign_386_stack_local (HImode, slot);
8845 emit_move_insn (new_mode, reg);
8846 }
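/* Reference note derived from the masks used above (see the processor
   manuals for the authoritative layout): bits 11:10 of the i387
   control word form the rounding-control field and bit 5 is the
   precision exception mask, so

       0x0c00   RC = 11   round toward zero (trunc)
       0x0400   RC = 01   round down toward -inf (floor)
       0x0800   RC = 10   round up toward +inf (ceil)
       0x0020   PM = 1    precision exceptions masked (nearbyint)

   which is exactly what the andhi3/iorhi3 (or movsi_insv_1) sequences
   above write into the saved copy of the control word. */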
8847
8848 /* Output code for INSN to convert a float to a signed int. OPERANDS
8849 are the insn operands. The output may be [HSD]Imode and the input
8850 operand may be [SDX]Fmode. */
8851
8852 const char *
8853 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8854 {
8855 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8856 int dimode_p = GET_MODE (operands[0]) == DImode;
8857 int round_mode = get_attr_i387_cw (insn);
8858
8859 /* Jump through a hoop or two for DImode, since the hardware has no
8860 non-popping instruction. We used to do this a different way, but
8861 that was somewhat fragile and broke with post-reload splitters. */
8862 if ((dimode_p || fisttp) && !stack_top_dies)
8863 output_asm_insn ("fld\t%y1", operands);
8864
8865 gcc_assert (STACK_TOP_P (operands[1]));
8866 gcc_assert (MEM_P (operands[0]));
8867
8868 if (fisttp)
8869 output_asm_insn ("fisttp%z0\t%0", operands);
8870 else
8871 {
8872 if (round_mode != I387_CW_ANY)
8873 output_asm_insn ("fldcw\t%3", operands);
8874 if (stack_top_dies || dimode_p)
8875 output_asm_insn ("fistp%z0\t%0", operands);
8876 else
8877 output_asm_insn ("fist%z0\t%0", operands);
8878 if (round_mode != I387_CW_ANY)
8879 output_asm_insn ("fldcw\t%2", operands);
8880 }
8881
8882 return "";
8883 }
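/* Illustrative only (not tied to one specific insn): for a DImode
   result without fisttp, the code emitted above looks roughly like

       fld     %st(0)        # duplicate if the input does not die here
       fldcw   %3            # switch to the round-toward-zero word
       fistpll %0            # convert, store and pop
       fldcw   %2            # restore the original control word

   where operands 2 and 3 are the stack slots prepared by
   emit_i387_cw_initialization. */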
8884
8885 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8886 have the values zero or one, indicates the ffreep insn's operand
8887 from the OPERANDS array. */
8888
8889 static const char *
8890 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8891 {
8892 if (TARGET_USE_FFREEP)
8893 #if HAVE_AS_IX86_FFREEP
8894 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8895 #else
8896 {
8897 static char retval[] = ".word\t0xc_df";
8898 int regno = REGNO (operands[opno]);
8899
8900 gcc_assert (FP_REGNO_P (regno));
8901
8902 retval[9] = '0' + (regno - FIRST_STACK_REG);
8903 return retval;
8904 }
8905 #endif
8906
8907 return opno ? "fstp\t%y1" : "fstp\t%y0";
8908 }
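/* A note on the .word fallback above (added commentary): ffreep
   %st(N) encodes as the byte pair 0xdf, 0xc0+N, so patching the single
   hex digit at retval[9] yields ".word 0xcNdf", which emits exactly
   those two bytes on a little-endian target when the assembler does
   not know the ffreep mnemonic. */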
8909
8910
8911 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8912 should be used. UNORDERED_P is true when fucom should be used. */
8913
8914 const char *
8915 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8916 {
8917 int stack_top_dies;
8918 rtx cmp_op0, cmp_op1;
8919 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8920
8921 if (eflags_p)
8922 {
8923 cmp_op0 = operands[0];
8924 cmp_op1 = operands[1];
8925 }
8926 else
8927 {
8928 cmp_op0 = operands[1];
8929 cmp_op1 = operands[2];
8930 }
8931
8932 if (is_sse)
8933 {
8934 if (GET_MODE (operands[0]) == SFmode)
8935 if (unordered_p)
8936 return "ucomiss\t{%1, %0|%0, %1}";
8937 else
8938 return "comiss\t{%1, %0|%0, %1}";
8939 else
8940 if (unordered_p)
8941 return "ucomisd\t{%1, %0|%0, %1}";
8942 else
8943 return "comisd\t{%1, %0|%0, %1}";
8944 }
8945
8946 gcc_assert (STACK_TOP_P (cmp_op0));
8947
8948 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8949
8950 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8951 {
8952 if (stack_top_dies)
8953 {
8954 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8955 return output_387_ffreep (operands, 1);
8956 }
8957 else
8958 return "ftst\n\tfnstsw\t%0";
8959 }
8960
8961 if (STACK_REG_P (cmp_op1)
8962 && stack_top_dies
8963 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8964 && REGNO (cmp_op1) != FIRST_STACK_REG)
8965 {
8966 /* If both the top of the 387 stack and the other operand (also a
8967 stack register) die here, then this must be a `fcompp' float
8968 compare. */
8969
8970 if (eflags_p)
8971 {
8972 /* There is no double popping fcomi variant. Fortunately,
8973 eflags is immune from the fstp's cc clobbering. */
8974 if (unordered_p)
8975 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8976 else
8977 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8978 return output_387_ffreep (operands, 0);
8979 }
8980 else
8981 {
8982 if (unordered_p)
8983 return "fucompp\n\tfnstsw\t%0";
8984 else
8985 return "fcompp\n\tfnstsw\t%0";
8986 }
8987 }
8988 else
8989 {
8990 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8991
8992 static const char * const alt[16] =
8993 {
8994 "fcom%z2\t%y2\n\tfnstsw\t%0",
8995 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8996 "fucom%z2\t%y2\n\tfnstsw\t%0",
8997 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8998
8999 "ficom%z2\t%y2\n\tfnstsw\t%0",
9000 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9001 NULL,
9002 NULL,
9003
9004 "fcomi\t{%y1, %0|%0, %y1}",
9005 "fcomip\t{%y1, %0|%0, %y1}",
9006 "fucomi\t{%y1, %0|%0, %y1}",
9007 "fucomip\t{%y1, %0|%0, %y1}",
9008
9009 NULL,
9010 NULL,
9011 NULL,
9012 NULL
9013 };
9014
9015 int mask;
9016 const char *ret;
9017
9018 mask = eflags_p << 3;
9019 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9020 mask |= unordered_p << 1;
9021 mask |= stack_top_dies;
9022
9023 gcc_assert (mask < 16);
9024 ret = alt[mask];
9025 gcc_assert (ret);
9026
9027 return ret;
9028 }
9029 }
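/* For readability (added commentary), the index into alt[] decomposes
   as:

       bit 3   eflags_p         fcomi family, result in EFLAGS
       bit 2   integer operand  ficom family
       bit 1   unordered_p      fucom family
       bit 0   stack_top_dies   popping variant

   e.g. mask 9 (eflags_p and stack_top_dies) selects "fcomip". */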
9030
9031 void
9032 ix86_output_addr_vec_elt (FILE *file, int value)
9033 {
9034 const char *directive = ASM_LONG;
9035
9036 #ifdef ASM_QUAD
9037 if (TARGET_64BIT)
9038 directive = ASM_QUAD;
9039 #else
9040 gcc_assert (!TARGET_64BIT);
9041 #endif
9042
9043 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9044 }
9045
9046 void
9047 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9048 {
9049 if (TARGET_64BIT)
9050 fprintf (file, "%s%s%d-%s%d\n",
9051 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9052 else if (HAVE_AS_GOTOFF_IN_DATA)
9053 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9054 #if TARGET_MACHO
9055 else if (TARGET_MACHO)
9056 {
9057 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9058 machopic_output_function_base_name (file);
9059 fprintf(file, "\n");
9060 }
9061 #endif
9062 else
9063 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9064 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9065 }
9066 \f
9067 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9068 for the target. */
9069
9070 void
9071 ix86_expand_clear (rtx dest)
9072 {
9073 rtx tmp;
9074
9075 /* We play register width games, which are only valid after reload. */
9076 gcc_assert (reload_completed);
9077
9078 /* Avoid HImode and its attendant prefix byte. */
9079 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9080 dest = gen_rtx_REG (SImode, REGNO (dest));
9081
9082 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9083
9084 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9085 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9086 {
9087 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9088 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9089 }
9090
9091 emit_insn (tmp);
9092 }
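/* Sketch of the two forms this can generate (illustrative, 32-bit
   register names):

       movl  $0, %eax        # TARGET_USE_MOV0 and not optimizing for size
       xorl  %eax, %eax      # otherwise; smaller, but clobbers the flags

   which is why the xor form above is wrapped in a PARALLEL with an
   explicit CLOBBER of the flags register. */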
9093
9094 /* X is an unchanging MEM. If it is a constant pool reference, return
9095 the constant pool rtx, else NULL. */
9096
9097 rtx
9098 maybe_get_pool_constant (rtx x)
9099 {
9100 x = ix86_delegitimize_address (XEXP (x, 0));
9101
9102 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9103 return get_pool_constant (x);
9104
9105 return NULL_RTX;
9106 }
9107
9108 void
9109 ix86_expand_move (enum machine_mode mode, rtx operands[])
9110 {
9111 int strict = (reload_in_progress || reload_completed);
9112 rtx op0, op1;
9113 enum tls_model model;
9114
9115 op0 = operands[0];
9116 op1 = operands[1];
9117
9118 if (GET_CODE (op1) == SYMBOL_REF)
9119 {
9120 model = SYMBOL_REF_TLS_MODEL (op1);
9121 if (model)
9122 {
9123 op1 = legitimize_tls_address (op1, model, true);
9124 op1 = force_operand (op1, op0);
9125 if (op1 == op0)
9126 return;
9127 }
9128 }
9129 else if (GET_CODE (op1) == CONST
9130 && GET_CODE (XEXP (op1, 0)) == PLUS
9131 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9132 {
9133 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9134 if (model)
9135 {
9136 rtx addend = XEXP (XEXP (op1, 0), 1);
9137 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9138 op1 = force_operand (op1, NULL);
9139 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9140 op0, 1, OPTAB_DIRECT);
9141 if (op1 == op0)
9142 return;
9143 }
9144 }
9145
9146 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9147 {
9148 if (TARGET_MACHO && !TARGET_64BIT)
9149 {
9150 #if TARGET_MACHO
9151 if (MACHOPIC_PURE)
9152 {
9153 rtx temp = ((reload_in_progress
9154 || ((op0 && REG_P (op0))
9155 && mode == Pmode))
9156 ? op0 : gen_reg_rtx (Pmode));
9157 op1 = machopic_indirect_data_reference (op1, temp);
9158 op1 = machopic_legitimize_pic_address (op1, mode,
9159 temp == op1 ? 0 : temp);
9160 }
9161 else if (MACHOPIC_INDIRECT)
9162 op1 = machopic_indirect_data_reference (op1, 0);
9163 if (op0 == op1)
9164 return;
9165 #endif
9166 }
9167 else
9168 {
9169 if (MEM_P (op0))
9170 op1 = force_reg (Pmode, op1);
9171 else
9172 op1 = legitimize_address (op1, op1, Pmode);
9173 }
9174 }
9175 else
9176 {
9177 if (MEM_P (op0)
9178 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9179 || !push_operand (op0, mode))
9180 && MEM_P (op1))
9181 op1 = force_reg (mode, op1);
9182
9183 if (push_operand (op0, mode)
9184 && ! general_no_elim_operand (op1, mode))
9185 op1 = copy_to_mode_reg (mode, op1);
9186
9187 /* Force large constants in 64-bit compilation into a register
9188 so they can be CSEed. */
9189 if (TARGET_64BIT && mode == DImode
9190 && immediate_operand (op1, mode)
9191 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9192 && !register_operand (op0, mode)
9193 && optimize && !reload_completed && !reload_in_progress)
9194 op1 = copy_to_mode_reg (mode, op1);
9195
9196 if (FLOAT_MODE_P (mode))
9197 {
9198 /* If we are loading a floating point constant to a register,
9199 force the value to memory now, since we'll get better code
9200 out of the back end. */
9201
9202 if (strict)
9203 ;
9204 else if (GET_CODE (op1) == CONST_DOUBLE)
9205 {
9206 op1 = validize_mem (force_const_mem (mode, op1));
9207 if (!register_operand (op0, mode))
9208 {
9209 rtx temp = gen_reg_rtx (mode);
9210 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9211 emit_move_insn (op0, temp);
9212 return;
9213 }
9214 }
9215 }
9216 }
9217
9218 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9219 }
9220
9221 void
9222 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9223 {
9224 rtx op0 = operands[0], op1 = operands[1];
9225
9226 /* Force constants other than zero into memory. We do not know how
9227 the instructions used to build constants modify the upper 64 bits
9228 of the register; once we have that information we may be able
9229 to handle some of them more efficiently. */
9230 if ((reload_in_progress | reload_completed) == 0
9231 && register_operand (op0, mode)
9232 && CONSTANT_P (op1)
9233 && standard_sse_constant_p (op1) <= 0)
9234 op1 = validize_mem (force_const_mem (mode, op1));
9235
9236 /* Make operand1 a register if it isn't already. */
9237 if (!no_new_pseudos
9238 && !register_operand (op0, mode)
9239 && !register_operand (op1, mode))
9240 {
9241 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9242 return;
9243 }
9244
9245 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9246 }
9247
9248 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9249 straight to ix86_expand_vector_move. */
9250
9251 void
9252 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9253 {
9254 rtx op0, op1, m;
9255
9256 op0 = operands[0];
9257 op1 = operands[1];
9258
9259 if (MEM_P (op1))
9260 {
9261 /* If we're optimizing for size, movups is the smallest. */
9262 if (optimize_size)
9263 {
9264 op0 = gen_lowpart (V4SFmode, op0);
9265 op1 = gen_lowpart (V4SFmode, op1);
9266 emit_insn (gen_sse_movups (op0, op1));
9267 return;
9268 }
9269
9270 /* ??? If we have typed data, then it would appear that using
9271 movdqu is the only way to get unaligned data loaded with
9272 integer type. */
9273 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9274 {
9275 op0 = gen_lowpart (V16QImode, op0);
9276 op1 = gen_lowpart (V16QImode, op1);
9277 emit_insn (gen_sse2_movdqu (op0, op1));
9278 return;
9279 }
9280
9281 if (TARGET_SSE2 && mode == V2DFmode)
9282 {
9283 rtx zero;
9284
9285 /* When SSE registers are split into halves, we can avoid
9286 writing to the top half twice. */
9287 if (TARGET_SSE_SPLIT_REGS)
9288 {
9289 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9290 zero = op0;
9291 }
9292 else
9293 {
9294 /* ??? Not sure about the best option for the Intel chips.
9295 The following would seem to satisfy; the register is
9296 entirely cleared, breaking the dependency chain. We
9297 then store to the upper half, with a dependency depth
9298 of one. A rumor has it that Intel recommends two movsd
9299 followed by an unpacklpd, but this is unconfirmed. And
9300 given that the dependency depth of the unpacklpd would
9301 still be one, I'm not sure why this would be better. */
9302 zero = CONST0_RTX (V2DFmode);
9303 }
9304
9305 m = adjust_address (op1, DFmode, 0);
9306 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9307 m = adjust_address (op1, DFmode, 8);
9308 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9309 }
9310 else
9311 {
9312 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9313 emit_move_insn (op0, CONST0_RTX (mode));
9314 else
9315 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9316
9317 if (mode != V4SFmode)
9318 op0 = gen_lowpart (V4SFmode, op0);
9319 m = adjust_address (op1, V2SFmode, 0);
9320 emit_insn (gen_sse_loadlps (op0, op0, m));
9321 m = adjust_address (op1, V2SFmode, 8);
9322 emit_insn (gen_sse_loadhps (op0, op0, m));
9323 }
9324 }
9325 else if (MEM_P (op0))
9326 {
9327 /* If we're optimizing for size, movups is the smallest. */
9328 if (optimize_size)
9329 {
9330 op0 = gen_lowpart (V4SFmode, op0);
9331 op1 = gen_lowpart (V4SFmode, op1);
9332 emit_insn (gen_sse_movups (op0, op1));
9333 return;
9334 }
9335
9336 /* ??? Similar to the above, only less clear because of the
9337 "typeless stores". */
9338 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9339 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9340 {
9341 op0 = gen_lowpart (V16QImode, op0);
9342 op1 = gen_lowpart (V16QImode, op1);
9343 emit_insn (gen_sse2_movdqu (op0, op1));
9344 return;
9345 }
9346
9347 if (TARGET_SSE2 && mode == V2DFmode)
9348 {
9349 m = adjust_address (op0, DFmode, 0);
9350 emit_insn (gen_sse2_storelpd (m, op1));
9351 m = adjust_address (op0, DFmode, 8);
9352 emit_insn (gen_sse2_storehpd (m, op1));
9353 }
9354 else
9355 {
9356 if (mode != V4SFmode)
9357 op1 = gen_lowpart (V4SFmode, op1);
9358 m = adjust_address (op0, V2SFmode, 0);
9359 emit_insn (gen_sse_storelps (m, op1));
9360 m = adjust_address (op0, V2SFmode, 8);
9361 emit_insn (gen_sse_storehps (m, op1));
9362 }
9363 }
9364 else
9365 gcc_unreachable ();
9366 }
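/* Summary of the selection above (descriptive only):

       optimize_size             movups for both loads and stores
       SSE2 integer vectors      movdqu (stores only when typeless
                                 stores are acceptable)
       SSE2 V2DFmode             movlpd + movhpd halves
       everything else           movlps + movhps halves via V4SFmode

   i.e. misaligned accesses are usually split into two 8-byte halves to
   avoid the penalties of the full 16-byte unaligned forms. */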
9367
9368 /* Expand a push in MODE. This is some mode for which we do not support
9369 proper push instructions, at least from the registers that we expect
9370 the value to live in. */
9371
9372 void
9373 ix86_expand_push (enum machine_mode mode, rtx x)
9374 {
9375 rtx tmp;
9376
9377 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9378 GEN_INT (-GET_MODE_SIZE (mode)),
9379 stack_pointer_rtx, 1, OPTAB_DIRECT);
9380 if (tmp != stack_pointer_rtx)
9381 emit_move_insn (stack_pointer_rtx, tmp);
9382
9383 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9384 emit_move_insn (tmp, x);
9385 }
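/* Roughly speaking (illustrative, 32-bit mnemonics), for a value X in
   such a mode this emits the equivalent of

       subl  $GET_MODE_SIZE (mode), %esp
       mov?  X, (%esp)

   instead of a push, since no push instruction accepts the registers
   the value is expected to live in. */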
9386
9387 /* Helper function of ix86_fixup_binary_operands to canonicalize
9388 operand order. Returns true if the operands should be swapped. */
9389
9390 static bool
9391 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9392 rtx operands[])
9393 {
9394 rtx dst = operands[0];
9395 rtx src1 = operands[1];
9396 rtx src2 = operands[2];
9397
9398 /* If the operation is not commutative, we can't do anything. */
9399 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9400 return false;
9401
9402 /* Highest priority is that src1 should match dst. */
9403 if (rtx_equal_p (dst, src1))
9404 return false;
9405 if (rtx_equal_p (dst, src2))
9406 return true;
9407
9408 /* Next highest priority is that immediate constants come second. */
9409 if (immediate_operand (src2, mode))
9410 return false;
9411 if (immediate_operand (src1, mode))
9412 return true;
9413
9414 /* Lowest priority is that memory references should come second. */
9415 if (MEM_P (src2))
9416 return false;
9417 if (MEM_P (src1))
9418 return true;
9419
9420 return false;
9421 }
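/* Example of the priorities above (illustrative): for a commutative
   operation such as

       c = mem + c           operands = { c, mem, c }

   dst matches src2, so the sources are swapped to give c = c + mem,
   which the two-address instructions can encode directly; likewise an
   immediate or memory operand sitting in the first source slot is
   moved to the second. */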
9422
9423
9424 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9425 destination to use for the operation. If different from the true
9426 destination in operands[0], a copy operation will be required. */
9427
9428 rtx
9429 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9430 rtx operands[])
9431 {
9432 rtx dst = operands[0];
9433 rtx src1 = operands[1];
9434 rtx src2 = operands[2];
9435
9436 /* Canonicalize operand order. */
9437 if (ix86_swap_binary_operands_p (code, mode, operands))
9438 {
9439 rtx temp = src1;
9440 src1 = src2;
9441 src2 = temp;
9442 }
9443
9444 /* Both source operands cannot be in memory. */
9445 if (MEM_P (src1) && MEM_P (src2))
9446 {
9447 /* Optimization: Only read from memory once. */
9448 if (rtx_equal_p (src1, src2))
9449 {
9450 src2 = force_reg (mode, src2);
9451 src1 = src2;
9452 }
9453 else
9454 src2 = force_reg (mode, src2);
9455 }
9456
9457 /* If the destination is memory, and we do not have matching source
9458 operands, do things in registers. */
9459 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9460 dst = gen_reg_rtx (mode);
9461
9462 /* Source 1 cannot be a constant. */
9463 if (CONSTANT_P (src1))
9464 src1 = force_reg (mode, src1);
9465
9466 /* Source 1 cannot be a non-matching memory. */
9467 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9468 src1 = force_reg (mode, src1);
9469
9470 operands[1] = src1;
9471 operands[2] = src2;
9472 return dst;
9473 }
9474
9475 /* Similarly, but assume that the destination has already been
9476 set up properly. */
9477
9478 void
9479 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9480 enum machine_mode mode, rtx operands[])
9481 {
9482 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9483 gcc_assert (dst == operands[0]);
9484 }
9485
9486 /* Attempt to expand a binary operator. Make the expansion closer to the
9487 actual machine than just general_operand, which would allow 3 separate
9488 memory references (one output, two input) in a single insn. */
9489
9490 void
9491 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9492 rtx operands[])
9493 {
9494 rtx src1, src2, dst, op, clob;
9495
9496 dst = ix86_fixup_binary_operands (code, mode, operands);
9497 src1 = operands[1];
9498 src2 = operands[2];
9499
9500 /* Emit the instruction. */
9501
9502 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9503 if (reload_in_progress)
9504 {
9505 /* Reload doesn't know about the flags register, and doesn't know that
9506 it doesn't want to clobber it. We can only do this with PLUS. */
9507 gcc_assert (code == PLUS);
9508 emit_insn (op);
9509 }
9510 else
9511 {
9512 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9513 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9514 }
9515
9516 /* Fix up the destination if needed. */
9517 if (dst != operands[0])
9518 emit_move_insn (operands[0], dst);
9519 }
9520
9521 /* Return TRUE or FALSE depending on whether the binary operator meets the
9522 appropriate constraints. */
9523
9524 int
9525 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9526 rtx operands[3])
9527 {
9528 rtx dst = operands[0];
9529 rtx src1 = operands[1];
9530 rtx src2 = operands[2];
9531
9532 /* Both source operands cannot be in memory. */
9533 if (MEM_P (src1) && MEM_P (src2))
9534 return 0;
9535
9536 /* Canonicalize operand order for commutative operators. */
9537 if (ix86_swap_binary_operands_p (code, mode, operands))
9538 {
9539 rtx temp = src1;
9540 src1 = src2;
9541 src2 = temp;
9542 }
9543
9544 /* If the destination is memory, we must have a matching source operand. */
9545 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9546 return 0;
9547
9548 /* Source 1 cannot be a constant. */
9549 if (CONSTANT_P (src1))
9550 return 0;
9551
9552 /* Source 1 cannot be a non-matching memory. */
9553 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9554 return 0;
9555
9556 return 1;
9557 }
9558
9559 /* Attempt to expand a unary operator. Make the expansion closer to the
9560 actual machine than just general_operand, which would allow 2 separate
9561 memory references (one output, one input) in a single insn. */
9562
9563 void
9564 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9565 rtx operands[])
9566 {
9567 int matching_memory;
9568 rtx src, dst, op, clob;
9569
9570 dst = operands[0];
9571 src = operands[1];
9572
9573 /* If the destination is memory, and we do not have matching source
9574 operands, do things in registers. */
9575 matching_memory = 0;
9576 if (MEM_P (dst))
9577 {
9578 if (rtx_equal_p (dst, src))
9579 matching_memory = 1;
9580 else
9581 dst = gen_reg_rtx (mode);
9582 }
9583
9584 /* When the source operand is memory, the destination must match. */
9585 if (MEM_P (src) && !matching_memory)
9586 src = force_reg (mode, src);
9587
9588 /* Emit the instruction. */
9589
9590 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9591 if (reload_in_progress || code == NOT)
9592 {
9593 /* Reload doesn't know about the flags register, and doesn't know that
9594 it doesn't want to clobber it. */
9595 gcc_assert (code == NOT);
9596 emit_insn (op);
9597 }
9598 else
9599 {
9600 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9602 }
9603
9604 /* Fix up the destination if needed. */
9605 if (dst != operands[0])
9606 emit_move_insn (operands[0], dst);
9607 }
9608
9609 /* Return TRUE or FALSE depending on whether the unary operator meets the
9610 appropriate constraints. */
9611
9612 int
9613 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9614 enum machine_mode mode ATTRIBUTE_UNUSED,
9615 rtx operands[2] ATTRIBUTE_UNUSED)
9616 {
9617 /* If one of operands is memory, source and destination must match. */
9618 if ((MEM_P (operands[0])
9619 || MEM_P (operands[1]))
9620 && ! rtx_equal_p (operands[0], operands[1]))
9621 return FALSE;
9622 return TRUE;
9623 }
9624
9625 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9626 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9627 true, then replicate the mask for all elements of the vector register.
9628 If INVERT is true, then create a mask excluding the sign bit. */
9629
9630 rtx
9631 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9632 {
9633 enum machine_mode vec_mode;
9634 HOST_WIDE_INT hi, lo;
9635 int shift = 63;
9636 rtvec v;
9637 rtx mask;
9638
9639 /* Find the sign bit, sign extended to 2*HWI. */
9640 if (mode == SFmode)
9641 lo = 0x80000000, hi = lo < 0;
9642 else if (HOST_BITS_PER_WIDE_INT >= 64)
9643 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9644 else
9645 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9646
9647 if (invert)
9648 lo = ~lo, hi = ~hi;
9649
9650 /* Force this value into the low part of a fp vector constant. */
9651 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9652 mask = gen_lowpart (mode, mask);
9653
9654 if (mode == SFmode)
9655 {
9656 if (vect)
9657 v = gen_rtvec (4, mask, mask, mask, mask);
9658 else
9659 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9660 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9661 vec_mode = V4SFmode;
9662 }
9663 else
9664 {
9665 if (vect)
9666 v = gen_rtvec (2, mask, mask);
9667 else
9668 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9669 vec_mode = V2DFmode;
9670 }
9671
9672 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9673 }
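/* Concretely (a sketch of the constants produced): for DFmode with
   VECT false this returns a V2DF constant whose low element has only
   bit 63 set (0x8000000000000000) and whose high element is zero; with
   VECT true the mask is replicated into both elements, and with INVERT
   each mask element is complemented (0x7fffffffffffffff), making the
   result directly usable as an AND or XOR operand below. */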
9674
9675 /* Generate code for floating point ABS or NEG. */
9676
9677 void
9678 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9679 rtx operands[])
9680 {
9681 rtx mask, set, use, clob, dst, src;
9682 bool matching_memory;
9683 bool use_sse = false;
9684 bool vector_mode = VECTOR_MODE_P (mode);
9685 enum machine_mode elt_mode = mode;
9686
9687 if (vector_mode)
9688 {
9689 elt_mode = GET_MODE_INNER (mode);
9690 use_sse = true;
9691 }
9692 else if (TARGET_SSE_MATH)
9693 use_sse = SSE_FLOAT_MODE_P (mode);
9694
9695 /* NEG and ABS performed with SSE use bitwise mask operations.
9696 Create the appropriate mask now. */
9697 if (use_sse)
9698 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9699 else
9700 mask = NULL_RTX;
9701
9702 dst = operands[0];
9703 src = operands[1];
9704
9705 /* If the destination is memory, and we don't have matching source
9706 operands or we're using the x87, do things in registers. */
9707 matching_memory = false;
9708 if (MEM_P (dst))
9709 {
9710 if (use_sse && rtx_equal_p (dst, src))
9711 matching_memory = true;
9712 else
9713 dst = gen_reg_rtx (mode);
9714 }
9715 if (MEM_P (src) && !matching_memory)
9716 src = force_reg (mode, src);
9717
9718 if (vector_mode)
9719 {
9720 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9721 set = gen_rtx_SET (VOIDmode, dst, set);
9722 emit_insn (set);
9723 }
9724 else
9725 {
9726 set = gen_rtx_fmt_e (code, mode, src);
9727 set = gen_rtx_SET (VOIDmode, dst, set);
9728 if (mask)
9729 {
9730 use = gen_rtx_USE (VOIDmode, mask);
9731 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9732 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9733 gen_rtvec (3, set, use, clob)));
9734 }
9735 else
9736 emit_insn (set);
9737 }
9738
9739 if (dst != operands[0])
9740 emit_move_insn (operands[0], dst);
9741 }
9742
9743 /* Expand a copysign operation. Special case operand 0 being a constant. */
9744
9745 void
9746 ix86_expand_copysign (rtx operands[])
9747 {
9748 enum machine_mode mode, vmode;
9749 rtx dest, op0, op1, mask, nmask;
9750
9751 dest = operands[0];
9752 op0 = operands[1];
9753 op1 = operands[2];
9754
9755 mode = GET_MODE (dest);
9756 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9757
9758 if (GET_CODE (op0) == CONST_DOUBLE)
9759 {
9760 rtvec v;
9761
9762 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9763 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9764
9765 if (op0 == CONST0_RTX (mode))
9766 op0 = CONST0_RTX (vmode);
9767 else
9768 {
9769 if (mode == SFmode)
9770 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9771 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9772 else
9773 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9774 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9775 }
9776
9777 mask = ix86_build_signbit_mask (mode, 0, 0);
9778
9779 if (mode == SFmode)
9780 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9781 else
9782 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9783 }
9784 else
9785 {
9786 nmask = ix86_build_signbit_mask (mode, 0, 1);
9787 mask = ix86_build_signbit_mask (mode, 0, 0);
9788
9789 if (mode == SFmode)
9790 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9791 else
9792 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9793 }
9794 }
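/* The identity being implemented (for reference) is

       copysign (x, y) = (x & ~signmask) | (y & signmask)

   where signmask selects only the sign bit; the _const splitters below
   handle the case where x is a constant already folded to |x| and
   widened to a vector, and the _var splitters need both MASK and NMASK
   because x is not known at compile time. */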
9795
9796 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9797 be a constant, and so has already been expanded into a vector constant. */
9798
9799 void
9800 ix86_split_copysign_const (rtx operands[])
9801 {
9802 enum machine_mode mode, vmode;
9803 rtx dest, op0, op1, mask, x;
9804
9805 dest = operands[0];
9806 op0 = operands[1];
9807 op1 = operands[2];
9808 mask = operands[3];
9809
9810 mode = GET_MODE (dest);
9811 vmode = GET_MODE (mask);
9812
9813 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9814 x = gen_rtx_AND (vmode, dest, mask);
9815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9816
9817 if (op0 != CONST0_RTX (vmode))
9818 {
9819 x = gen_rtx_IOR (vmode, dest, op0);
9820 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9821 }
9822 }
9823
9824 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9825 so we have to do two masks. */
9826
9827 void
9828 ix86_split_copysign_var (rtx operands[])
9829 {
9830 enum machine_mode mode, vmode;
9831 rtx dest, scratch, op0, op1, mask, nmask, x;
9832
9833 dest = operands[0];
9834 scratch = operands[1];
9835 op0 = operands[2];
9836 op1 = operands[3];
9837 nmask = operands[4];
9838 mask = operands[5];
9839
9840 mode = GET_MODE (dest);
9841 vmode = GET_MODE (mask);
9842
9843 if (rtx_equal_p (op0, op1))
9844 {
9845 /* Shouldn't happen often (it's useless, obviously), but when it does
9846 we'd generate incorrect code if we continue below. */
9847 emit_move_insn (dest, op0);
9848 return;
9849 }
9850
9851 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9852 {
9853 gcc_assert (REGNO (op1) == REGNO (scratch));
9854
9855 x = gen_rtx_AND (vmode, scratch, mask);
9856 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9857
9858 dest = mask;
9859 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9860 x = gen_rtx_NOT (vmode, dest);
9861 x = gen_rtx_AND (vmode, x, op0);
9862 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9863 }
9864 else
9865 {
9866 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9867 {
9868 x = gen_rtx_AND (vmode, scratch, mask);
9869 }
9870 else /* alternative 2,4 */
9871 {
9872 gcc_assert (REGNO (mask) == REGNO (scratch));
9873 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9874 x = gen_rtx_AND (vmode, scratch, op1);
9875 }
9876 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9877
9878 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9879 {
9880 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9881 x = gen_rtx_AND (vmode, dest, nmask);
9882 }
9883 else /* alternative 3,4 */
9884 {
9885 gcc_assert (REGNO (nmask) == REGNO (dest));
9886 dest = nmask;
9887 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9888 x = gen_rtx_AND (vmode, dest, op0);
9889 }
9890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9891 }
9892
9893 x = gen_rtx_IOR (vmode, dest, scratch);
9894 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9895 }
9896
9897 /* Return TRUE or FALSE depending on whether the first SET in INSN
9898 has source and destination with matching CC modes, and whether the
9899 CC mode is at least as constrained as REQ_MODE. */
9900
9901 int
9902 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9903 {
9904 rtx set;
9905 enum machine_mode set_mode;
9906
9907 set = PATTERN (insn);
9908 if (GET_CODE (set) == PARALLEL)
9909 set = XVECEXP (set, 0, 0);
9910 gcc_assert (GET_CODE (set) == SET);
9911 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9912
9913 set_mode = GET_MODE (SET_DEST (set));
9914 switch (set_mode)
9915 {
9916 case CCNOmode:
9917 if (req_mode != CCNOmode
9918 && (req_mode != CCmode
9919 || XEXP (SET_SRC (set), 1) != const0_rtx))
9920 return 0;
9921 break;
9922 case CCmode:
9923 if (req_mode == CCGCmode)
9924 return 0;
9925 /* FALLTHRU */
9926 case CCGCmode:
9927 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9928 return 0;
9929 /* FALLTHRU */
9930 case CCGOCmode:
9931 if (req_mode == CCZmode)
9932 return 0;
9933 /* FALLTHRU */
9934 case CCZmode:
9935 break;
9936
9937 default:
9938 gcc_unreachable ();
9939 }
9940
9941 return (GET_MODE (SET_SRC (set)) == set_mode);
9942 }
9943
9944 /* Generate insn patterns to do an integer compare of OPERANDS. */
9945
9946 static rtx
9947 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9948 {
9949 enum machine_mode cmpmode;
9950 rtx tmp, flags;
9951
9952 cmpmode = SELECT_CC_MODE (code, op0, op1);
9953 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9954
9955 /* This is very simple, but making the interface the same as in the
9956 FP case makes the rest of the code easier. */
9957 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9958 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9959
9960 /* Return the test that should be put into the flags user, i.e.
9961 the bcc, scc, or cmov instruction. */
9962 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9963 }
9964
9965 /* Figure out whether to use ordered or unordered fp comparisons.
9966 Return the appropriate mode to use. */
9967
9968 enum machine_mode
9969 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9970 {
9971 /* ??? In order to make all comparisons reversible, we do all comparisons
9972 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9973 all forms of trapping and nontrapping comparisons, we can make inequality
9974 comparisons trapping again, since that results in better code when using
9975 FCOM based compares. */
9976 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9977 }
9978
9979 enum machine_mode
9980 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9981 {
9982 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9983 return ix86_fp_compare_mode (code);
9984 switch (code)
9985 {
9986 /* Only zero flag is needed. */
9987 case EQ: /* ZF=0 */
9988 case NE: /* ZF!=0 */
9989 return CCZmode;
9990 /* Codes needing carry flag. */
9991 case GEU: /* CF=0 */
9992 case GTU: /* CF=0 & ZF=0 */
9993 case LTU: /* CF=1 */
9994 case LEU: /* CF=1 | ZF=1 */
9995 return CCmode;
9996 /* Codes possibly doable only with sign flag when
9997 comparing against zero. */
9998 case GE: /* SF=OF or SF=0 */
9999 case LT: /* SF<>OF or SF=1 */
10000 if (op1 == const0_rtx)
10001 return CCGOCmode;
10002 else
10003 /* For other cases Carry flag is not required. */
10004 return CCGCmode;
10005 /* Codes doable only with sign flag when comparing
10006 against zero, but we miss jump instruction for it
10007 so we need to use relational tests against overflow
10008 that thus needs to be zero. */
10009 case GT: /* ZF=0 & SF=OF */
10010 case LE: /* ZF=1 | SF<>OF */
10011 if (op1 == const0_rtx)
10012 return CCNOmode;
10013 else
10014 return CCGCmode;
10015 /* The strcmp pattern does (use flags), and combine may ask us for the
10016 proper mode. */
10017 case USE:
10018 return CCmode;
10019 default:
10020 gcc_unreachable ();
10021 }
10022 }
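/* Example (illustrative, just restating the cases above): a signed
   "x > 0" test (GT against const0_rtx) gets CCNOmode, a signed compare
   against a nonzero value gets CCGCmode, and any unsigned compare
   (GTU, LTU, ...) gets the full CCmode since it needs the carry
   flag. */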
10023
10024 /* Return the fixed registers used for condition codes. */
10025
10026 static bool
10027 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10028 {
10029 *p1 = FLAGS_REG;
10030 *p2 = FPSR_REG;
10031 return true;
10032 }
10033
10034 /* If two condition code modes are compatible, return a condition code
10035 mode which is compatible with both. Otherwise, return
10036 VOIDmode. */
10037
10038 static enum machine_mode
10039 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10040 {
10041 if (m1 == m2)
10042 return m1;
10043
10044 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10045 return VOIDmode;
10046
10047 if ((m1 == CCGCmode && m2 == CCGOCmode)
10048 || (m1 == CCGOCmode && m2 == CCGCmode))
10049 return CCGCmode;
10050
10051 switch (m1)
10052 {
10053 default:
10054 gcc_unreachable ();
10055
10056 case CCmode:
10057 case CCGCmode:
10058 case CCGOCmode:
10059 case CCNOmode:
10060 case CCZmode:
10061 switch (m2)
10062 {
10063 default:
10064 return VOIDmode;
10065
10066 case CCmode:
10067 case CCGCmode:
10068 case CCGOCmode:
10069 case CCNOmode:
10070 case CCZmode:
10071 return CCmode;
10072 }
10073
10074 case CCFPmode:
10075 case CCFPUmode:
10076 /* These are only compatible with themselves, which we already
10077 checked above. */
10078 return VOIDmode;
10079 }
10080 }
10081
10082 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10083
10084 int
10085 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10086 {
10087 enum rtx_code swapped_code = swap_condition (code);
10088 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10089 || (ix86_fp_comparison_cost (swapped_code)
10090 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10091 }
10092
10093 /* Swap, force into registers, or otherwise massage the two operands
10094 to a fp comparison. The operands are updated in place; the new
10095 comparison code is returned. */
10096
10097 static enum rtx_code
10098 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10099 {
10100 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10101 rtx op0 = *pop0, op1 = *pop1;
10102 enum machine_mode op_mode = GET_MODE (op0);
10103 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10104
10105 /* All of the unordered compare instructions only work on registers.
10106 The same is true of the fcomi compare instructions. The XFmode
10107 compare instructions require registers except when comparing
10108 against zero or when converting operand 1 from fixed point to
10109 floating point. */
10110
10111 if (!is_sse
10112 && (fpcmp_mode == CCFPUmode
10113 || (op_mode == XFmode
10114 && ! (standard_80387_constant_p (op0) == 1
10115 || standard_80387_constant_p (op1) == 1)
10116 && GET_CODE (op1) != FLOAT)
10117 || ix86_use_fcomi_compare (code)))
10118 {
10119 op0 = force_reg (op_mode, op0);
10120 op1 = force_reg (op_mode, op1);
10121 }
10122 else
10123 {
10124 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10125 things around if they appear profitable, otherwise force op0
10126 into a register. */
10127
10128 if (standard_80387_constant_p (op0) == 0
10129 || (MEM_P (op0)
10130 && ! (standard_80387_constant_p (op1) == 0
10131 || MEM_P (op1))))
10132 {
10133 rtx tmp;
10134 tmp = op0, op0 = op1, op1 = tmp;
10135 code = swap_condition (code);
10136 }
10137
10138 if (!REG_P (op0))
10139 op0 = force_reg (op_mode, op0);
10140
10141 if (CONSTANT_P (op1))
10142 {
10143 int tmp = standard_80387_constant_p (op1);
10144 if (tmp == 0)
10145 op1 = validize_mem (force_const_mem (op_mode, op1));
10146 else if (tmp == 1)
10147 {
10148 if (TARGET_CMOVE)
10149 op1 = force_reg (op_mode, op1);
10150 }
10151 else
10152 op1 = force_reg (op_mode, op1);
10153 }
10154 }
10155
10156 /* Try to rearrange the comparison to make it cheaper. */
10157 if (ix86_fp_comparison_cost (code)
10158 > ix86_fp_comparison_cost (swap_condition (code))
10159 && (REG_P (op1) || !no_new_pseudos))
10160 {
10161 rtx tmp;
10162 tmp = op0, op0 = op1, op1 = tmp;
10163 code = swap_condition (code);
10164 if (!REG_P (op0))
10165 op0 = force_reg (op_mode, op0);
10166 }
10167
10168 *pop0 = op0;
10169 *pop1 = op1;
10170 return code;
10171 }
10172
10173 /* Convert the comparison codes we use to represent FP comparisons to the
10174 integer code that will result in a proper branch. Return UNKNOWN if no
10175 such code is available. */
10176
10177 enum rtx_code
10178 ix86_fp_compare_code_to_integer (enum rtx_code code)
10179 {
10180 switch (code)
10181 {
10182 case GT:
10183 return GTU;
10184 case GE:
10185 return GEU;
10186 case ORDERED:
10187 case UNORDERED:
10188 return code;
10189 break;
10190 case UNEQ:
10191 return EQ;
10192 break;
10193 case UNLT:
10194 return LTU;
10195 break;
10196 case UNLE:
10197 return LEU;
10198 break;
10199 case LTGT:
10200 return NE;
10201 break;
10202 default:
10203 return UNKNOWN;
10204 }
10205 }
10206
10207 /* Split comparison code CODE into comparisons we can do using branch
10208 instructions. BYPASS_CODE is the comparison code for the branch that
10209 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10210 is not required, its value is set to UNKNOWN.
10211 We never require more than two branches. */
10212
10213 void
10214 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10215 enum rtx_code *first_code,
10216 enum rtx_code *second_code)
10217 {
10218 *first_code = code;
10219 *bypass_code = UNKNOWN;
10220 *second_code = UNKNOWN;
10221
10222 /* The fcomi comparison sets flags as follows:
10223
10224 cmp ZF PF CF
10225 > 0 0 0
10226 < 0 0 1
10227 = 1 0 0
10228 un 1 1 1 */
10229
10230 switch (code)
10231 {
10232 case GT: /* GTU - CF=0 & ZF=0 */
10233 case GE: /* GEU - CF=0 */
10234 case ORDERED: /* PF=0 */
10235 case UNORDERED: /* PF=1 */
10236 case UNEQ: /* EQ - ZF=1 */
10237 case UNLT: /* LTU - CF=1 */
10238 case UNLE: /* LEU - CF=1 | ZF=1 */
10239 case LTGT: /* EQ - ZF=0 */
10240 break;
10241 case LT: /* LTU - CF=1 - fails on unordered */
10242 *first_code = UNLT;
10243 *bypass_code = UNORDERED;
10244 break;
10245 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10246 *first_code = UNLE;
10247 *bypass_code = UNORDERED;
10248 break;
10249 case EQ: /* EQ - ZF=1 - fails on unordered */
10250 *first_code = UNEQ;
10251 *bypass_code = UNORDERED;
10252 break;
10253 case NE: /* NE - ZF=0 - fails on unordered */
10254 *first_code = LTGT;
10255 *second_code = UNORDERED;
10256 break;
10257 case UNGE: /* GEU - CF=0 - fails on unordered */
10258 *first_code = GE;
10259 *second_code = UNORDERED;
10260 break;
10261 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10262 *first_code = GT;
10263 *second_code = UNORDERED;
10264 break;
10265 default:
10266 gcc_unreachable ();
10267 }
10268 if (!TARGET_IEEE_FP)
10269 {
10270 *second_code = UNKNOWN;
10271 *bypass_code = UNKNOWN;
10272 }
10273 }
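/* Worked example (added commentary): for CODE == LT under
   TARGET_IEEE_FP this returns FIRST_CODE = UNLT with BYPASS_CODE =
   UNORDERED, i.e. branch around the UNLT test when the operands are
   unordered so a NaN never takes the "less than" path; for NE it
   returns FIRST_CODE = LTGT with SECOND_CODE = UNORDERED, since a NaN
   compares "not equal" and therefore needs a second taken branch. */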
10274
10275 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10276 All of the following functions use the number of instructions as the cost metric.
10277 In the future this should be tweaked to compute bytes for optimize_size and
10278 take into account the performance of various instructions on various CPUs. */
10279 static int
10280 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10281 {
10282 if (!TARGET_IEEE_FP)
10283 return 4;
10284 /* The cost of code output by ix86_expand_fp_compare. */
10285 switch (code)
10286 {
10287 case UNLE:
10288 case UNLT:
10289 case LTGT:
10290 case GT:
10291 case GE:
10292 case UNORDERED:
10293 case ORDERED:
10294 case UNEQ:
10295 return 4;
10296 break;
10297 case LT:
10298 case NE:
10299 case EQ:
10300 case UNGE:
10301 return 5;
10302 break;
10303 case LE:
10304 case UNGT:
10305 return 6;
10306 break;
10307 default:
10308 gcc_unreachable ();
10309 }
10310 }
10311
10312 /* Return the cost of a comparison done using the fcomi operation.
10313 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10314 static int
10315 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10316 {
10317 enum rtx_code bypass_code, first_code, second_code;
10318 /* Return an arbitrarily high cost when the instruction is not supported -
10319 this prevents gcc from using it. */
10320 if (!TARGET_CMOVE)
10321 return 1024;
10322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10323 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10324 }
10325
10326 /* Return the cost of a comparison done using the sahf operation.
10327 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10328 static int
10329 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10330 {
10331 enum rtx_code bypass_code, first_code, second_code;
10332 /* Return an arbitrarily high cost when the instruction is not preferred -
10333 this prevents gcc from using it. */
10334 if (!TARGET_USE_SAHF && !optimize_size)
10335 return 1024;
10336 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10337 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10338 }
10339
10340 /* Compute the cost of the comparison done using any method.
10341 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10342 static int
10343 ix86_fp_comparison_cost (enum rtx_code code)
10344 {
10345 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10346 int min;
10347
10348 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10349 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10350
10351 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10352 if (min > sahf_cost)
10353 min = sahf_cost;
10354 if (min > fcomi_cost)
10355 min = fcomi_cost;
10356 return min;
10357 }
10358
10359 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10360
10361 static rtx
10362 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10363 rtx *second_test, rtx *bypass_test)
10364 {
10365 enum machine_mode fpcmp_mode, intcmp_mode;
10366 rtx tmp, tmp2;
10367 int cost = ix86_fp_comparison_cost (code);
10368 enum rtx_code bypass_code, first_code, second_code;
10369
10370 fpcmp_mode = ix86_fp_compare_mode (code);
10371 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10372
10373 if (second_test)
10374 *second_test = NULL_RTX;
10375 if (bypass_test)
10376 *bypass_test = NULL_RTX;
10377
10378 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10379
10380 /* Do fcomi/sahf based test when profitable. */
10381 if ((bypass_code == UNKNOWN || bypass_test)
10382 && (second_code == UNKNOWN || second_test)
10383 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10384 {
10385 if (TARGET_CMOVE)
10386 {
10387 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10388 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10389 tmp);
10390 emit_insn (tmp);
10391 }
10392 else
10393 {
10394 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10395 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10396 if (!scratch)
10397 scratch = gen_reg_rtx (HImode);
10398 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10399 emit_insn (gen_x86_sahf_1 (scratch));
10400 }
10401
10402 /* The FP codes work out to act like unsigned. */
10403 intcmp_mode = fpcmp_mode;
10404 code = first_code;
10405 if (bypass_code != UNKNOWN)
10406 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10407 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10408 const0_rtx);
10409 if (second_code != UNKNOWN)
10410 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10411 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10412 const0_rtx);
10413 }
10414 else
10415 {
10416 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10417 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10418 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10419 if (!scratch)
10420 scratch = gen_reg_rtx (HImode);
10421 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10422
10423 /* In the unordered case, we have to check C2 for NaN's, which
10424 doesn't happen to work out to anything nice combination-wise.
10425 So do some bit twiddling on the value we've got in AH to come
10426 up with an appropriate set of condition codes. */
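/* For readability (added commentary): after the fnstsw above, the
   relevant x87 status-word condition flags land in these AH bits:

       0x01   C0   "below"
       0x04   C2   unordered (a NaN was involved)
       0x40   C3   "equal"

   so the constants 0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 used below
   are simply combinations of C3, C2 and C0. */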
10427
10428 intcmp_mode = CCNOmode;
10429 switch (code)
10430 {
10431 case GT:
10432 case UNGT:
10433 if (code == GT || !TARGET_IEEE_FP)
10434 {
10435 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10436 code = EQ;
10437 }
10438 else
10439 {
10440 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10441 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10442 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10443 intcmp_mode = CCmode;
10444 code = GEU;
10445 }
10446 break;
10447 case LT:
10448 case UNLT:
10449 if (code == LT && TARGET_IEEE_FP)
10450 {
10451 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10452 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10453 intcmp_mode = CCmode;
10454 code = EQ;
10455 }
10456 else
10457 {
10458 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10459 code = NE;
10460 }
10461 break;
10462 case GE:
10463 case UNGE:
10464 if (code == GE || !TARGET_IEEE_FP)
10465 {
10466 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10467 code = EQ;
10468 }
10469 else
10470 {
10471 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10472 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10473 GEN_INT (0x01)));
10474 code = NE;
10475 }
10476 break;
10477 case LE:
10478 case UNLE:
10479 if (code == LE && TARGET_IEEE_FP)
10480 {
10481 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10482 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10483 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10484 intcmp_mode = CCmode;
10485 code = LTU;
10486 }
10487 else
10488 {
10489 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10490 code = NE;
10491 }
10492 break;
10493 case EQ:
10494 case UNEQ:
10495 if (code == EQ && TARGET_IEEE_FP)
10496 {
10497 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10498 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10499 intcmp_mode = CCmode;
10500 code = EQ;
10501 }
10502 else
10503 {
10504 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10505 code = NE;
10506 break;
10507 }
10508 break;
10509 case NE:
10510 case LTGT:
10511 if (code == NE && TARGET_IEEE_FP)
10512 {
10513 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10514 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10515 GEN_INT (0x40)));
10516 code = NE;
10517 }
10518 else
10519 {
10520 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10521 code = EQ;
10522 }
10523 break;
10524
10525 case UNORDERED:
10526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10527 code = NE;
10528 break;
10529 case ORDERED:
10530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10531 code = EQ;
10532 break;
10533
10534 default:
10535 gcc_unreachable ();
10536 }
10537 }
10538
10539 /* Return the test that should be put into the flags user, i.e.
10540 the bcc, scc, or cmov instruction. */
10541 return gen_rtx_fmt_ee (code, VOIDmode,
10542 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10543 const0_rtx);
10544 }
10545
10546 rtx
10547 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10548 {
10549 rtx op0, op1, ret;
10550 op0 = ix86_compare_op0;
10551 op1 = ix86_compare_op1;
10552
10553 if (second_test)
10554 *second_test = NULL_RTX;
10555 if (bypass_test)
10556 *bypass_test = NULL_RTX;
10557
10558 if (ix86_compare_emitted)
10559 {
10560 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10561 ix86_compare_emitted = NULL_RTX;
10562 }
10563 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10564 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10565 second_test, bypass_test);
10566 else
10567 ret = ix86_expand_int_compare (code, op0, op1);
10568
10569 return ret;
10570 }
10571
10572 /* Return true if the CODE will result in a nontrivial jump sequence. */
10573 bool
10574 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10575 {
10576 enum rtx_code bypass_code, first_code, second_code;
10577 if (!TARGET_CMOVE)
10578 return true;
10579 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10580 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10581 }
10582
10583 void
10584 ix86_expand_branch (enum rtx_code code, rtx label)
10585 {
10586 rtx tmp;
10587
10588 /* If we have emitted a compare insn, go straight to simple.
10589 ix86_expand_compare won't emit anything if ix86_compare_emitted
10590 is non-NULL. */
10591 if (ix86_compare_emitted)
10592 goto simple;
10593
10594 switch (GET_MODE (ix86_compare_op0))
10595 {
10596 case QImode:
10597 case HImode:
10598 case SImode:
10599 simple:
10600 tmp = ix86_expand_compare (code, NULL, NULL);
10601 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10602 gen_rtx_LABEL_REF (VOIDmode, label),
10603 pc_rtx);
10604 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10605 return;
10606
10607 case SFmode:
10608 case DFmode:
10609 case XFmode:
10610 {
10611 rtvec vec;
10612 int use_fcomi;
10613 enum rtx_code bypass_code, first_code, second_code;
10614
10615 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10616 &ix86_compare_op1);
10617
10618 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10619
10620 /* Check whether we will use the natural sequence with one jump. If
10621 so, we can expand the jump early. Otherwise delay expansion by
10622 creating a compound insn so as not to confuse the optimizers. */
10623 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10624 && TARGET_CMOVE)
10625 {
10626 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10627 gen_rtx_LABEL_REF (VOIDmode, label),
10628 pc_rtx, NULL_RTX, NULL_RTX);
10629 }
10630 else
10631 {
10632 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10633 ix86_compare_op0, ix86_compare_op1);
10634 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10635 gen_rtx_LABEL_REF (VOIDmode, label),
10636 pc_rtx);
10637 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10638
10639 use_fcomi = ix86_use_fcomi_compare (code);
10640 vec = rtvec_alloc (3 + !use_fcomi);
10641 RTVEC_ELT (vec, 0) = tmp;
10642 RTVEC_ELT (vec, 1)
10643 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10644 RTVEC_ELT (vec, 2)
10645 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10646 if (! use_fcomi)
10647 RTVEC_ELT (vec, 3)
10648 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10649
10650 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10651 }
10652 return;
10653 }
10654
10655 case DImode:
10656 if (TARGET_64BIT)
10657 goto simple;
10658 case TImode:
10659 /* Expand DImode branch into multiple compare+branch. */
10660 {
10661 rtx lo[2], hi[2], label2;
10662 enum rtx_code code1, code2, code3;
10663 enum machine_mode submode;
10664
10665 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10666 {
10667 tmp = ix86_compare_op0;
10668 ix86_compare_op0 = ix86_compare_op1;
10669 ix86_compare_op1 = tmp;
10670 code = swap_condition (code);
10671 }
10672 if (GET_MODE (ix86_compare_op0) == DImode)
10673 {
10674 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10675 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10676 submode = SImode;
10677 }
10678 else
10679 {
10680 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10681 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10682 submode = DImode;
10683 }
10684
10685 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10686 avoid two branches. This costs one extra insn, so disable when
10687 optimizing for size. */
10688
10689 if ((code == EQ || code == NE)
10690 && (!optimize_size
10691 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10692 {
10693 rtx xor0, xor1;
10694
10695 xor1 = hi[0];
10696 if (hi[1] != const0_rtx)
10697 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10698 NULL_RTX, 0, OPTAB_WIDEN);
10699
10700 xor0 = lo[0];
10701 if (lo[1] != const0_rtx)
10702 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10703 NULL_RTX, 0, OPTAB_WIDEN);
10704
10705 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10706 NULL_RTX, 0, OPTAB_WIDEN);
10707
10708 ix86_compare_op0 = tmp;
10709 ix86_compare_op1 = const0_rtx;
10710 ix86_expand_branch (code, label);
10711 return;
10712 }
10713
10714 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10715 and op1 is a constant whose low word is zero, then we can just
10716 examine the high word. */
10717
10718 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
10719 switch (code)
10720 {
10721 case LT: case LTU: case GE: case GEU:
10722 ix86_compare_op0 = hi[0];
10723 ix86_compare_op1 = hi[1];
10724 ix86_expand_branch (code, label);
10725 return;
10726 default:
10727 break;
10728 }
10729
10730 /* Otherwise, we need two or three jumps. */
10731
10732 label2 = gen_label_rtx ();
10733
10734 code1 = code;
10735 code2 = swap_condition (code);
10736 code3 = unsigned_condition (code);
10737
10738 switch (code)
10739 {
10740 case LT: case GT: case LTU: case GTU:
10741 break;
10742
10743 case LE: code1 = LT; code2 = GT; break;
10744 case GE: code1 = GT; code2 = LT; break;
10745 case LEU: code1 = LTU; code2 = GTU; break;
10746 case GEU: code1 = GTU; code2 = LTU; break;
10747
10748 case EQ: code1 = UNKNOWN; code2 = NE; break;
10749 case NE: code2 = UNKNOWN; break;
10750
10751 default:
10752 gcc_unreachable ();
10753 }
10754
10755 /*
10756 * a < b =>
10757 * if (hi(a) < hi(b)) goto true;
10758 * if (hi(a) > hi(b)) goto false;
10759 * if (lo(a) < lo(b)) goto true;
10760 * false:
10761 */
10762
10763 ix86_compare_op0 = hi[0];
10764 ix86_compare_op1 = hi[1];
10765
10766 if (code1 != UNKNOWN)
10767 ix86_expand_branch (code1, label);
10768 if (code2 != UNKNOWN)
10769 ix86_expand_branch (code2, label2);
10770
10771 ix86_compare_op0 = lo[0];
10772 ix86_compare_op1 = lo[1];
10773 ix86_expand_branch (code3, label);
10774
10775 if (code2 != UNKNOWN)
10776 emit_label (label2);
10777 return;
10778 }
10779
10780 default:
10781 gcc_unreachable ();
10782 }
10783 }
10784
10785 /* Split branch based on floating point condition. */
10786 void
10787 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10788 rtx target1, rtx target2, rtx tmp, rtx pushed)
10789 {
10790 rtx second, bypass;
10791 rtx label = NULL_RTX;
10792 rtx condition;
10793 int bypass_probability = -1, second_probability = -1, probability = -1;
10794 rtx i;
10795
10796 if (target2 != pc_rtx)
10797 {
10798 rtx tmp = target2;
10799 code = reverse_condition_maybe_unordered (code);
10800 target2 = target1;
10801 target1 = tmp;
10802 }
10803
10804 condition = ix86_expand_fp_compare (code, op1, op2,
10805 tmp, &second, &bypass);
10806
10807 /* Remove pushed operand from stack. */
10808 if (pushed)
10809 ix86_free_from_memory (GET_MODE (pushed));
10810
10811 if (split_branch_probability >= 0)
10812 {
 10813       /* Distribute the probabilities across the jumps.
 10814          Assume that BYPASS and SECOND always test
 10815          for UNORDERED.  */
10816 probability = split_branch_probability;
10817
 10818       /* A value of 1 is low enough that the probability does not need
 10819          to be updated.  Later we may run some experiments and see
 10820          whether unordered values are more frequent in practice.  */
10821 if (bypass)
10822 bypass_probability = 1;
10823 if (second)
10824 second_probability = 1;
10825 }
10826 if (bypass != NULL_RTX)
10827 {
10828 label = gen_label_rtx ();
10829 i = emit_jump_insn (gen_rtx_SET
10830 (VOIDmode, pc_rtx,
10831 gen_rtx_IF_THEN_ELSE (VOIDmode,
10832 bypass,
10833 gen_rtx_LABEL_REF (VOIDmode,
10834 label),
10835 pc_rtx)));
10836 if (bypass_probability >= 0)
10837 REG_NOTES (i)
10838 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10839 GEN_INT (bypass_probability),
10840 REG_NOTES (i));
10841 }
10842 i = emit_jump_insn (gen_rtx_SET
10843 (VOIDmode, pc_rtx,
10844 gen_rtx_IF_THEN_ELSE (VOIDmode,
10845 condition, target1, target2)));
10846 if (probability >= 0)
10847 REG_NOTES (i)
10848 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10849 GEN_INT (probability),
10850 REG_NOTES (i));
10851 if (second != NULL_RTX)
10852 {
10853 i = emit_jump_insn (gen_rtx_SET
10854 (VOIDmode, pc_rtx,
10855 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10856 target2)));
10857 if (second_probability >= 0)
10858 REG_NOTES (i)
10859 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10860 GEN_INT (second_probability),
10861 REG_NOTES (i));
10862 }
10863 if (label != NULL_RTX)
10864 emit_label (label);
10865 }
10866
10867 int
10868 ix86_expand_setcc (enum rtx_code code, rtx dest)
10869 {
10870 rtx ret, tmp, tmpreg, equiv;
10871 rtx second_test, bypass_test;
10872
10873 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10874 return 0; /* FAIL */
10875
10876 gcc_assert (GET_MODE (dest) == QImode);
10877
10878 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10879 PUT_MODE (ret, QImode);
10880
10881 tmp = dest;
10882 tmpreg = dest;
10883
10884 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10885 if (bypass_test || second_test)
10886 {
10887 rtx test = second_test;
10888 int bypass = 0;
10889 rtx tmp2 = gen_reg_rtx (QImode);
10890 if (bypass_test)
10891 {
10892 gcc_assert (!second_test);
10893 test = bypass_test;
10894 bypass = 1;
10895 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10896 }
10897 PUT_MODE (test, QImode);
10898 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10899
10900 if (bypass)
10901 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10902 else
10903 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10904 }
10905
10906 /* Attach a REG_EQUAL note describing the comparison result. */
10907 if (ix86_compare_op0 && ix86_compare_op1)
10908 {
10909 equiv = simplify_gen_relational (code, QImode,
10910 GET_MODE (ix86_compare_op0),
10911 ix86_compare_op0, ix86_compare_op1);
10912 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10913 }
10914
10915 return 1; /* DONE */
10916 }
10917
 10918 /* Expand a comparison setting or clearing the carry flag.  Return true
 10919    when successful and set *POP to the comparison operation.  */
10920 static bool
10921 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10922 {
10923 enum machine_mode mode =
10924 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10925
 10926   /* Do not handle double-word compares, which go through a special path.
 10927      Also we can't deal with FP compares yet; this is possible to add.  */
10928 if (mode == (TARGET_64BIT ? TImode : DImode))
10929 return false;
10930 if (FLOAT_MODE_P (mode))
10931 {
10932 rtx second_test = NULL, bypass_test = NULL;
10933 rtx compare_op, compare_seq;
10934
 10935       /* Shortcut: the following common codes never translate into carry flag compares.  */
10936 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10937 || code == ORDERED || code == UNORDERED)
10938 return false;
10939
 10940       /* These comparisons require the zero flag; swap the operands so they don't need it.  */
10941 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10942 && !TARGET_IEEE_FP)
10943 {
10944 rtx tmp = op0;
10945 op0 = op1;
10946 op1 = tmp;
10947 code = swap_condition (code);
10948 }
10949
 10950       /* Try to expand the comparison and verify that we end up with a carry
 10951          flag based comparison.  This fails to be true only when we decide to
 10952          expand the comparison using arithmetic, which is not a common scenario.  */
10953 start_sequence ();
10954 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10955 &second_test, &bypass_test);
10956 compare_seq = get_insns ();
10957 end_sequence ();
10958
10959 if (second_test || bypass_test)
10960 return false;
10961 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10962 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10963 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10964 else
10965 code = GET_CODE (compare_op);
10966 if (code != LTU && code != GEU)
10967 return false;
10968 emit_insn (compare_seq);
10969 *pop = compare_op;
10970 return true;
10971 }
10972 if (!INTEGRAL_MODE_P (mode))
10973 return false;
10974 switch (code)
10975 {
10976 case LTU:
10977 case GEU:
10978 break;
10979
10980 /* Convert a==0 into (unsigned)a<1. */
10981 case EQ:
10982 case NE:
10983 if (op1 != const0_rtx)
10984 return false;
10985 op1 = const1_rtx;
10986 code = (code == EQ ? LTU : GEU);
10987 break;
10988
 10989       /* Convert a>b into b<a or a>=b+1.  */
10990 case GTU:
10991 case LEU:
10992 if (CONST_INT_P (op1))
10993 {
10994 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
 10995           /* Bail out on overflow.  We could still swap the operands, but that
 10996              would force loading the constant into a register.  */
10997 if (op1 == const0_rtx
10998 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10999 return false;
11000 code = (code == GTU ? GEU : LTU);
11001 }
11002 else
11003 {
11004 rtx tmp = op1;
11005 op1 = op0;
11006 op0 = tmp;
11007 code = (code == GTU ? LTU : GEU);
11008 }
11009 break;
11010
11011 /* Convert a>=0 into (unsigned)a<0x80000000. */
11012 case LT:
11013 case GE:
11014 if (mode == DImode || op1 != const0_rtx)
11015 return false;
11016 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11017 code = (code == LT ? GEU : LTU);
11018 break;
11019 case LE:
11020 case GT:
11021 if (mode == DImode || op1 != constm1_rtx)
11022 return false;
11023 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11024 code = (code == LE ? GEU : LTU);
11025 break;
11026
11027 default:
11028 return false;
11029 }
 11030   /* Swapping the operands may cause a constant to appear as the first operand.  */
11031 if (!nonimmediate_operand (op0, VOIDmode))
11032 {
11033 if (no_new_pseudos)
11034 return false;
11035 op0 = force_reg (mode, op0);
11036 }
11037 ix86_compare_op0 = op0;
11038 ix86_compare_op1 = op1;
11039 *pop = ix86_expand_compare (code, NULL, NULL);
11040 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11041 return true;
11042 }
11043
11044 int
11045 ix86_expand_int_movcc (rtx operands[])
11046 {
11047 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11048 rtx compare_seq, compare_op;
11049 rtx second_test, bypass_test;
11050 enum machine_mode mode = GET_MODE (operands[0]);
 11051   bool sign_bit_compare_p = false;
11052
11053 start_sequence ();
11054 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11055 compare_seq = get_insns ();
11056 end_sequence ();
11057
11058 compare_code = GET_CODE (compare_op);
11059
11060 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11061 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11062 sign_bit_compare_p = true;
11063
11064 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11065 HImode insns, we'd be swallowed in word prefix ops. */
11066
11067 if ((mode != HImode || TARGET_FAST_PREFIX)
11068 && (mode != (TARGET_64BIT ? TImode : DImode))
11069 && CONST_INT_P (operands[2])
11070 && CONST_INT_P (operands[3]))
11071 {
11072 rtx out = operands[0];
11073 HOST_WIDE_INT ct = INTVAL (operands[2]);
11074 HOST_WIDE_INT cf = INTVAL (operands[3]);
11075 HOST_WIDE_INT diff;
11076
11077 diff = ct - cf;
 11078       /* Sign bit compares are better done using shifts than using
 11079          sbb.  */
11080 if (sign_bit_compare_p
11081 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11082 ix86_compare_op1, &compare_op))
11083 {
11084 /* Detect overlap between destination and compare sources. */
11085 rtx tmp = out;
11086
11087 if (!sign_bit_compare_p)
11088 {
11089 bool fpcmp = false;
11090
11091 compare_code = GET_CODE (compare_op);
11092
11093 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11094 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11095 {
11096 fpcmp = true;
11097 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11098 }
11099
 11100               /* To simplify the rest of the code, restrict to the GEU case.  */
11101 if (compare_code == LTU)
11102 {
11103 HOST_WIDE_INT tmp = ct;
11104 ct = cf;
11105 cf = tmp;
11106 compare_code = reverse_condition (compare_code);
11107 code = reverse_condition (code);
11108 }
11109 else
11110 {
11111 if (fpcmp)
11112 PUT_CODE (compare_op,
11113 reverse_condition_maybe_unordered
11114 (GET_CODE (compare_op)));
11115 else
11116 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11117 }
11118 diff = ct - cf;
11119
11120 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11121 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11122 tmp = gen_reg_rtx (mode);
11123
11124 if (mode == DImode)
11125 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11126 else
11127 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11128 }
11129 else
11130 {
11131 if (code == GT || code == GE)
11132 code = reverse_condition (code);
11133 else
11134 {
11135 HOST_WIDE_INT tmp = ct;
11136 ct = cf;
11137 cf = tmp;
11138 diff = ct - cf;
11139 }
11140 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11141 ix86_compare_op1, VOIDmode, 0, -1);
11142 }
11143
11144 if (diff == 1)
11145 {
11146 /*
11147 * cmpl op0,op1
11148 * sbbl dest,dest
11149 * [addl dest, ct]
11150 *
11151 * Size 5 - 8.
11152 */
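              /* Worked instance of the sequence sketched above (illustrative,
                 not from the original source): with ct = 5 and cf = 4
                 (diff == 1), sbbl leaves dest = 0 or -1 and the addl of $5
                 then yields 5 or 4 respectively.  */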
11153 if (ct)
11154 tmp = expand_simple_binop (mode, PLUS,
11155 tmp, GEN_INT (ct),
11156 copy_rtx (tmp), 1, OPTAB_DIRECT);
11157 }
11158 else if (cf == -1)
11159 {
11160 /*
11161 * cmpl op0,op1
11162 * sbbl dest,dest
11163 * orl $ct, dest
11164 *
11165 * Size 8.
11166 */
11167 tmp = expand_simple_binop (mode, IOR,
11168 tmp, GEN_INT (ct),
11169 copy_rtx (tmp), 1, OPTAB_DIRECT);
11170 }
11171 else if (diff == -1 && ct)
11172 {
11173 /*
11174 * cmpl op0,op1
11175 * sbbl dest,dest
11176 * notl dest
11177 * [addl dest, cf]
11178 *
11179 * Size 8 - 11.
11180 */
11181 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11182 if (cf)
11183 tmp = expand_simple_binop (mode, PLUS,
11184 copy_rtx (tmp), GEN_INT (cf),
11185 copy_rtx (tmp), 1, OPTAB_DIRECT);
11186 }
11187 else
11188 {
11189 /*
11190 * cmpl op0,op1
11191 * sbbl dest,dest
11192 * [notl dest]
11193 * andl cf - ct, dest
11194 * [addl dest, ct]
11195 *
11196 * Size 8 - 11.
11197 */
11198
11199 if (cf == 0)
11200 {
11201 cf = ct;
11202 ct = 0;
11203 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11204 }
11205
11206 tmp = expand_simple_binop (mode, AND,
11207 copy_rtx (tmp),
11208 gen_int_mode (cf - ct, mode),
11209 copy_rtx (tmp), 1, OPTAB_DIRECT);
11210 if (ct)
11211 tmp = expand_simple_binop (mode, PLUS,
11212 copy_rtx (tmp), GEN_INT (ct),
11213 copy_rtx (tmp), 1, OPTAB_DIRECT);
11214 }
11215
11216 if (!rtx_equal_p (tmp, out))
11217 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11218
11219 return 1; /* DONE */
11220 }
11221
11222 if (diff < 0)
11223 {
11224 HOST_WIDE_INT tmp;
11225 tmp = ct, ct = cf, cf = tmp;
11226 diff = -diff;
11227 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11228 {
 11229               /* We may be reversing an unordered compare to a normal compare, which
 11230                  is not valid in general (we may convert a non-trapping condition
 11231                  into a trapping one); however, on i386 we currently emit all
 11232                  comparisons unordered.  */
11233 compare_code = reverse_condition_maybe_unordered (compare_code);
11234 code = reverse_condition_maybe_unordered (code);
11235 }
11236 else
11237 {
11238 compare_code = reverse_condition (compare_code);
11239 code = reverse_condition (code);
11240 }
11241 }
11242
11243 compare_code = UNKNOWN;
11244 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11245 && CONST_INT_P (ix86_compare_op1))
11246 {
11247 if (ix86_compare_op1 == const0_rtx
11248 && (code == LT || code == GE))
11249 compare_code = code;
11250 else if (ix86_compare_op1 == constm1_rtx)
11251 {
11252 if (code == LE)
11253 compare_code = LT;
11254 else if (code == GT)
11255 compare_code = GE;
11256 }
11257 }
11258
11259 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11260 if (compare_code != UNKNOWN
11261 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11262 && (cf == -1 || ct == -1))
11263 {
11264 /* If lea code below could be used, only optimize
11265 if it results in a 2 insn sequence. */
11266
11267 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11268 || diff == 3 || diff == 5 || diff == 9)
11269 || (compare_code == LT && ct == -1)
11270 || (compare_code == GE && cf == -1))
11271 {
11272 /*
11273 * notl op1 (if necessary)
11274 * sarl $31, op1
11275 * orl cf, op1
11276 */
11277 if (ct != -1)
11278 {
11279 cf = ct;
11280 ct = -1;
11281 code = reverse_condition (code);
11282 }
11283
11284 out = emit_store_flag (out, code, ix86_compare_op0,
11285 ix86_compare_op1, VOIDmode, 0, -1);
11286
11287 out = expand_simple_binop (mode, IOR,
11288 out, GEN_INT (cf),
11289 out, 1, OPTAB_DIRECT);
11290 if (out != operands[0])
11291 emit_move_insn (operands[0], out);
11292
11293 return 1; /* DONE */
11294 }
11295 }
11296
11297
11298 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11299 || diff == 3 || diff == 5 || diff == 9)
11300 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11301 && (mode != DImode
11302 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11303 {
11304 /*
11305 * xorl dest,dest
11306 * cmpl op1,op2
11307 * setcc dest
11308 * lea cf(dest*(ct-cf)),dest
11309 *
11310 * Size 14.
11311 *
11312 * This also catches the degenerate setcc-only case.
11313 */
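          /* Worked instance (illustrative, not from the original source):
             with ct = 5 and cf = 2 (diff == 3), the 0/1 setcc result is
             combined by lea 2(%reg,%reg,2), %reg, giving 2 or 5 without
             a branch.  */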
11314
11315 rtx tmp;
11316 int nops;
11317
11318 out = emit_store_flag (out, code, ix86_compare_op0,
11319 ix86_compare_op1, VOIDmode, 0, 1);
11320
11321 nops = 0;
 11322           /* On x86_64 the lea instruction operates on Pmode, so we need
 11323              the arithmetic done in the proper mode to match.  */
11324 if (diff == 1)
11325 tmp = copy_rtx (out);
11326 else
11327 {
11328 rtx out1;
11329 out1 = copy_rtx (out);
11330 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11331 nops++;
11332 if (diff & 1)
11333 {
11334 tmp = gen_rtx_PLUS (mode, tmp, out1);
11335 nops++;
11336 }
11337 }
11338 if (cf != 0)
11339 {
11340 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11341 nops++;
11342 }
11343 if (!rtx_equal_p (tmp, out))
11344 {
11345 if (nops == 1)
11346 out = force_operand (tmp, copy_rtx (out));
11347 else
11348 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11349 }
11350 if (!rtx_equal_p (out, operands[0]))
11351 emit_move_insn (operands[0], copy_rtx (out));
11352
11353 return 1; /* DONE */
11354 }
11355
11356 /*
11357 * General case: Jumpful:
11358 * xorl dest,dest cmpl op1, op2
11359 * cmpl op1, op2 movl ct, dest
11360 * setcc dest jcc 1f
11361 * decl dest movl cf, dest
11362 * andl (cf-ct),dest 1:
11363 * addl ct,dest
11364 *
11365 * Size 20. Size 14.
11366 *
11367 * This is reasonably steep, but branch mispredict costs are
11368 * high on modern cpus, so consider failing only if optimizing
11369 * for space.
11370 */
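      /* Worked instance of the jumpless sequence above (illustrative, not
         from the original source): with ct = 7 and cf = 3, setcc gives 0/1,
         decl gives -1/0, andl $-4 (cf - ct) gives -4/0, and addl $7 finally
         gives 3/7.  */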
11371
11372 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11373 && BRANCH_COST >= 2)
11374 {
11375 if (cf == 0)
11376 {
11377 cf = ct;
11378 ct = 0;
11379 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
 11380               /* We may be reversing an unordered compare to a normal compare,
 11381                  which is not valid in general (we may convert a non-trapping
 11382                  condition into a trapping one); however, on i386 we currently
 11383                  emit all comparisons unordered.  */
11384 code = reverse_condition_maybe_unordered (code);
11385 else
11386 {
11387 code = reverse_condition (code);
11388 if (compare_code != UNKNOWN)
11389 compare_code = reverse_condition (compare_code);
11390 }
11391 }
11392
11393 if (compare_code != UNKNOWN)
11394 {
11395 /* notl op1 (if needed)
11396 sarl $31, op1
11397 andl (cf-ct), op1
11398 addl ct, op1
11399
11400 For x < 0 (resp. x <= -1) there will be no notl,
11401 so if possible swap the constants to get rid of the
11402 complement.
11403 True/false will be -1/0 while code below (store flag
11404 followed by decrement) is 0/-1, so the constants need
11405 to be exchanged once more. */
11406
11407 if (compare_code == GE || !cf)
11408 {
11409 code = reverse_condition (code);
11410 compare_code = LT;
11411 }
11412 else
11413 {
11414 HOST_WIDE_INT tmp = cf;
11415 cf = ct;
11416 ct = tmp;
11417 }
11418
11419 out = emit_store_flag (out, code, ix86_compare_op0,
11420 ix86_compare_op1, VOIDmode, 0, -1);
11421 }
11422 else
11423 {
11424 out = emit_store_flag (out, code, ix86_compare_op0,
11425 ix86_compare_op1, VOIDmode, 0, 1);
11426
11427 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11428 copy_rtx (out), 1, OPTAB_DIRECT);
11429 }
11430
11431 out = expand_simple_binop (mode, AND, copy_rtx (out),
11432 gen_int_mode (cf - ct, mode),
11433 copy_rtx (out), 1, OPTAB_DIRECT);
11434 if (ct)
11435 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11436 copy_rtx (out), 1, OPTAB_DIRECT);
11437 if (!rtx_equal_p (out, operands[0]))
11438 emit_move_insn (operands[0], copy_rtx (out));
11439
11440 return 1; /* DONE */
11441 }
11442 }
11443
11444 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11445 {
11446 /* Try a few things more with specific constants and a variable. */
11447
11448 optab op;
11449 rtx var, orig_out, out, tmp;
11450
11451 if (BRANCH_COST <= 2)
11452 return 0; /* FAIL */
11453
 11454       /* If one of the two operands is an interesting constant, load a mask
 11455          constant (0 or -1) in its place and mask the variable in with a logical operation.  */
11456
11457 if (CONST_INT_P (operands[2]))
11458 {
11459 var = operands[3];
11460 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11461 operands[3] = constm1_rtx, op = and_optab;
11462 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11463 operands[3] = const0_rtx, op = ior_optab;
11464 else
11465 return 0; /* FAIL */
11466 }
11467 else if (CONST_INT_P (operands[3]))
11468 {
11469 var = operands[2];
11470 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11471 operands[2] = constm1_rtx, op = and_optab;
 11472           else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11473 operands[2] = const0_rtx, op = ior_optab;
11474 else
11475 return 0; /* FAIL */
11476 }
11477 else
11478 return 0; /* FAIL */
11479
11480 orig_out = operands[0];
11481 tmp = gen_reg_rtx (mode);
11482 operands[0] = tmp;
11483
11484 /* Recurse to get the constant loaded. */
11485 if (ix86_expand_int_movcc (operands) == 0)
11486 return 0; /* FAIL */
11487
11488 /* Mask in the interesting variable. */
11489 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11490 OPTAB_WIDEN);
11491 if (!rtx_equal_p (out, orig_out))
11492 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11493
11494 return 1; /* DONE */
11495 }
11496
11497 /*
11498 * For comparison with above,
11499 *
11500 * movl cf,dest
11501 * movl ct,tmp
11502 * cmpl op1,op2
11503 * cmovcc tmp,dest
11504 *
11505 * Size 15.
11506 */
11507
11508 if (! nonimmediate_operand (operands[2], mode))
11509 operands[2] = force_reg (mode, operands[2]);
11510 if (! nonimmediate_operand (operands[3], mode))
11511 operands[3] = force_reg (mode, operands[3]);
11512
11513 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11514 {
11515 rtx tmp = gen_reg_rtx (mode);
11516 emit_move_insn (tmp, operands[3]);
11517 operands[3] = tmp;
11518 }
11519 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11520 {
11521 rtx tmp = gen_reg_rtx (mode);
11522 emit_move_insn (tmp, operands[2]);
11523 operands[2] = tmp;
11524 }
11525
11526 if (! register_operand (operands[2], VOIDmode)
11527 && (mode == QImode
11528 || ! register_operand (operands[3], VOIDmode)))
11529 operands[2] = force_reg (mode, operands[2]);
11530
11531 if (mode == QImode
11532 && ! register_operand (operands[3], VOIDmode))
11533 operands[3] = force_reg (mode, operands[3]);
11534
11535 emit_insn (compare_seq);
11536 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11537 gen_rtx_IF_THEN_ELSE (mode,
11538 compare_op, operands[2],
11539 operands[3])));
11540 if (bypass_test)
11541 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11542 gen_rtx_IF_THEN_ELSE (mode,
11543 bypass_test,
11544 copy_rtx (operands[3]),
11545 copy_rtx (operands[0]))));
11546 if (second_test)
11547 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11548 gen_rtx_IF_THEN_ELSE (mode,
11549 second_test,
11550 copy_rtx (operands[2]),
11551 copy_rtx (operands[0]))));
11552
11553 return 1; /* DONE */
11554 }
11555
 11556 /* Swap, force into registers, or otherwise massage the two operands
 11557    to an SSE comparison with a mask result.  Thus we differ a bit from
 11558    ix86_prepare_fp_compare_args, which expects to produce a flags result.
11559
11560 The DEST operand exists to help determine whether to commute commutative
11561 operators. The POP0/POP1 operands are updated in place. The new
11562 comparison code is returned, or UNKNOWN if not implementable. */
11563
11564 static enum rtx_code
11565 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11566 rtx *pop0, rtx *pop1)
11567 {
11568 rtx tmp;
11569
11570 switch (code)
11571 {
11572 case LTGT:
11573 case UNEQ:
11574 /* We have no LTGT as an operator. We could implement it with
11575 NE & ORDERED, but this requires an extra temporary. It's
11576 not clear that it's worth it. */
11577 return UNKNOWN;
11578
11579 case LT:
11580 case LE:
11581 case UNGT:
11582 case UNGE:
11583 /* These are supported directly. */
11584 break;
11585
11586 case EQ:
11587 case NE:
11588 case UNORDERED:
11589 case ORDERED:
11590 /* For commutative operators, try to canonicalize the destination
11591 operand to be first in the comparison - this helps reload to
11592 avoid extra moves. */
11593 if (!dest || !rtx_equal_p (dest, *pop1))
11594 break;
11595 /* FALLTHRU */
11596
11597 case GE:
11598 case GT:
11599 case UNLE:
11600 case UNLT:
11601 /* These are not supported directly. Swap the comparison operands
11602 to transform into something that is supported. */
11603 tmp = *pop0;
11604 *pop0 = *pop1;
11605 *pop1 = tmp;
11606 code = swap_condition (code);
11607 break;
11608
11609 default:
11610 gcc_unreachable ();
11611 }
11612
11613 return code;
11614 }
11615
11616 /* Detect conditional moves that exactly match min/max operational
11617 semantics. Note that this is IEEE safe, as long as we don't
11618 interchange the operands.
11619
11620 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11621 and TRUE if the operation is successful and instructions are emitted. */
11622
11623 static bool
11624 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11625 rtx cmp_op1, rtx if_true, rtx if_false)
11626 {
11627 enum machine_mode mode;
11628 bool is_min;
11629 rtx tmp;
11630
11631 if (code == LT)
11632 ;
11633 else if (code == UNGE)
11634 {
11635 tmp = if_true;
11636 if_true = if_false;
11637 if_false = tmp;
11638 }
11639 else
11640 return false;
11641
11642 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11643 is_min = true;
11644 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11645 is_min = false;
11646 else
11647 return false;
11648
11649 mode = GET_MODE (dest);
11650
11651 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11652 but MODE may be a vector mode and thus not appropriate. */
11653 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11654 {
11655 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11656 rtvec v;
11657
11658 if_true = force_reg (mode, if_true);
11659 v = gen_rtvec (2, if_true, if_false);
11660 tmp = gen_rtx_UNSPEC (mode, v, u);
11661 }
11662 else
11663 {
11664 code = is_min ? SMIN : SMAX;
11665 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11666 }
11667
11668 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11669 return true;
11670 }
11671
 11672 /* Expand an SSE vector comparison.  Return the register with the result.  */
11673
11674 static rtx
11675 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11676 rtx op_true, rtx op_false)
11677 {
11678 enum machine_mode mode = GET_MODE (dest);
11679 rtx x;
11680
11681 cmp_op0 = force_reg (mode, cmp_op0);
11682 if (!nonimmediate_operand (cmp_op1, mode))
11683 cmp_op1 = force_reg (mode, cmp_op1);
11684
11685 if (optimize
11686 || reg_overlap_mentioned_p (dest, op_true)
11687 || reg_overlap_mentioned_p (dest, op_false))
11688 dest = gen_reg_rtx (mode);
11689
11690 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11691 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11692
11693 return dest;
11694 }
11695
11696 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11697 operations. This is used for both scalar and vector conditional moves. */
11698
11699 static void
11700 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11701 {
11702 enum machine_mode mode = GET_MODE (dest);
11703 rtx t2, t3, x;
11704
11705 if (op_false == CONST0_RTX (mode))
11706 {
11707 op_true = force_reg (mode, op_true);
11708 x = gen_rtx_AND (mode, cmp, op_true);
11709 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11710 }
11711 else if (op_true == CONST0_RTX (mode))
11712 {
11713 op_false = force_reg (mode, op_false);
11714 x = gen_rtx_NOT (mode, cmp);
11715 x = gen_rtx_AND (mode, x, op_false);
11716 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11717 }
11718 else
11719 {
11720 op_true = force_reg (mode, op_true);
11721 op_false = force_reg (mode, op_false);
11722
11723 t2 = gen_reg_rtx (mode);
11724 if (optimize)
11725 t3 = gen_reg_rtx (mode);
11726 else
11727 t3 = dest;
11728
11729 x = gen_rtx_AND (mode, op_true, cmp);
11730 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11731
11732 x = gen_rtx_NOT (mode, cmp);
11733 x = gen_rtx_AND (mode, x, op_false);
11734 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11735
11736 x = gen_rtx_IOR (mode, t3, t2);
11737 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11738 }
11739 }
11740
11741 /* Expand a floating-point conditional move. Return true if successful. */
11742
11743 int
11744 ix86_expand_fp_movcc (rtx operands[])
11745 {
11746 enum machine_mode mode = GET_MODE (operands[0]);
11747 enum rtx_code code = GET_CODE (operands[1]);
11748 rtx tmp, compare_op, second_test, bypass_test;
11749
11750 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11751 {
11752 enum machine_mode cmode;
11753
 11754       /* Since we have no cmove for SSE registers, don't force bad register
 11755          allocation just to gain access to it.  Deny movcc when the
 11756          comparison mode doesn't match the move mode.  */
11757 cmode = GET_MODE (ix86_compare_op0);
11758 if (cmode == VOIDmode)
11759 cmode = GET_MODE (ix86_compare_op1);
11760 if (cmode != mode)
11761 return 0;
11762
11763 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11764 &ix86_compare_op0,
11765 &ix86_compare_op1);
11766 if (code == UNKNOWN)
11767 return 0;
11768
11769 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11770 ix86_compare_op1, operands[2],
11771 operands[3]))
11772 return 1;
11773
11774 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11775 ix86_compare_op1, operands[2], operands[3]);
11776 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11777 return 1;
11778 }
11779
11780 /* The floating point conditional move instructions don't directly
11781 support conditions resulting from a signed integer comparison. */
11782
11783 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11784
11785 /* The floating point conditional move instructions don't directly
11786 support signed integer comparisons. */
11787
11788 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11789 {
11790 gcc_assert (!second_test && !bypass_test);
11791 tmp = gen_reg_rtx (QImode);
11792 ix86_expand_setcc (code, tmp);
11793 code = NE;
11794 ix86_compare_op0 = tmp;
11795 ix86_compare_op1 = const0_rtx;
11796 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11797 }
11798 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11799 {
11800 tmp = gen_reg_rtx (mode);
11801 emit_move_insn (tmp, operands[3]);
11802 operands[3] = tmp;
11803 }
11804 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11805 {
11806 tmp = gen_reg_rtx (mode);
11807 emit_move_insn (tmp, operands[2]);
11808 operands[2] = tmp;
11809 }
11810
11811 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11812 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11813 operands[2], operands[3])));
11814 if (bypass_test)
11815 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11816 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11817 operands[3], operands[0])));
11818 if (second_test)
11819 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11820 gen_rtx_IF_THEN_ELSE (mode, second_test,
11821 operands[2], operands[0])));
11822
11823 return 1;
11824 }
11825
11826 /* Expand a floating-point vector conditional move; a vcond operation
11827 rather than a movcc operation. */
11828
11829 bool
11830 ix86_expand_fp_vcond (rtx operands[])
11831 {
11832 enum rtx_code code = GET_CODE (operands[3]);
11833 rtx cmp;
11834
11835 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11836 &operands[4], &operands[5]);
11837 if (code == UNKNOWN)
11838 return false;
11839
11840 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11841 operands[5], operands[1], operands[2]))
11842 return true;
11843
11844 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11845 operands[1], operands[2]);
11846 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11847 return true;
11848 }
11849
11850 /* Expand a signed integral vector conditional move. */
11851
11852 bool
11853 ix86_expand_int_vcond (rtx operands[])
11854 {
11855 enum machine_mode mode = GET_MODE (operands[0]);
11856 enum rtx_code code = GET_CODE (operands[3]);
11857 bool negate = false;
11858 rtx x, cop0, cop1;
11859
11860 cop0 = operands[4];
11861 cop1 = operands[5];
11862
11863 /* Canonicalize the comparison to EQ, GT, GTU. */
11864 switch (code)
11865 {
11866 case EQ:
11867 case GT:
11868 case GTU:
11869 break;
11870
11871 case NE:
11872 case LE:
11873 case LEU:
11874 code = reverse_condition (code);
11875 negate = true;
11876 break;
11877
11878 case GE:
11879 case GEU:
11880 code = reverse_condition (code);
11881 negate = true;
11882 /* FALLTHRU */
11883
11884 case LT:
11885 case LTU:
11886 code = swap_condition (code);
11887 x = cop0, cop0 = cop1, cop1 = x;
11888 break;
11889
11890 default:
11891 gcc_unreachable ();
11892 }
11893
11894 /* Unsigned parallel compare is not supported by the hardware. Play some
11895 tricks to turn this into a signed comparison against 0. */
11896 if (code == GTU)
11897 {
11898 cop0 = force_reg (mode, cop0);
11899
11900 switch (mode)
11901 {
11902 case V4SImode:
11903 {
11904 rtx t1, t2, mask;
11905
11906 /* Perform a parallel modulo subtraction. */
11907 t1 = gen_reg_rtx (mode);
11908 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11909
11910 /* Extract the original sign bit of op0. */
11911 mask = GEN_INT (-0x80000000);
11912 mask = gen_rtx_CONST_VECTOR (mode,
11913 gen_rtvec (4, mask, mask, mask, mask));
11914 mask = force_reg (mode, mask);
11915 t2 = gen_reg_rtx (mode);
11916 emit_insn (gen_andv4si3 (t2, cop0, mask));
11917
11918 /* XOR it back into the result of the subtraction. This results
11919 in the sign bit set iff we saw unsigned underflow. */
11920 x = gen_reg_rtx (mode);
11921 emit_insn (gen_xorv4si3 (x, t1, t2));
11922
11923 code = GT;
11924 }
11925 break;
11926
11927 case V16QImode:
11928 case V8HImode:
11929 /* Perform a parallel unsigned saturating subtraction. */
11930 x = gen_reg_rtx (mode);
11931 emit_insn (gen_rtx_SET (VOIDmode, x,
11932 gen_rtx_US_MINUS (mode, cop0, cop1)));
11933
11934 code = EQ;
11935 negate = !negate;
11936 break;
11937
11938 default:
11939 gcc_unreachable ();
11940 }
11941
11942 cop0 = x;
11943 cop1 = CONST0_RTX (mode);
11944 }
11945
11946 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11947 operands[1+negate], operands[2-negate]);
11948
11949 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11950 operands[2-negate]);
11951 return true;
11952 }
11953
11954 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
11955 true if we should do zero extension, else sign extension. HIGH_P is
11956 true if we want the N/2 high elements, else the low elements. */
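/* Illustrative example (not from the original source): sign-extending the
   low half of a V8HImode {a0,...,a7} interleaves each ai with a mask word
   that is all ones exactly when ai is negative, so the low interleave
   (punpcklwd) produces the V4SImode vector {(int)a0,...,(int)a3}.  */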
11957
11958 void
11959 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
11960 {
11961 enum machine_mode imode = GET_MODE (operands[1]);
11962 rtx (*unpack)(rtx, rtx, rtx);
11963 rtx se, dest;
11964
11965 switch (imode)
11966 {
11967 case V16QImode:
11968 if (high_p)
11969 unpack = gen_vec_interleave_highv16qi;
11970 else
11971 unpack = gen_vec_interleave_lowv16qi;
11972 break;
11973 case V8HImode:
11974 if (high_p)
11975 unpack = gen_vec_interleave_highv8hi;
11976 else
11977 unpack = gen_vec_interleave_lowv8hi;
11978 break;
11979 case V4SImode:
11980 if (high_p)
11981 unpack = gen_vec_interleave_highv4si;
11982 else
11983 unpack = gen_vec_interleave_lowv4si;
11984 break;
11985 default:
11986 gcc_unreachable ();
11987 }
11988
11989 dest = gen_lowpart (imode, operands[0]);
11990
11991 if (unsigned_p)
11992 se = force_reg (imode, CONST0_RTX (imode));
11993 else
11994 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
11995 operands[1], pc_rtx, pc_rtx);
11996
11997 emit_insn (unpack (dest, operands[1], se));
11998 }
11999
 12000 /* Expand conditional increment or decrement using adc/sbb instructions.
12001 The default case using setcc followed by the conditional move can be
12002 done by generic code. */
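/* Illustrative sketch (not from the original source): for unsigned a and b,
   "x = y + (a < b)" can be emitted as

       cmpl  b, a        ; AT&T order: computes a - b, sets CF iff a < b
       adcl  $0, x       ; x = y + carry, with x preloaded from y

   and the conditional decrement case uses sbb in the same way.  */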
12003 int
12004 ix86_expand_int_addcc (rtx operands[])
12005 {
12006 enum rtx_code code = GET_CODE (operands[1]);
12007 rtx compare_op;
12008 rtx val = const0_rtx;
12009 bool fpcmp = false;
12010 enum machine_mode mode = GET_MODE (operands[0]);
12011
12012 if (operands[3] != const1_rtx
12013 && operands[3] != constm1_rtx)
12014 return 0;
12015 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12016 ix86_compare_op1, &compare_op))
12017 return 0;
12018 code = GET_CODE (compare_op);
12019
12020 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12021 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12022 {
12023 fpcmp = true;
12024 code = ix86_fp_compare_code_to_integer (code);
12025 }
12026
12027 if (code != LTU)
12028 {
12029 val = constm1_rtx;
12030 if (fpcmp)
12031 PUT_CODE (compare_op,
12032 reverse_condition_maybe_unordered
12033 (GET_CODE (compare_op)));
12034 else
12035 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12036 }
12037 PUT_MODE (compare_op, mode);
12038
12039 /* Construct either adc or sbb insn. */
12040 if ((code == LTU) == (operands[3] == constm1_rtx))
12041 {
12042 switch (GET_MODE (operands[0]))
12043 {
12044 case QImode:
12045 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12046 break;
12047 case HImode:
12048 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12049 break;
12050 case SImode:
12051 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12052 break;
12053 case DImode:
12054 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12055 break;
12056 default:
12057 gcc_unreachable ();
12058 }
12059 }
12060 else
12061 {
12062 switch (GET_MODE (operands[0]))
12063 {
12064 case QImode:
12065 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12066 break;
12067 case HImode:
12068 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12069 break;
12070 case SImode:
12071 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12072 break;
12073 case DImode:
12074 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12075 break;
12076 default:
12077 gcc_unreachable ();
12078 }
12079 }
12080 return 1; /* DONE */
12081 }
12082
12083
 12084 /* Split OPERAND into SImode parts, stored in PARTS.  Similar to split_di,
 12085    but works for floating point parameters and non-offsettable memories.
 12086    For pushes, it returns just stack offsets; the values will be saved
 12087    in the right order.  At most three parts are generated.  */
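/* For example (illustrative, not from the original source): on a 32-bit
   target an XFmode operand is returned as three SImode parts, while on a
   64-bit target it becomes a DImode part plus an SImode upper part.  */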
12088
12089 static int
12090 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12091 {
12092 int size;
12093
12094 if (!TARGET_64BIT)
 12095     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12096 else
12097 size = (GET_MODE_SIZE (mode) + 4) / 8;
12098
12099 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12100 gcc_assert (size >= 2 && size <= 3);
12101
 12102   /* Optimize constant pool references into immediates.  This is used by fp
 12103      moves, which force all constants to memory to allow combining.  */
12104 if (MEM_P (operand) && MEM_READONLY_P (operand))
12105 {
12106 rtx tmp = maybe_get_pool_constant (operand);
12107 if (tmp)
12108 operand = tmp;
12109 }
12110
12111 if (MEM_P (operand) && !offsettable_memref_p (operand))
12112 {
 12113       /* The only non-offsettable memories we handle are pushes.  */
12114 int ok = push_operand (operand, VOIDmode);
12115
12116 gcc_assert (ok);
12117
12118 operand = copy_rtx (operand);
12119 PUT_MODE (operand, Pmode);
12120 parts[0] = parts[1] = parts[2] = operand;
12121 return size;
12122 }
12123
12124 if (GET_CODE (operand) == CONST_VECTOR)
12125 {
12126 enum machine_mode imode = int_mode_for_mode (mode);
12127 /* Caution: if we looked through a constant pool memory above,
12128 the operand may actually have a different mode now. That's
12129 ok, since we want to pun this all the way back to an integer. */
12130 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12131 gcc_assert (operand != NULL);
12132 mode = imode;
12133 }
12134
12135 if (!TARGET_64BIT)
12136 {
12137 if (mode == DImode)
12138 split_di (&operand, 1, &parts[0], &parts[1]);
12139 else
12140 {
12141 if (REG_P (operand))
12142 {
12143 gcc_assert (reload_completed);
12144 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12145 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12146 if (size == 3)
12147 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12148 }
12149 else if (offsettable_memref_p (operand))
12150 {
12151 operand = adjust_address (operand, SImode, 0);
12152 parts[0] = operand;
12153 parts[1] = adjust_address (operand, SImode, 4);
12154 if (size == 3)
12155 parts[2] = adjust_address (operand, SImode, 8);
12156 }
12157 else if (GET_CODE (operand) == CONST_DOUBLE)
12158 {
12159 REAL_VALUE_TYPE r;
12160 long l[4];
12161
12162 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12163 switch (mode)
12164 {
12165 case XFmode:
12166 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12167 parts[2] = gen_int_mode (l[2], SImode);
12168 break;
12169 case DFmode:
12170 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12171 break;
12172 default:
12173 gcc_unreachable ();
12174 }
12175 parts[1] = gen_int_mode (l[1], SImode);
12176 parts[0] = gen_int_mode (l[0], SImode);
12177 }
12178 else
12179 gcc_unreachable ();
12180 }
12181 }
12182 else
12183 {
12184 if (mode == TImode)
12185 split_ti (&operand, 1, &parts[0], &parts[1]);
12186 if (mode == XFmode || mode == TFmode)
12187 {
12188 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12189 if (REG_P (operand))
12190 {
12191 gcc_assert (reload_completed);
12192 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12193 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12194 }
12195 else if (offsettable_memref_p (operand))
12196 {
12197 operand = adjust_address (operand, DImode, 0);
12198 parts[0] = operand;
12199 parts[1] = adjust_address (operand, upper_mode, 8);
12200 }
12201 else if (GET_CODE (operand) == CONST_DOUBLE)
12202 {
12203 REAL_VALUE_TYPE r;
12204 long l[4];
12205
12206 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12207 real_to_target (l, &r, mode);
12208
12209 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12210 if (HOST_BITS_PER_WIDE_INT >= 64)
12211 parts[0]
12212 = gen_int_mode
12213 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12214 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12215 DImode);
12216 else
12217 parts[0] = immed_double_const (l[0], l[1], DImode);
12218
12219 if (upper_mode == SImode)
12220 parts[1] = gen_int_mode (l[2], SImode);
12221 else if (HOST_BITS_PER_WIDE_INT >= 64)
12222 parts[1]
12223 = gen_int_mode
12224 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12225 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12226 DImode);
12227 else
12228 parts[1] = immed_double_const (l[2], l[3], DImode);
12229 }
12230 else
12231 gcc_unreachable ();
12232 }
12233 }
12234
12235 return size;
12236 }
12237
 12238 /* Emit insns to perform a move or push of DI, DF, and XF values.
 12239    All required insns are emitted here; nothing is returned to the
 12240    caller.  Operands 2-4 contain the input values
 12241    in the correct order; operands 5-7 contain the output values.  */
12242
12243 void
12244 ix86_split_long_move (rtx operands[])
12245 {
12246 rtx part[2][3];
12247 int nparts;
12248 int push = 0;
12249 int collisions = 0;
12250 enum machine_mode mode = GET_MODE (operands[0]);
12251
 12252   /* The DFmode expanders may ask us to move a double.
 12253      For a 64-bit target this is a single move.  By hiding that fact
 12254      here we simplify the i386.md splitters.  */
12255 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12256 {
 12257       /* Optimize constant pool references into immediates.  This is used by
 12258          fp moves, which force all constants to memory to allow combining.  */
12259
12260 if (MEM_P (operands[1])
12261 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12262 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12263 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12264 if (push_operand (operands[0], VOIDmode))
12265 {
12266 operands[0] = copy_rtx (operands[0]);
12267 PUT_MODE (operands[0], Pmode);
12268 }
12269 else
12270 operands[0] = gen_lowpart (DImode, operands[0]);
12271 operands[1] = gen_lowpart (DImode, operands[1]);
12272 emit_move_insn (operands[0], operands[1]);
12273 return;
12274 }
12275
 12276   /* The only non-offsettable memory we handle is a push.  */
12277 if (push_operand (operands[0], VOIDmode))
12278 push = 1;
12279 else
12280 gcc_assert (!MEM_P (operands[0])
12281 || offsettable_memref_p (operands[0]));
12282
12283 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12284 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12285
 12286   /* When emitting a push, take care of source operands on the stack.  */
12287 if (push && MEM_P (operands[1])
12288 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12289 {
12290 if (nparts == 3)
12291 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12292 XEXP (part[1][2], 0));
12293 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12294 XEXP (part[1][1], 0));
12295 }
12296
 12297   /* We need to do the copy in the right order in case an address register
 12298      of the source overlaps the destination.  */
12299 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12300 {
12301 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12302 collisions++;
12303 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12304 collisions++;
12305 if (nparts == 3
12306 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12307 collisions++;
12308
 12309       /* A collision in the middle part can be handled by reordering.  */
12310 if (collisions == 1 && nparts == 3
12311 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12312 {
12313 rtx tmp;
12314 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12315 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12316 }
12317
 12318       /* If there are more collisions, we can't handle them by reordering.
 12319          Do an lea into the last part and use only one colliding move.  */
12320 else if (collisions > 1)
12321 {
12322 rtx base;
12323
12324 collisions = 1;
12325
12326 base = part[0][nparts - 1];
12327
 12328           /* Handle the case where the last part isn't valid for lea.
 12329              This happens in 64-bit mode when storing the 12-byte XFmode.  */
12330 if (GET_MODE (base) != Pmode)
12331 base = gen_rtx_REG (Pmode, REGNO (base));
12332
12333 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12334 part[1][0] = replace_equiv_address (part[1][0], base);
12335 part[1][1] = replace_equiv_address (part[1][1],
12336 plus_constant (base, UNITS_PER_WORD));
12337 if (nparts == 3)
12338 part[1][2] = replace_equiv_address (part[1][2],
12339 plus_constant (base, 8));
12340 }
12341 }
12342
12343 if (push)
12344 {
12345 if (!TARGET_64BIT)
12346 {
12347 if (nparts == 3)
12348 {
12349 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12350 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12351 emit_move_insn (part[0][2], part[1][2]);
12352 }
12353 }
12354 else
12355 {
 12356           /* In 64-bit mode we don't have a 32-bit push available.  If this is a
 12357              register, that is OK - we just use the larger counterpart.  We also
 12358              retype memory - this comes from an attempt to avoid a REX prefix
 12359              when moving the second half of a TFmode value.  */
12360 if (GET_MODE (part[1][1]) == SImode)
12361 {
12362 switch (GET_CODE (part[1][1]))
12363 {
12364 case MEM:
12365 part[1][1] = adjust_address (part[1][1], DImode, 0);
12366 break;
12367
12368 case REG:
12369 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12370 break;
12371
12372 default:
12373 gcc_unreachable ();
12374 }
12375
12376 if (GET_MODE (part[1][0]) == SImode)
12377 part[1][0] = part[1][1];
12378 }
12379 }
12380 emit_move_insn (part[0][1], part[1][1]);
12381 emit_move_insn (part[0][0], part[1][0]);
12382 return;
12383 }
12384
 12385   /* Choose the correct order so as not to overwrite the source before it is copied.  */
12386 if ((REG_P (part[0][0])
12387 && REG_P (part[1][1])
12388 && (REGNO (part[0][0]) == REGNO (part[1][1])
12389 || (nparts == 3
12390 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12391 || (collisions > 0
12392 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12393 {
12394 if (nparts == 3)
12395 {
12396 operands[2] = part[0][2];
12397 operands[3] = part[0][1];
12398 operands[4] = part[0][0];
12399 operands[5] = part[1][2];
12400 operands[6] = part[1][1];
12401 operands[7] = part[1][0];
12402 }
12403 else
12404 {
12405 operands[2] = part[0][1];
12406 operands[3] = part[0][0];
12407 operands[5] = part[1][1];
12408 operands[6] = part[1][0];
12409 }
12410 }
12411 else
12412 {
12413 if (nparts == 3)
12414 {
12415 operands[2] = part[0][0];
12416 operands[3] = part[0][1];
12417 operands[4] = part[0][2];
12418 operands[5] = part[1][0];
12419 operands[6] = part[1][1];
12420 operands[7] = part[1][2];
12421 }
12422 else
12423 {
12424 operands[2] = part[0][0];
12425 operands[3] = part[0][1];
12426 operands[5] = part[1][0];
12427 operands[6] = part[1][1];
12428 }
12429 }
12430
12431 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12432 if (optimize_size)
12433 {
12434 if (CONST_INT_P (operands[5])
12435 && operands[5] != const0_rtx
12436 && REG_P (operands[2]))
12437 {
12438 if (CONST_INT_P (operands[6])
12439 && INTVAL (operands[6]) == INTVAL (operands[5]))
12440 operands[6] = operands[2];
12441
12442 if (nparts == 3
12443 && CONST_INT_P (operands[7])
12444 && INTVAL (operands[7]) == INTVAL (operands[5]))
12445 operands[7] = operands[2];
12446 }
12447
12448 if (nparts == 3
12449 && CONST_INT_P (operands[6])
12450 && operands[6] != const0_rtx
12451 && REG_P (operands[3])
12452 && CONST_INT_P (operands[7])
12453 && INTVAL (operands[7]) == INTVAL (operands[6]))
12454 operands[7] = operands[3];
12455 }
12456
12457 emit_move_insn (operands[2], operands[5]);
12458 emit_move_insn (operands[3], operands[6]);
12459 if (nparts == 3)
12460 emit_move_insn (operands[4], operands[7]);
12461
12462 return;
12463 }
12464
12465 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12466 left shift by a constant, either using a single shift or
12467 a sequence of add instructions. */
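/* For example (illustrative, not from the original source): a left shift by
   2 on a target where two adds cost no more than one constant shift is
   emitted as two "operand += operand" additions rather than a single
   shift insn.  */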
12468
12469 static void
12470 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12471 {
12472 if (count == 1)
12473 {
12474 emit_insn ((mode == DImode
12475 ? gen_addsi3
12476 : gen_adddi3) (operand, operand, operand));
12477 }
12478 else if (!optimize_size
12479 && count * ix86_cost->add <= ix86_cost->shift_const)
12480 {
12481 int i;
 12482       for (i = 0; i < count; i++)
12483 {
12484 emit_insn ((mode == DImode
12485 ? gen_addsi3
12486 : gen_adddi3) (operand, operand, operand));
12487 }
12488 }
12489 else
12490 emit_insn ((mode == DImode
12491 ? gen_ashlsi3
12492 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12493 }
12494
12495 void
12496 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12497 {
12498 rtx low[2], high[2];
12499 int count;
12500 const int single_width = mode == DImode ? 32 : 64;
12501
12502 if (CONST_INT_P (operands[2]))
12503 {
12504 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12505 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12506
12507 if (count >= single_width)
12508 {
12509 emit_move_insn (high[0], low[1]);
12510 emit_move_insn (low[0], const0_rtx);
12511
12512 if (count > single_width)
12513 ix86_expand_ashl_const (high[0], count - single_width, mode);
12514 }
12515 else
12516 {
12517 if (!rtx_equal_p (operands[0], operands[1]))
12518 emit_move_insn (operands[0], operands[1]);
12519 emit_insn ((mode == DImode
12520 ? gen_x86_shld_1
12521 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12522 ix86_expand_ashl_const (low[0], count, mode);
12523 }
12524 return;
12525 }
12526
12527 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12528
12529 if (operands[1] == const1_rtx)
12530 {
 12531       /* Assuming we've chosen QImode-capable registers, 1 << N
 12532          can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12533 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12534 {
12535 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12536
12537 ix86_expand_clear (low[0]);
12538 ix86_expand_clear (high[0]);
12539 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12540
12541 d = gen_lowpart (QImode, low[0]);
12542 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12543 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12544 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12545
12546 d = gen_lowpart (QImode, high[0]);
12547 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12548 s = gen_rtx_NE (QImode, flags, const0_rtx);
12549 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12550 }
12551
12552 /* Otherwise, we can get the same results by manually performing
12553 a bit extract operation on bit 5/6, and then performing the two
12554 shifts. The two methods of getting 0/1 into low/high are exactly
12555 the same size. Avoiding the shift in the bit extract case helps
12556 pentium4 a bit; no one else seems to care much either way. */
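      /* Illustrative sketch (not from the original source): for DImode
         1 << n on a 32-bit target this path computes high = (n >> 5) & 1
         and low = high ^ 1, then shifts both halves by n; since the
         hardware only uses the low five bits of the count, the half
         holding the 1 lands on the correct bit and the half holding 0
         stays 0.  */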
12557 else
12558 {
12559 rtx x;
12560
12561 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12562 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12563 else
12564 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12565 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12566
12567 emit_insn ((mode == DImode
12568 ? gen_lshrsi3
12569 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12570 emit_insn ((mode == DImode
12571 ? gen_andsi3
12572 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12573 emit_move_insn (low[0], high[0]);
12574 emit_insn ((mode == DImode
12575 ? gen_xorsi3
12576 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12577 }
12578
12579 emit_insn ((mode == DImode
12580 ? gen_ashlsi3
12581 : gen_ashldi3) (low[0], low[0], operands[2]));
12582 emit_insn ((mode == DImode
12583 ? gen_ashlsi3
12584 : gen_ashldi3) (high[0], high[0], operands[2]));
12585 return;
12586 }
12587
12588 if (operands[1] == constm1_rtx)
12589 {
12590 /* For -1 << N, we can avoid the shld instruction, because we
12591 know that we're shifting 0...31/63 ones into a -1. */
12592 emit_move_insn (low[0], constm1_rtx);
12593 if (optimize_size)
12594 emit_move_insn (high[0], low[0]);
12595 else
12596 emit_move_insn (high[0], constm1_rtx);
12597 }
12598 else
12599 {
12600 if (!rtx_equal_p (operands[0], operands[1]))
12601 emit_move_insn (operands[0], operands[1]);
12602
12603 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12604 emit_insn ((mode == DImode
12605 ? gen_x86_shld_1
12606 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12607 }
12608
12609 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12610
12611 if (TARGET_CMOVE && scratch)
12612 {
12613 ix86_expand_clear (scratch);
12614 emit_insn ((mode == DImode
12615 ? gen_x86_shift_adj_1
12616 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12617 }
12618 else
12619 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12620 }
12621
12622 void
12623 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12624 {
12625 rtx low[2], high[2];
12626 int count;
12627 const int single_width = mode == DImode ? 32 : 64;
12628
12629 if (CONST_INT_P (operands[2]))
12630 {
12631 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12632 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12633
12634 if (count == single_width * 2 - 1)
12635 {
12636 emit_move_insn (high[0], high[1]);
12637 emit_insn ((mode == DImode
12638 ? gen_ashrsi3
12639 : gen_ashrdi3) (high[0], high[0],
12640 GEN_INT (single_width - 1)));
12641 emit_move_insn (low[0], high[0]);
12642
12643 }
12644 else if (count >= single_width)
12645 {
12646 emit_move_insn (low[0], high[1]);
12647 emit_move_insn (high[0], low[0]);
12648 emit_insn ((mode == DImode
12649 ? gen_ashrsi3
12650 : gen_ashrdi3) (high[0], high[0],
12651 GEN_INT (single_width - 1)));
12652 if (count > single_width)
12653 emit_insn ((mode == DImode
12654 ? gen_ashrsi3
12655 : gen_ashrdi3) (low[0], low[0],
12656 GEN_INT (count - single_width)));
12657 }
12658 else
12659 {
12660 if (!rtx_equal_p (operands[0], operands[1]))
12661 emit_move_insn (operands[0], operands[1]);
12662 emit_insn ((mode == DImode
12663 ? gen_x86_shrd_1
12664 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12665 emit_insn ((mode == DImode
12666 ? gen_ashrsi3
12667 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12668 }
12669 }
12670 else
12671 {
12672 if (!rtx_equal_p (operands[0], operands[1]))
12673 emit_move_insn (operands[0], operands[1]);
12674
12675 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12676
12677 emit_insn ((mode == DImode
12678 ? gen_x86_shrd_1
12679 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12680 emit_insn ((mode == DImode
12681 ? gen_ashrsi3
12682 : gen_ashrdi3) (high[0], high[0], operands[2]));
12683
12684 if (TARGET_CMOVE && scratch)
12685 {
12686 emit_move_insn (scratch, high[0]);
12687 emit_insn ((mode == DImode
12688 ? gen_ashrsi3
12689 : gen_ashrdi3) (scratch, scratch,
12690 GEN_INT (single_width - 1)));
12691 emit_insn ((mode == DImode
12692 ? gen_x86_shift_adj_1
12693 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12694 scratch));
12695 }
12696 else
12697 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12698 }
12699 }
12700
12701 void
12702 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12703 {
12704 rtx low[2], high[2];
12705 int count;
12706 const int single_width = mode == DImode ? 32 : 64;
12707
12708 if (CONST_INT_P (operands[2]))
12709 {
12710 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12711 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12712
12713 if (count >= single_width)
12714 {
12715 emit_move_insn (low[0], high[1]);
12716 ix86_expand_clear (high[0]);
12717
12718 if (count > single_width)
12719 emit_insn ((mode == DImode
12720 ? gen_lshrsi3
12721 : gen_lshrdi3) (low[0], low[0],
12722 GEN_INT (count - single_width)));
12723 }
12724 else
12725 {
12726 if (!rtx_equal_p (operands[0], operands[1]))
12727 emit_move_insn (operands[0], operands[1]);
12728 emit_insn ((mode == DImode
12729 ? gen_x86_shrd_1
12730 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12731 emit_insn ((mode == DImode
12732 ? gen_lshrsi3
12733 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12734 }
12735 }
12736 else
12737 {
12738 if (!rtx_equal_p (operands[0], operands[1]))
12739 emit_move_insn (operands[0], operands[1]);
12740
12741 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12742
12743 emit_insn ((mode == DImode
12744 ? gen_x86_shrd_1
12745 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12746 emit_insn ((mode == DImode
12747 ? gen_lshrsi3
12748 : gen_lshrdi3) (high[0], high[0], operands[2]));
12749
12750 /* Heh. By reversing the arguments, we can reuse this pattern. */
12751 if (TARGET_CMOVE && scratch)
12752 {
12753 ix86_expand_clear (scratch);
12754 emit_insn ((mode == DImode
12755 ? gen_x86_shift_adj_1
12756 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12757 scratch));
12758 }
12759 else
12760 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12761 }
12762 }
12763
12764 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
12765 static void
12766 predict_jump (int prob)
12767 {
12768 rtx insn = get_last_insn ();
12769 gcc_assert (JUMP_P (insn));
12770 REG_NOTES (insn)
12771 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12772 GEN_INT (prob),
12773 REG_NOTES (insn));
12774 }
12775
12776 /* Helper function for the string operations below.  Test whether VARIABLE
12777 is aligned to VALUE bytes; if it is, jump to the returned label. */
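/* For example, a call like ix86_expand_aligntest (count, 4, true) emits
   roughly
       tmp = count & 4;
       if (tmp == 0) goto label;
   and returns the label, so the caller can emit the 4-byte move before
   emitting the label itself.  */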
12778 static rtx
12779 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
12780 {
12781 rtx label = gen_label_rtx ();
12782 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12783 if (GET_MODE (variable) == DImode)
12784 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12785 else
12786 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12787 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12788 1, label);
12789 if (epilogue)
12790 predict_jump (REG_BR_PROB_BASE * 50 / 100);
12791 else
12792 predict_jump (REG_BR_PROB_BASE * 90 / 100);
12793 return label;
12794 }
12795
12796 /* Decrease COUNTREG by VALUE. */
12797 static void
12798 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12799 {
12800 if (GET_MODE (countreg) == DImode)
12801 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12802 else
12803 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12804 }
12805
12806 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
12807 rtx
12808 ix86_zero_extend_to_Pmode (rtx exp)
12809 {
12810 rtx r;
12811 if (GET_MODE (exp) == VOIDmode)
12812 return force_reg (Pmode, exp);
12813 if (GET_MODE (exp) == Pmode)
12814 return copy_to_mode_reg (Pmode, exp);
12815 r = gen_reg_rtx (Pmode);
12816 emit_insn (gen_zero_extendsidi2 (r, exp));
12817 return r;
12818 }
12819
12820 /* Divide COUNTREG by SCALE. */
12821 static rtx
12822 scale_counter (rtx countreg, int scale)
12823 {
12824 rtx sc;
12825 rtx piece_size_mask;
12826
12827 if (scale == 1)
12828 return countreg;
12829 if (CONST_INT_P (countreg))
12830 return GEN_INT (INTVAL (countreg) / scale);
12831 gcc_assert (REG_P (countreg));
12832
12833 piece_size_mask = GEN_INT (scale - 1);
12834 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
12835 GEN_INT (exact_log2 (scale)),
12836 NULL, 1, OPTAB_DIRECT);
12837 return sc;
12838 }
12839
12840 /* When SRCPTR is non-NULL, output a simple loop that moves memory from
12841 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
12842 size is COUNT, specified in bytes.  When SRCPTR is NULL, output the
12843 equivalent loop that sets memory to VALUE (supposed to be in MODE).
12844
12845 The size is rounded down to a whole number of chunks moved at once.
12846 SRCMEM and DESTMEM provide the MEM rtxes used to feed proper aliasing info. */
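/* Roughly, the emitted code has the shape below (shown for the move case
   with UNROLL chunks per iteration; the set case stores VALUE instead):

       size = count & ~(piece_size - 1);
       iter = 0;
     top:
       copy a MODE-sized chunk from SRCPTR + iter to DESTPTR + iter, UNROLL times;
       iter += piece_size;
       if (iter < size) goto top;
       DESTPTR += iter;  SRCPTR += iter;
     out:
   */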
12847
12848
12849 static void
12850 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
12851 rtx destptr, rtx srcptr, rtx value,
12852 rtx count, enum machine_mode mode, int unroll,
12853 int expected_size)
12854 {
12855 rtx out_label, top_label, iter, tmp;
12856 enum machine_mode iter_mode;
12857 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
12858 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
12859 rtx size;
12860 rtx x_addr;
12861 rtx y_addr;
12862 int i;
12863
12864 iter_mode = GET_MODE (count);
12865 if (iter_mode == VOIDmode)
12866 iter_mode = word_mode;
12867
12868 top_label = gen_label_rtx ();
12869 out_label = gen_label_rtx ();
12870 iter = gen_reg_rtx (iter_mode);
12871
12872 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
12873 NULL, 1, OPTAB_DIRECT);
12874 /* Those two should combine. */
12875 if (piece_size == const1_rtx)
12876 {
12877 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
12878 true, out_label);
12879 predict_jump (REG_BR_PROB_BASE * 10 / 100);
12880 }
12881 emit_move_insn (iter, const0_rtx);
12882
12883 emit_label (top_label);
12884
12885 tmp = convert_modes (Pmode, iter_mode, iter, true);
12886 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
12887 destmem = change_address (destmem, mode, x_addr);
12888
12889 if (srcmem)
12890 {
12891 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
12892 srcmem = change_address (srcmem, mode, y_addr);
12893
12894 /* When unrolling for chips that reorder memory reads and writes,
12895 we can save registers by using a single temporary.
12896 Also, using 4 temporaries is overkill in 32-bit mode. */
12897 if (!TARGET_64BIT && 0)
12898 {
12899 for (i = 0; i < unroll; i++)
12900 {
12901 if (i)
12902 {
12903 destmem =
12904 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12905 srcmem =
12906 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12907 }
12908 emit_move_insn (destmem, srcmem);
12909 }
12910 }
12911 else
12912 {
12913 rtx tmpreg[4];
12914 gcc_assert (unroll <= 4);
12915 for (i = 0; i < unroll; i++)
12916 {
12917 tmpreg[i] = gen_reg_rtx (mode);
12918 if (i)
12919 {
12920 srcmem =
12921 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12922 }
12923 emit_move_insn (tmpreg[i], srcmem);
12924 }
12925 for (i = 0; i < unroll; i++)
12926 {
12927 if (i)
12928 {
12929 destmem =
12930 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12931 }
12932 emit_move_insn (destmem, tmpreg[i]);
12933 }
12934 }
12935 }
12936 else
12937 for (i = 0; i < unroll; i++)
12938 {
12939 if (i)
12940 destmem =
12941 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12942 emit_move_insn (destmem, value);
12943 }
12944
12945 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
12946 true, OPTAB_LIB_WIDEN);
12947 if (tmp != iter)
12948 emit_move_insn (iter, tmp);
12949
12950 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
12951 true, top_label);
12952 if (expected_size != -1)
12953 {
12954 expected_size /= GET_MODE_SIZE (mode) * unroll;
12955 if (expected_size == 0)
12956 predict_jump (0);
12957 else if (expected_size > REG_BR_PROB_BASE)
12958 predict_jump (REG_BR_PROB_BASE - 1);
12959 else
12960 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
12961 }
12962 else
12963 predict_jump (REG_BR_PROB_BASE * 80 / 100);
12964 iter = ix86_zero_extend_to_Pmode (iter);
12965 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
12966 true, OPTAB_LIB_WIDEN);
12967 if (tmp != destptr)
12968 emit_move_insn (destptr, tmp);
12969 if (srcptr)
12970 {
12971 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
12972 true, OPTAB_LIB_WIDEN);
12973 if (tmp != srcptr)
12974 emit_move_insn (srcptr, tmp);
12975 }
12976 emit_label (out_label);
12977 }
12978
12979 /* Output "rep; mov" instruction.
12980 Arguments have the same meaning as for the previous function. */
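/* For instance, with MODE == SImode the count register holds COUNT / 4 and
   the expressions passed to the rep_mov pattern are roughly
       destexp = (countreg << 2) + destptr
       srcexp  = (countreg << 2) + srcptr
   i.e. the final values of the pointers after the rep movsl has run.  */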
12981 static void
12982 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
12983 rtx destptr, rtx srcptr,
12984 rtx count,
12985 enum machine_mode mode)
12986 {
12987 rtx destexp;
12988 rtx srcexp;
12989 rtx countreg;
12990
12991 /* If the size is known and a multiple of 4, it is cheaper to use rep movsl
than rep movsb. */
12992 if (mode == QImode && CONST_INT_P (count)
12993 && !(INTVAL (count) & 3))
12994 mode = SImode;
12995
12996 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
12997 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
12998 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
12999 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13000 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13001 if (mode != QImode)
13002 {
13003 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13004 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13005 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13006 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13007 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13008 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13009 }
13010 else
13011 {
13012 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13013 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13014 }
13015 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13016 destexp, srcexp));
13017 }
13018
13019 /* Output "rep; stos" instruction.
13020 Arguments have the same meaning as for the previous function. */
13021 static void
13022 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13023 rtx count,
13024 enum machine_mode mode)
13025 {
13026 rtx destexp;
13027 rtx countreg;
13028
13029 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13030 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13031 value = force_reg (mode, gen_lowpart (mode, value));
13032 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13033 if (mode != QImode)
13034 {
13035 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13036 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13037 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13038 }
13039 else
13040 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13041 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13042 }
13043
13044 static void
13045 emit_strmov (rtx destmem, rtx srcmem,
13046 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13047 {
13048 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13049 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13050 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13051 }
13052
13053 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
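/* As an illustration, for a known count with (count & 7) == 7 and
   max_size == 8, the constant path below emits three moves: a 4-byte move
   at offset 0, a 2-byte move at offset 4 and a 1-byte move at offset 6.  */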
13054 static void
13055 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13056 rtx destptr, rtx srcptr, rtx count, int max_size)
13057 {
13058 rtx src, dest;
13059 if (CONST_INT_P (count))
13060 {
13061 HOST_WIDE_INT countval = INTVAL (count);
13062 int offset = 0;
13063
13064 if ((countval & 0x10) && max_size > 16)
13065 {
13066 if (TARGET_64BIT)
13067 {
13068 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13069 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13070 }
13071 else
13072 gcc_unreachable ();
13073 offset += 16;
13074 }
13075 if ((countval & 0x08) && max_size > 8)
13076 {
13077 if (TARGET_64BIT)
13078 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13079 else
13080 {
13081 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13082 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4);
13083 }
13084 offset += 8;
13085 }
13086 if ((countval & 0x04) && max_size > 4)
13087 {
13088 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13089 offset += 4;
13090 }
13091 if ((countval & 0x02) && max_size > 2)
13092 {
13093 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13094 offset += 2;
13095 }
13096 if ((countval & 0x01) && max_size > 1)
13097 {
13098 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13099 offset += 1;
13100 }
13101 return;
13102 }
13103 if (max_size > 8)
13104 {
13105 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13106 count, 1, OPTAB_DIRECT);
13107 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13108 count, QImode, 1, 4);
13109 return;
13110 }
13111
13112 /* When single stringops are available, we can cheaply advance the dest and
13113 src pointers.  Otherwise we save code size by maintaining an offset (zero
13114 is readily available from the preceding rep operation) and using x86
13115 addressing modes. */
13116 if (TARGET_SINGLE_STRINGOP)
13117 {
13118 if (max_size > 4)
13119 {
13120 rtx label = ix86_expand_aligntest (count, 4, true);
13121 src = change_address (srcmem, SImode, srcptr);
13122 dest = change_address (destmem, SImode, destptr);
13123 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13124 emit_label (label);
13125 LABEL_NUSES (label) = 1;
13126 }
13127 if (max_size > 2)
13128 {
13129 rtx label = ix86_expand_aligntest (count, 2, true);
13130 src = change_address (srcmem, HImode, srcptr);
13131 dest = change_address (destmem, HImode, destptr);
13132 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13133 emit_label (label);
13134 LABEL_NUSES (label) = 1;
13135 }
13136 if (max_size > 1)
13137 {
13138 rtx label = ix86_expand_aligntest (count, 1, true);
13139 src = change_address (srcmem, QImode, srcptr);
13140 dest = change_address (destmem, QImode, destptr);
13141 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13142 emit_label (label);
13143 LABEL_NUSES (label) = 1;
13144 }
13145 }
13146 else
13147 {
13148 rtx offset = force_reg (Pmode, const0_rtx);
13149 rtx tmp;
13150
13151 if (max_size > 4)
13152 {
13153 rtx label = ix86_expand_aligntest (count, 4, true);
13154 src = change_address (srcmem, SImode, srcptr);
13155 dest = change_address (destmem, SImode, destptr);
13156 emit_move_insn (dest, src);
13157 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13158 true, OPTAB_LIB_WIDEN);
13159 if (tmp != offset)
13160 emit_move_insn (offset, tmp);
13161 emit_label (label);
13162 LABEL_NUSES (label) = 1;
13163 }
13164 if (max_size > 2)
13165 {
13166 rtx label = ix86_expand_aligntest (count, 2, true);
13167 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13168 src = change_address (srcmem, HImode, tmp);
13169 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13170 dest = change_address (destmem, HImode, tmp);
13171 emit_move_insn (dest, src);
13172 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13173 true, OPTAB_LIB_WIDEN);
13174 if (tmp != offset)
13175 emit_move_insn (offset, tmp);
13176 emit_label (label);
13177 LABEL_NUSES (label) = 1;
13178 }
13179 if (max_size > 1)
13180 {
13181 rtx label = ix86_expand_aligntest (count, 1, true);
13182 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13183 src = change_address (srcmem, QImode, tmp);
13184 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13185 dest = change_address (destmem, QImode, tmp);
13186 emit_move_insn (dest, src);
13187 emit_label (label);
13188 LABEL_NUSES (label) = 1;
13189 }
13190 }
13191 }
13192
13193 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13194 static void
13195 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13196 rtx count, int max_size)
13197 {
13198 count =
13199 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13200 count, 1, OPTAB_DIRECT);
13201 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13202 gen_lowpart (QImode, value), count, QImode,
13203 1, max_size / 2);
13204 }
13205
13206 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13207 static void
13208 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13209 {
13210 rtx dest;
13211
13212 if (CONST_INT_P (count))
13213 {
13214 HOST_WIDE_INT countval = INTVAL (count);
13215 int offset = 0;
13216
13217 if ((countval & 0x10) && max_size > 16)
13218 {
13219 if (TARGET_64BIT)
13220 {
13221 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13222 emit_insn (gen_strset (destptr, dest, value));
13223 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13224 emit_insn (gen_strset (destptr, dest, value));
13225 }
13226 else
13227 gcc_unreachable ();
13228 offset += 16;
13229 }
13230 if ((countval & 0x08) && max_size > 8)
13231 {
13232 if (TARGET_64BIT)
13233 {
13234 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13235 emit_insn (gen_strset (destptr, dest, value));
13236 }
13237 else
13238 {
13239 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13240 emit_insn (gen_strset (destptr, dest, value));
13241 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13242 emit_insn (gen_strset (destptr, dest, value));
13243 }
13244 offset += 8;
13245 }
13246 if ((countval & 0x04) && max_size > 4)
13247 {
13248 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13249 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13250 offset += 4;
13251 }
13252 if ((countval & 0x02) && max_size > 2)
13253 {
13254 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13255 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13256 offset += 2;
13257 }
13258 if ((countval & 0x01) && max_size > 1)
13259 {
13260 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13261 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13262 offset += 1;
13263 }
13264 return;
13265 }
13266 if (max_size > 32)
13267 {
13268 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13269 return;
13270 }
13271 if (max_size > 16)
13272 {
13273 rtx label = ix86_expand_aligntest (count, 16, true);
13274 if (TARGET_64BIT)
13275 {
13276 dest = change_address (destmem, DImode, destptr);
13277 emit_insn (gen_strset (destptr, dest, value));
13278 emit_insn (gen_strset (destptr, dest, value));
13279 }
13280 else
13281 {
13282 dest = change_address (destmem, SImode, destptr);
13283 emit_insn (gen_strset (destptr, dest, value));
13284 emit_insn (gen_strset (destptr, dest, value));
13285 emit_insn (gen_strset (destptr, dest, value));
13286 emit_insn (gen_strset (destptr, dest, value));
13287 }
13288 emit_label (label);
13289 LABEL_NUSES (label) = 1;
13290 }
13291 if (max_size > 8)
13292 {
13293 rtx label = ix86_expand_aligntest (count, 8, true);
13294 if (TARGET_64BIT)
13295 {
13296 dest = change_address (destmem, DImode, destptr);
13297 emit_insn (gen_strset (destptr, dest, value));
13298 }
13299 else
13300 {
13301 dest = change_address (destmem, SImode, destptr);
13302 emit_insn (gen_strset (destptr, dest, value));
13303 emit_insn (gen_strset (destptr, dest, value));
13304 }
13305 emit_label (label);
13306 LABEL_NUSES (label) = 1;
13307 }
13308 if (max_size > 4)
13309 {
13310 rtx label = ix86_expand_aligntest (count, 4, true);
13311 dest = change_address (destmem, SImode, destptr);
13312 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13313 emit_label (label);
13314 LABEL_NUSES (label) = 1;
13315 }
13316 if (max_size > 2)
13317 {
13318 rtx label = ix86_expand_aligntest (count, 2, true);
13319 dest = change_address (destmem, HImode, destptr);
13320 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13321 emit_label (label);
13322 LABEL_NUSES (label) = 1;
13323 }
13324 if (max_size > 1)
13325 {
13326 rtx label = ix86_expand_aligntest (count, 1, true);
13327 dest = change_address (destmem, QImode, destptr);
13328 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13329 emit_label (label);
13330 LABEL_NUSES (label) = 1;
13331 }
13332 }
13333
13334 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13335 to DESIRED_ALIGNMENT. */
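/* For example, if DESTPTR is known to be aligned only to 1 byte and
   DESIRED_ALIGNMENT is 4, the code below tests bit 0 of DESTPTR and, if it
   is set, copies one byte and decreases COUNT by 1; it then tests bit 1 and,
   if set, copies two bytes and decreases COUNT by 2.  After that DESTPTR is
   4-byte aligned.  */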
13336 static void
13337 expand_movmem_prologue (rtx destmem, rtx srcmem,
13338 rtx destptr, rtx srcptr, rtx count,
13339 int align, int desired_alignment)
13340 {
13341 if (align <= 1 && desired_alignment > 1)
13342 {
13343 rtx label = ix86_expand_aligntest (destptr, 1, false);
13344 srcmem = change_address (srcmem, QImode, srcptr);
13345 destmem = change_address (destmem, QImode, destptr);
13346 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13347 ix86_adjust_counter (count, 1);
13348 emit_label (label);
13349 LABEL_NUSES (label) = 1;
13350 }
13351 if (align <= 2 && desired_alignment > 2)
13352 {
13353 rtx label = ix86_expand_aligntest (destptr, 2, false);
13354 srcmem = change_address (srcmem, HImode, srcptr);
13355 destmem = change_address (destmem, HImode, destptr);
13356 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13357 ix86_adjust_counter (count, 2);
13358 emit_label (label);
13359 LABEL_NUSES (label) = 1;
13360 }
13361 if (align <= 4 && desired_alignment > 4)
13362 {
13363 rtx label = ix86_expand_aligntest (destptr, 4, false);
13364 srcmem = change_address (srcmem, SImode, srcptr);
13365 destmem = change_address (destmem, SImode, destptr);
13366 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13367 ix86_adjust_counter (count, 4);
13368 emit_label (label);
13369 LABEL_NUSES (label) = 1;
13370 }
13371 gcc_assert (desired_alignment <= 8);
13372 }
13373
13374 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
13375 to DESIRED_ALIGNMENT. */
13376 static void
13377 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13378 int align, int desired_alignment)
13379 {
13380 if (align <= 1 && desired_alignment > 1)
13381 {
13382 rtx label = ix86_expand_aligntest (destptr, 1, false);
13383 destmem = change_address (destmem, QImode, destptr);
13384 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13385 ix86_adjust_counter (count, 1);
13386 emit_label (label);
13387 LABEL_NUSES (label) = 1;
13388 }
13389 if (align <= 2 && desired_alignment > 2)
13390 {
13391 rtx label = ix86_expand_aligntest (destptr, 2, false);
13392 destmem = change_address (destmem, HImode, destptr);
13393 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13394 ix86_adjust_counter (count, 2);
13395 emit_label (label);
13396 LABEL_NUSES (label) = 1;
13397 }
13398 if (align <= 4 && desired_alignment > 4)
13399 {
13400 rtx label = ix86_expand_aligntest (destptr, 4, false);
13401 destmem = change_address (destmem, SImode, destptr);
13402 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13403 ix86_adjust_counter (count, 4);
13404 emit_label (label);
13405 LABEL_NUSES (label) = 1;
13406 }
13407 gcc_assert (desired_alignment <= 8);
13408 }
13409
13410 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
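/* The per-CPU cost tables drive this choice.  A hypothetical entry such as
       {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   would mean: blocks of at most 256 bytes use rep movsl / rep stosl, while
   larger blocks and blocks of unknown size go through a library call.  */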
13411 static enum stringop_alg
13412 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13413 int *dynamic_check)
13414 {
13415 const struct stringop_algs * algs;
13416
13417 *dynamic_check = -1;
13418 if (memset)
13419 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13420 else
13421 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13422 if (stringop_alg != no_stringop)
13423 return stringop_alg;
13424 /* rep; movq or rep; movl is the smallest variant. */
13425 else if (optimize_size)
13426 {
13427 if (!count || (count & 3))
13428 return rep_prefix_1_byte;
13429 else
13430 return rep_prefix_4_byte;
13431 }
13432 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
13434 else if (expected_size != -1 && expected_size < 4)
13435 return loop_1_byte;
13436 else if (expected_size != -1)
13437 {
13438 unsigned int i;
13439 enum stringop_alg alg = libcall;
13440 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13441 {
13442 gcc_assert (algs->size[i].max);
13443 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13444 {
13445 if (algs->size[i].alg != libcall)
13446 alg = algs->size[i].alg;
13447 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13448 last non-libcall inline algorithm. */
13449 if (TARGET_INLINE_ALL_STRINGOPS)
13450 {
13451 /* When the current size is best copied by a libcall, but we
13452 are still forced to inline, run the heuristic below that
13453 will pick code for medium-sized blocks. */
13454 if (alg != libcall)
13455 return alg;
13456 break;
13457 }
13458 else
13459 return algs->size[i].alg;
13460 }
13461 }
13462 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13463 }
13464 /* When asked to inline the call anyway, try to pick a meaningful choice.
13465 We look for the maximal size of block that is faster to copy by hand and
13466 take blocks of at most that size, guessing that the average size will
13467 be roughly half of the block.
13468
13469 If this turns out to be bad, we might simply specify the preferred
13470 choice in ix86_costs. */
13471 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13472 && algs->unknown_size == libcall)
13473 {
13474 int max = -1;
13475 enum stringop_alg alg;
13476 int i;
13477
13478 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13479 if (algs->size[i].alg != libcall && algs->size[i].alg)
13480 max = algs->size[i].max;
13481 if (max == -1)
13482 max = 4096;
13483 alg = decide_alg (count, max / 2, memset, dynamic_check);
13484 gcc_assert (*dynamic_check == -1);
13485 gcc_assert (alg != libcall);
13486 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13487 *dynamic_check = max;
13488 return alg;
13489 }
13490 return algs->unknown_size;
13491 }
13492
13493 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13494 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13495 static int
13496 decide_alignment (int align,
13497 enum stringop_alg alg,
13498 int expected_size)
13499 {
13500 int desired_align = 0;
13501 switch (alg)
13502 {
13503 case no_stringop:
13504 gcc_unreachable ();
13505 case loop:
13506 case unrolled_loop:
13507 desired_align = GET_MODE_SIZE (Pmode);
13508 break;
13509 case rep_prefix_8_byte:
13510 desired_align = 8;
13511 break;
13512 case rep_prefix_4_byte:
13513 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
13514 copying a whole cacheline at once. */
13515 if (TARGET_PENTIUMPRO)
13516 desired_align = 8;
13517 else
13518 desired_align = 4;
13519 break;
13520 case rep_prefix_1_byte:
13521 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
13522 copying a whole cacheline at once. */
13523 if (TARGET_PENTIUMPRO)
13524 desired_align = 8;
13525 else
13526 desired_align = 1;
13527 break;
13528 case loop_1_byte:
13529 desired_align = 1;
13530 break;
13531 case libcall:
13532 return 0;
13533 }
13534
13535 if (optimize_size)
13536 desired_align = 1;
13537 if (desired_align < align)
13538 desired_align = align;
13539 if (expected_size != -1 && expected_size < 4)
13540 desired_align = align;
13541 return desired_align;
13542 }
13543
13544 /* Return the smallest power of 2 greater than VAL. */
13545 static int
13546 smallest_pow2_greater_than (int val)
13547 {
13548 int ret = 1;
13549 while (ret <= val)
13550 ret <<= 1;
13551 return ret;
13552 }
13553
13554 /* Expand string move (memcpy) operation. Use i386 string operations when
13555 profitable. expand_clrmem contains similar code. The code depends upon
13556 architecture, block size and alignment, but always has the same
13557 overall structure:
13558
13559 1) Prologue guard: Conditional that jumps up to epilogues for small
13560 blocks that can be handled by epilogue alone. This is faster but
13561 also needed for correctness, since the prologue assumes the block is larger
13562 than the desired alignment.
13563
13564 Optional dynamic check for size and libcall for large
13565 blocks is emitted here too, with -minline-stringops-dynamically.
13566
13567 2) Prologue: copy the first few bytes in order to get the destination
13568 aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
13569 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13570 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
13571
13572 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13573 with specified algorithm.
13574
13575 4) Epilogue: code copying tail of the block that is too small to be
13576 handled by main body (or up to size guarded by prologue guard). */
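/* Schematically, the code emitted below looks roughly like:

       if (count < epilogue_size_needed) goto epilogue;            (step 1)
       with -minline-stringops-dynamically, if count is large,
         call the library function instead and skip the rest;
       copy a few bytes to align the destination, decreasing count; (step 2)
       main copy loop or rep-prefixed instruction;                  (step 3)
     epilogue:
       copy the remaining count & (epilogue_size_needed - 1) bytes. (step 4)
   */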
13577
13578 int
13579 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13580 rtx expected_align_exp, rtx expected_size_exp)
13581 {
13582 rtx destreg;
13583 rtx srcreg;
13584 rtx label = NULL;
13585 rtx tmp;
13586 rtx jump_around_label = NULL;
13587 HOST_WIDE_INT align = 1;
13588 unsigned HOST_WIDE_INT count = 0;
13589 HOST_WIDE_INT expected_size = -1;
13590 int size_needed = 0, epilogue_size_needed;
13591 int desired_align = 0;
13592 enum stringop_alg alg;
13593 int dynamic_check;
13594
13595 if (CONST_INT_P (align_exp))
13596 align = INTVAL (align_exp);
13597 /* i386 can do misaligned access at a reasonably increased cost. */
13598 if (CONST_INT_P (expected_align_exp)
13599 && INTVAL (expected_align_exp) > align)
13600 align = INTVAL (expected_align_exp);
13601 if (CONST_INT_P (count_exp))
13602 count = expected_size = INTVAL (count_exp);
13603 if (CONST_INT_P (expected_size_exp) && count == 0)
13604 expected_size = INTVAL (expected_size_exp);
13605
13606 /* Step 0: Decide on preferred algorithm, desired alignment and
13607 size of chunks to be copied by main loop. */
13608
13609 alg = decide_alg (count, expected_size, false, &dynamic_check);
13610 desired_align = decide_alignment (align, alg, expected_size);
13611
13612 if (!TARGET_ALIGN_STRINGOPS)
13613 align = desired_align;
13614
13615 if (alg == libcall)
13616 return 0;
13617 gcc_assert (alg != no_stringop);
13618 if (!count)
13619 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13620 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13621 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
13622 switch (alg)
13623 {
13624 case libcall:
13625 case no_stringop:
13626 gcc_unreachable ();
13627 case loop:
13628 size_needed = GET_MODE_SIZE (Pmode);
13629 break;
13630 case unrolled_loop:
13631 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
13632 break;
13633 case rep_prefix_8_byte:
13634 size_needed = 8;
13635 break;
13636 case rep_prefix_4_byte:
13637 size_needed = 4;
13638 break;
13639 case rep_prefix_1_byte:
13640 case loop_1_byte:
13641 size_needed = 1;
13642 break;
13643 }
13644
13645 epilogue_size_needed = size_needed;
13646
13647 /* Step 1: Prologue guard. */
13648
13649 /* Alignment code needs count to be in register. */
13650 if (CONST_INT_P (count_exp) && desired_align > align)
13651 {
13652 enum machine_mode mode = SImode;
13653 if (TARGET_64BIT && (count & ~0xffffffff))
13654 mode = DImode;
13655 count_exp = force_reg (mode, count_exp);
13656 }
13657 gcc_assert (desired_align >= 1 && align >= 1);
13658
13659 /* Ensure that alignment prologue won't copy past end of block. */
13660 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13661 && !count)
13662 {
13663 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
13664
13665 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13666 Make sure it is power of 2. */
13667 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
13668
13669 label = gen_label_rtx ();
13670 emit_cmp_and_jump_insns (count_exp,
13671 GEN_INT (epilogue_size_needed),
13672 LTU, 0, GET_MODE (count_exp), 1, label);
13673 if (expected_size == -1 || expected_size < epilogue_size_needed)
13674 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13675 else
13676 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13677 }
13678 /* Emit code to decide at runtime whether a library call or inline code
13679 should be used. */
13680 if (dynamic_check != -1)
13681 {
13682 rtx hot_label = gen_label_rtx ();
13683 jump_around_label = gen_label_rtx ();
13684 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13685 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13686 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13687 emit_block_move_via_libcall (dst, src, count_exp, false);
13688 emit_jump (jump_around_label);
13689 emit_label (hot_label);
13690 }
13691
13692 /* Step 2: Alignment prologue. */
13693
13694 if (desired_align > align)
13695 {
13696 /* Except for the first move in the epilogue, we no longer know
13697 the constant offset in the aliasing info.  It doesn't seem worth
13698 the pain to maintain it for the first move, so throw away
13699 the info early. */
13700 src = change_address (src, BLKmode, srcreg);
13701 dst = change_address (dst, BLKmode, destreg);
13702 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
13703 desired_align);
13704 }
13705 if (label && size_needed == 1)
13706 {
13707 emit_label (label);
13708 LABEL_NUSES (label) = 1;
13709 label = NULL;
13710 }
13711
13712 /* Step 3: Main loop. */
13713
13714 switch (alg)
13715 {
13716 case libcall:
13717 case no_stringop:
13718 gcc_unreachable ();
13719 case loop_1_byte:
13720 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13721 count_exp, QImode, 1, expected_size);
13722 break;
13723 case loop:
13724 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13725 count_exp, Pmode, 1, expected_size);
13726 break;
13727 case unrolled_loop:
13728 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
13729 registers for 4 temporaries anyway. */
13730 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13731 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
13732 expected_size);
13733 break;
13734 case rep_prefix_8_byte:
13735 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13736 DImode);
13737 break;
13738 case rep_prefix_4_byte:
13739 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13740 SImode);
13741 break;
13742 case rep_prefix_1_byte:
13743 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13744 QImode);
13745 break;
13746 }
13747 /* Properly adjust the offsets of the src and dest memory for aliasing. */
13748 if (CONST_INT_P (count_exp))
13749 {
13750 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
13751 (count / size_needed) * size_needed);
13752 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
13753 (count / size_needed) * size_needed);
13754 }
13755 else
13756 {
13757 src = change_address (src, BLKmode, srcreg);
13758 dst = change_address (dst, BLKmode, destreg);
13759 }
13760
13761 /* Step 4: Epilogue to copy the remaining bytes. */
13762
13763 if (label)
13764 {
13765 /* When the main loop is done, COUNT_EXP might hold the original count,
13766 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
13767 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
13768 bytes.  Compensate if needed. */
13769
13770 if (size_needed < epilogue_size_needed)
13771 {
13772 tmp =
13773 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
13774 GEN_INT (size_needed - 1), count_exp, 1,
13775 OPTAB_DIRECT);
13776 if (tmp != count_exp)
13777 emit_move_insn (count_exp, tmp);
13778 }
13779 emit_label (label);
13780 LABEL_NUSES (label) = 1;
13781 }
13782
13783 if (count_exp != const0_rtx && epilogue_size_needed > 1)
13784 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
13785 epilogue_size_needed);
13786 if (jump_around_label)
13787 emit_label (jump_around_label);
13788 return 1;
13789 }
13790
13791 /* Helper function for memset.  For a QImode value 0xXY produce
13792 0xXYXYXYXY of the width specified by MODE.  This is essentially
13793 a * 0x01010101, but we can do slightly better than
13794 synth_mult by unwinding the sequence by hand on CPUs with
13795 slow multiply. */
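/* For example, to promote 0xAB to SImode without a multiply, the shift/or
   sequence below computes
       0xAB   | (0xAB   << 8)  = 0xABAB
       0xABAB | (0xABAB << 16) = 0xABABABAB
   and for DImode one more step with a shift by 32 yields
   0xABABABABABABABAB.  */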
13796 static rtx
13797 promote_duplicated_reg (enum machine_mode mode, rtx val)
13798 {
13799 enum machine_mode valmode = GET_MODE (val);
13800 rtx tmp;
13801 int nops = mode == DImode ? 3 : 2;
13802
13803 gcc_assert (mode == SImode || mode == DImode);
13804 if (val == const0_rtx)
13805 return copy_to_mode_reg (mode, const0_rtx);
13806 if (CONST_INT_P (val))
13807 {
13808 HOST_WIDE_INT v = INTVAL (val) & 255;
13809
13810 v |= v << 8;
13811 v |= v << 16;
13812 if (mode == DImode)
13813 v |= (v << 16) << 16;
13814 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
13815 }
13816
13817 if (valmode == VOIDmode)
13818 valmode = QImode;
13819 if (valmode != QImode)
13820 val = gen_lowpart (QImode, val);
13821 if (mode == QImode)
13822 return val;
13823 if (!TARGET_PARTIAL_REG_STALL)
13824 nops--;
13825 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
13826 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
13827 <= (ix86_cost->shift_const + ix86_cost->add) * nops
13828 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
13829 {
13830 rtx reg = convert_modes (mode, QImode, val, true);
13831 tmp = promote_duplicated_reg (mode, const1_rtx);
13832 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
13833 OPTAB_DIRECT);
13834 }
13835 else
13836 {
13837 rtx reg = convert_modes (mode, QImode, val, true);
13838
13839 if (!TARGET_PARTIAL_REG_STALL)
13840 if (mode == SImode)
13841 emit_insn (gen_movsi_insv_1 (reg, reg));
13842 else
13843 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
13844 else
13845 {
13846 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
13847 NULL, 1, OPTAB_DIRECT);
13848 reg =
13849 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13850 }
13851 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
13852 NULL, 1, OPTAB_DIRECT);
13853 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13854 if (mode == SImode)
13855 return reg;
13856 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
13857 NULL, 1, OPTAB_DIRECT);
13858 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13859 return reg;
13860 }
13861 }
13862
13863 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
13864 will be needed by the main loop copying SIZE_NEEDED chunks and by the
13865 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
13866 static rtx
13867 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
13868 {
13869 rtx promoted_val;
13870
13871 if (TARGET_64BIT
13872 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
13873 promoted_val = promote_duplicated_reg (DImode, val);
13874 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
13875 promoted_val = promote_duplicated_reg (SImode, val);
13876 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
13877 promoted_val = promote_duplicated_reg (HImode, val);
13878 else
13879 promoted_val = val;
13880
13881 return promoted_val;
13882 }
13883
13884 /* Expand string clear operation (bzero). Use i386 string operations when
13885 profitable. See expand_movmem comment for explanation of individual
13886 steps performed. */
13887 int
13888 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
13889 rtx expected_align_exp, rtx expected_size_exp)
13890 {
13891 rtx destreg;
13892 rtx label = NULL;
13893 rtx tmp;
13894 rtx jump_around_label = NULL;
13895 HOST_WIDE_INT align = 1;
13896 unsigned HOST_WIDE_INT count = 0;
13897 HOST_WIDE_INT expected_size = -1;
13898 int size_needed = 0, epilogue_size_needed;
13899 int desired_align = 0;
13900 enum stringop_alg alg;
13901 rtx promoted_val = NULL;
13902 bool force_loopy_epilogue = false;
13903 int dynamic_check;
13904
13905 if (CONST_INT_P (align_exp))
13906 align = INTVAL (align_exp);
13907 /* i386 can do misaligned access at a reasonably increased cost. */
13908 if (CONST_INT_P (expected_align_exp)
13909 && INTVAL (expected_align_exp) > align)
13910 align = INTVAL (expected_align_exp);
13911 if (CONST_INT_P (count_exp))
13912 count = expected_size = INTVAL (count_exp);
13913 if (CONST_INT_P (expected_size_exp) && count == 0)
13914 expected_size = INTVAL (expected_size_exp);
13915
13916 /* Step 0: Decide on preferred algorithm, desired alignment and
13917 size of chunks to be copied by main loop. */
13918
13919 alg = decide_alg (count, expected_size, true, &dynamic_check);
13920 desired_align = decide_alignment (align, alg, expected_size);
13921
13922 if (!TARGET_ALIGN_STRINGOPS)
13923 align = desired_align;
13924
13925 if (alg == libcall)
13926 return 0;
13927 gcc_assert (alg != no_stringop);
13928 if (!count)
13929 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13930 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13931 switch (alg)
13932 {
13933 case libcall:
13934 case no_stringop:
13935 gcc_unreachable ();
13936 case loop:
13937 size_needed = GET_MODE_SIZE (Pmode);
13938 break;
13939 case unrolled_loop:
13940 size_needed = GET_MODE_SIZE (Pmode) * 4;
13941 break;
13942 case rep_prefix_8_byte:
13943 size_needed = 8;
13944 break;
13945 case rep_prefix_4_byte:
13946 size_needed = 4;
13947 break;
13948 case rep_prefix_1_byte:
13949 case loop_1_byte:
13950 size_needed = 1;
13951 break;
13952 }
13953 epilogue_size_needed = size_needed;
13954
13955 /* Step 1: Prologue guard. */
13956
13957 /* Alignment code needs count to be in register. */
13958 if (CONST_INT_P (count_exp) && desired_align > align)
13959 {
13960 enum machine_mode mode = SImode;
13961 if (TARGET_64BIT && (count & ~0xffffffff))
13962 mode = DImode;
13963 count_exp = force_reg (mode, count_exp);
13964 }
13965 /* Do the cheap promotion to allow better CSE across the
13966 main loop and epilogue (i.e. one load of the big constant in
13967 front of all code). */
13968 if (CONST_INT_P (val_exp))
13969 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
13970 desired_align, align);
13971 /* Ensure that alignment prologue won't copy past end of block. */
13972 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13973 && !count)
13974 {
13975 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
13976
13977 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
13978 Make sure it is power of 2. */
13979 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
13980
13981 /* To improve performance of small blocks, we jump around the VAL
13982 promoting code.  This means that if the promoted VAL is not constant,
13983 we might not use it in the epilogue and have to use the byte
13984 loop variant. */
13985 if (epilogue_size_needed > 2 && !promoted_val)
13986 force_loopy_epilogue = true;
13987 label = gen_label_rtx ();
13988 emit_cmp_and_jump_insns (count_exp,
13989 GEN_INT (epilogue_size_needed),
13990 LTU, 0, GET_MODE (count_exp), 1, label);
13991 if (expected_size == -1 || expected_size <= epilogue_size_needed)
13992 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13993 else
13994 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13995 }
13996 if (dynamic_check != -1)
13997 {
13998 rtx hot_label = gen_label_rtx ();
13999 jump_around_label = gen_label_rtx ();
14000 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14001 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14002 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14003 set_storage_via_libcall (dst, count_exp, val_exp, false);
14004 emit_jump (jump_around_label);
14005 emit_label (hot_label);
14006 }
14007
14008 /* Step 2: Alignment prologue. */
14009
14010 /* Do the expensive promotion once we branched off the small blocks. */
14011 if (!promoted_val)
14012 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14013 desired_align, align);
14014 gcc_assert (desired_align >= 1 && align >= 1);
14015
14016 if (desired_align > align)
14017 {
14018 /* Except for the first move in the epilogue, we no longer know
14019 the constant offset in the aliasing info.  It doesn't seem worth
14020 the pain to maintain it for the first move, so throw away
14021 the info early. */
14022 dst = change_address (dst, BLKmode, destreg);
14023 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14024 desired_align);
14025 }
14026 if (label && size_needed == 1)
14027 {
14028 emit_label (label);
14029 LABEL_NUSES (label) = 1;
14030 label = NULL;
14031 }
14032
14033 /* Step 3: Main loop. */
14034
14035 switch (alg)
14036 {
14037 case libcall:
14038 case no_stringop:
14039 gcc_unreachable ();
14040 case loop_1_byte:
14041 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14042 count_exp, QImode, 1, expected_size);
14043 break;
14044 case loop:
14045 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14046 count_exp, Pmode, 1, expected_size);
14047 break;
14048 case unrolled_loop:
14049 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14050 count_exp, Pmode, 4, expected_size);
14051 break;
14052 case rep_prefix_8_byte:
14053 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14054 DImode);
14055 break;
14056 case rep_prefix_4_byte:
14057 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14058 SImode);
14059 break;
14060 case rep_prefix_1_byte:
14061 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14062 QImode);
14063 break;
14064 }
14065 /* Properly adjust the offset of the dest memory for aliasing. */
14066 if (CONST_INT_P (count_exp))
14067 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14068 (count / size_needed) * size_needed);
14069 else
14070 dst = change_address (dst, BLKmode, destreg);
14071
14072 /* Step 4: Epilogue to copy the remaining bytes. */
14073
14074 if (label)
14075 {
14076 /* When the main loop is done, COUNT_EXP might hold the original count,
14077 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14078 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14079 bytes.  Compensate if needed. */
14080
14081 if (size_needed < desired_align - align)
14082 {
14083 tmp =
14084 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14085 GEN_INT (size_needed - 1), count_exp, 1,
14086 OPTAB_DIRECT);
14087 size_needed = desired_align - align + 1;
14088 if (tmp != count_exp)
14089 emit_move_insn (count_exp, tmp);
14090 }
14091 emit_label (label);
14092 LABEL_NUSES (label) = 1;
14093 }
14094 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14095 {
14096 if (force_loopy_epilogue)
14097 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14098 size_needed);
14099 else
14100 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14101 size_needed);
14102 }
14103 if (jump_around_label)
14104 emit_label (jump_around_label);
14105 return 1;
14106 }
14107
14108 /* Expand strlen. */
14109 int
14110 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14111 {
14112 rtx addr, scratch1, scratch2, scratch3, scratch4;
14113
14114 /* The generic case of the strlen expander is long.  Avoid expanding it
14115 unless TARGET_INLINE_ALL_STRINGOPS. */
14116
14117 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14118 && !TARGET_INLINE_ALL_STRINGOPS
14119 && !optimize_size
14120 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14121 return 0;
14122
14123 addr = force_reg (Pmode, XEXP (src, 0));
14124 scratch1 = gen_reg_rtx (Pmode);
14125
14126 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14127 && !optimize_size)
14128 {
14129 /* Well, it seems that some optimizer does not combine a call like
14130 foo(strlen(bar), strlen(bar));
14131 when the move and the subtraction are done here.  It does calculate
14132 the length just once when these instructions are done inside
14133 output_strlen_unroll().  But I think that, since &bar[strlen(bar)] is
14134 often used and this uses one fewer register for the lifetime of
14135 output_strlen_unroll(), this is better. */
14136
14137 emit_move_insn (out, addr);
14138
14139 ix86_expand_strlensi_unroll_1 (out, src, align);
14140
14141 /* strlensi_unroll_1 returns the address of the zero at the end of
14142 the string, like memchr(), so compute the length by subtracting
14143 the start address. */
14144 if (TARGET_64BIT)
14145 emit_insn (gen_subdi3 (out, out, addr));
14146 else
14147 emit_insn (gen_subsi3 (out, out, addr));
14148 }
14149 else
14150 {
14151 rtx unspec;
14152 scratch2 = gen_reg_rtx (Pmode);
14153 scratch3 = gen_reg_rtx (Pmode);
14154 scratch4 = force_reg (Pmode, constm1_rtx);
14155
14156 emit_move_insn (scratch3, addr);
14157 eoschar = force_reg (QImode, eoschar);
14158
14159 src = replace_equiv_address_nv (src, scratch3);
14160
14161 /* If .md starts supporting :P, this can be done in .md. */
14162 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14163 scratch4), UNSPEC_SCAS);
14164 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14165 if (TARGET_64BIT)
14166 {
14167 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14168 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14169 }
14170 else
14171 {
14172 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14173 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14174 }
14175 }
14176 return 1;
14177 }
14178
14179 /* Expand the appropriate insns for doing strlen if not just doing
14180 repnz; scasb
14181
14182 out = result, initialized with the start address
14183 align_rtx = alignment of the address.
14184 scratch = scratch register, initialized with the start address when
14185 not aligned, otherwise undefined
14186
14187 This is just the body.  It needs the initializations mentioned above and
14188 some address computation at the end.  These things are done in i386.md. */
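/* Roughly, the generated code first compares up to 3 bytes one at a time
   until OUT is 4-byte aligned, then loops loading 4 bytes at a time and
   testing them for a zero byte with the bit trick used below, and finally
   adjusts OUT so that it points at the terminating zero byte.  */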
14189
14190 static void
14191 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14192 {
14193 int align;
14194 rtx tmp;
14195 rtx align_2_label = NULL_RTX;
14196 rtx align_3_label = NULL_RTX;
14197 rtx align_4_label = gen_label_rtx ();
14198 rtx end_0_label = gen_label_rtx ();
14199 rtx mem;
14200 rtx tmpreg = gen_reg_rtx (SImode);
14201 rtx scratch = gen_reg_rtx (SImode);
14202 rtx cmp;
14203
14204 align = 0;
14205 if (CONST_INT_P (align_rtx))
14206 align = INTVAL (align_rtx);
14207
14208 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14209
14210 /* Is there a known alignment and is it less than 4? */
14211 if (align < 4)
14212 {
14213 rtx scratch1 = gen_reg_rtx (Pmode);
14214 emit_move_insn (scratch1, out);
14215 /* Is there a known alignment and is it not 2? */
14216 if (align != 2)
14217 {
14218 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14219 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14220
14221 /* Leave just the 3 lower bits. */
14222 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14223 NULL_RTX, 0, OPTAB_WIDEN);
14224
14225 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14226 Pmode, 1, align_4_label);
14227 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14228 Pmode, 1, align_2_label);
14229 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14230 Pmode, 1, align_3_label);
14231 }
14232 else
14233 {
14234 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14235 check whether it is aligned to 4 bytes. */
14236
14237 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14238 NULL_RTX, 0, OPTAB_WIDEN);
14239
14240 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14241 Pmode, 1, align_4_label);
14242 }
14243
14244 mem = change_address (src, QImode, out);
14245
14246 /* Now compare the bytes. */
14247
14248 /* Compare the first 1 to 3 unaligned bytes on a byte-by-byte basis. */
14249 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14250 QImode, 1, end_0_label);
14251
14252 /* Increment the address. */
14253 if (TARGET_64BIT)
14254 emit_insn (gen_adddi3 (out, out, const1_rtx));
14255 else
14256 emit_insn (gen_addsi3 (out, out, const1_rtx));
14257
14258 /* Not needed with an alignment of 2 */
14259 if (align != 2)
14260 {
14261 emit_label (align_2_label);
14262
14263 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14264 end_0_label);
14265
14266 if (TARGET_64BIT)
14267 emit_insn (gen_adddi3 (out, out, const1_rtx));
14268 else
14269 emit_insn (gen_addsi3 (out, out, const1_rtx));
14270
14271 emit_label (align_3_label);
14272 }
14273
14274 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14275 end_0_label);
14276
14277 if (TARGET_64BIT)
14278 emit_insn (gen_adddi3 (out, out, const1_rtx));
14279 else
14280 emit_insn (gen_addsi3 (out, out, const1_rtx));
14281 }
14282
14283 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
14284 align this loop; it only makes the program larger and does not help
14285 to speed it up. */
14286 emit_label (align_4_label);
14287
14288 mem = change_address (src, SImode, out);
14289 emit_move_insn (scratch, mem);
14290 if (TARGET_64BIT)
14291 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14292 else
14293 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14294
14295 /* This formula yields a nonzero result iff one of the bytes is zero.
14296 This saves three branches inside the loop and many cycles. */
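/* E.g. for scratch == 0x12003456 the value computed below is
       (0x12003456 - 0x01010101) & ~0x12003456 & 0x80808080 = 0x00800000,
   which is nonzero because of the zero byte, while a word such as
   0x11223344 with no zero byte yields 0.  */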
14297
14298 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14299 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14300 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14301 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14302 gen_int_mode (0x80808080, SImode)));
14303 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14304 align_4_label);
14305
14306 if (TARGET_CMOVE)
14307 {
14308 rtx reg = gen_reg_rtx (SImode);
14309 rtx reg2 = gen_reg_rtx (Pmode);
14310 emit_move_insn (reg, tmpreg);
14311 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14312
14313 /* If zero is not in the first two bytes, move two bytes forward. */
14314 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14315 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14316 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14317 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14318 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14319 reg,
14320 tmpreg)));
14321 /* Emit lea manually to avoid clobbering of flags. */
14322 emit_insn (gen_rtx_SET (SImode, reg2,
14323 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14324
14325 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14326 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14327 emit_insn (gen_rtx_SET (VOIDmode, out,
14328 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14329 reg2,
14330 out)));
14331
14332 }
14333 else
14334 {
14335 rtx end_2_label = gen_label_rtx ();
14336 /* Is zero in the first two bytes? */
14337
14338 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14339 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14340 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14341 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14342 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14343 pc_rtx);
14344 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14345 JUMP_LABEL (tmp) = end_2_label;
14346
14347 /* Not in the first two. Move two bytes forward. */
14348 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14349 if (TARGET_64BIT)
14350 emit_insn (gen_adddi3 (out, out, const2_rtx));
14351 else
14352 emit_insn (gen_addsi3 (out, out, const2_rtx));
14353
14354 emit_label (end_2_label);
14355
14356 }
14357
14358 /* Avoid branch in fixing the byte. */
14359 tmpreg = gen_lowpart (QImode, tmpreg);
14360 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14361 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14362 if (TARGET_64BIT)
14363 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14364 else
14365 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14366
14367 emit_label (end_0_label);
14368 }
14369
14370 void
14371 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14372 rtx callarg2 ATTRIBUTE_UNUSED,
14373 rtx pop, int sibcall)
14374 {
14375 rtx use = NULL, call;
14376
14377 if (pop == const0_rtx)
14378 pop = NULL;
14379 gcc_assert (!TARGET_64BIT || !pop);
14380
14381 if (TARGET_MACHO && !TARGET_64BIT)
14382 {
14383 #if TARGET_MACHO
14384 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14385 fnaddr = machopic_indirect_call_target (fnaddr);
14386 #endif
14387 }
14388 else
14389 {
14390 /* Static functions and indirect calls don't need the pic register. */
14391 if (! TARGET_64BIT && flag_pic
14392 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14393 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14394 use_reg (&use, pic_offset_table_rtx);
14395 }
14396
14397 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14398 {
14399 rtx al = gen_rtx_REG (QImode, 0);
14400 emit_move_insn (al, callarg2);
14401 use_reg (&use, al);
14402 }
14403
14404 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14405 {
14406 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14407 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14408 }
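/* For 64-bit sibcalls an indirect target must live in a register that is
   neither callee-saved (the epilogue would restore its old value before the
   jump) nor used for argument passing; R11 is the call-clobbered scratch
   register reserved for this purpose.  */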
14409 if (sibcall && TARGET_64BIT
14410 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14411 {
14412 rtx addr;
14413 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14414 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14415 emit_move_insn (fnaddr, addr);
14416 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14417 }
14418
14419 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14420 if (retval)
14421 call = gen_rtx_SET (VOIDmode, retval, call);
14422 if (pop)
14423 {
14424 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14425 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14426 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14427 }
14428
14429 call = emit_call_insn (call);
14430 if (use)
14431 CALL_INSN_FUNCTION_USAGE (call) = use;
14432 }
14433
14434 \f
14435 /* Clear stack slot assignments remembered from previous functions.
14436 This is called from INIT_EXPANDERS once before RTL is emitted for each
14437 function. */
14438
14439 static struct machine_function *
14440 ix86_init_machine_status (void)
14441 {
14442 struct machine_function *f;
14443
14444 f = ggc_alloc_cleared (sizeof (struct machine_function));
14445 f->use_fast_prologue_epilogue_nregs = -1;
14446 f->tls_descriptor_call_expanded_p = 0;
14447
14448 return f;
14449 }
14450
14451 /* Return a MEM corresponding to a stack slot with mode MODE.
14452 Allocate a new slot if necessary.
14453
14454 The RTL for a function can have several slots available: N is
14455 which slot to use. */
14456
14457 rtx
14458 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14459 {
14460 struct stack_local_entry *s;
14461
14462 gcc_assert (n < MAX_386_STACK_LOCALS);
14463
14464 for (s = ix86_stack_locals; s; s = s->next)
14465 if (s->mode == mode && s->n == n)
14466 return copy_rtx (s->rtl);
14467
14468 s = (struct stack_local_entry *)
14469 ggc_alloc (sizeof (struct stack_local_entry));
14470 s->n = n;
14471 s->mode = mode;
14472 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14473
14474 s->next = ix86_stack_locals;
14475 ix86_stack_locals = s;
14476 return s->rtl;
14477 }
14478
14479 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14480
14481 static GTY(()) rtx ix86_tls_symbol;
14482 rtx
14483 ix86_tls_get_addr (void)
14484 {
14485
14486 if (!ix86_tls_symbol)
14487 {
14488 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14489 (TARGET_ANY_GNU_TLS
14490 && !TARGET_64BIT)
14491 ? "___tls_get_addr"
14492 : "__tls_get_addr");
14493 }
14494
14495 return ix86_tls_symbol;
14496 }
14497
14498 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14499
14500 static GTY(()) rtx ix86_tls_module_base_symbol;
14501 rtx
14502 ix86_tls_module_base (void)
14503 {
14504
14505 if (!ix86_tls_module_base_symbol)
14506 {
14507 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14508 "_TLS_MODULE_BASE_");
14509 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14510 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14511 }
14512
14513 return ix86_tls_module_base_symbol;
14514 }
14515 \f
14516 /* Calculate the length of the memory address in the instruction
14517 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14518
14519 int
14520 memory_address_length (rtx addr)
14521 {
14522 struct ix86_address parts;
14523 rtx base, index, disp;
14524 int len;
14525 int ok;
14526
14527 if (GET_CODE (addr) == PRE_DEC
14528 || GET_CODE (addr) == POST_INC
14529 || GET_CODE (addr) == PRE_MODIFY
14530 || GET_CODE (addr) == POST_MODIFY)
14531 return 0;
14532
14533 ok = ix86_decompose_address (addr, &parts);
14534 gcc_assert (ok);
14535
14536 if (parts.base && GET_CODE (parts.base) == SUBREG)
14537 parts.base = SUBREG_REG (parts.base);
14538 if (parts.index && GET_CODE (parts.index) == SUBREG)
14539 parts.index = SUBREG_REG (parts.index);
14540
14541 base = parts.base;
14542 index = parts.index;
14543 disp = parts.disp;
14544 len = 0;
14545
14546 /* Rule of thumb:
14547 - esp as the base always wants an index,
14548 - ebp as the base always wants a displacement. */
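/* The length computed below covers only the optional SIB byte and the
   displacement bytes: (%eax) adds 0, (%esp) needs a SIB byte and adds 1,
   disp8(%ebx) adds 1, a bare disp32 adds 4, and any base+index form pays
   one extra byte for the SIB on top of its displacement.  */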
14549
14550 /* Register Indirect. */
14551 if (base && !index && !disp)
14552 {
14553 /* esp (for its index) and ebp (for its displacement) need
14554 the two-byte modrm form. */
14555 if (addr == stack_pointer_rtx
14556 || addr == arg_pointer_rtx
14557 || addr == frame_pointer_rtx
14558 || addr == hard_frame_pointer_rtx)
14559 len = 1;
14560 }
14561
14562 /* Direct Addressing. */
14563 else if (disp && !base && !index)
14564 len = 4;
14565
14566 else
14567 {
14568 /* Find the length of the displacement constant. */
14569 if (disp)
14570 {
14571 if (base && satisfies_constraint_K (disp))
14572 len = 1;
14573 else
14574 len = 4;
14575 }
14576 /* ebp always wants a displacement. */
14577 else if (base == hard_frame_pointer_rtx)
14578 len = 1;
14579
14580 /* An index requires the two-byte modrm form.... */
14581 if (index
14582 /* ...like esp, which always wants an index. */
14583 || base == stack_pointer_rtx
14584 || base == arg_pointer_rtx
14585 || base == frame_pointer_rtx)
14586 len += 1;
14587 }
14588
14589 return len;
14590 }
14591
14592 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
14593 is set, expect that the insn has an 8-bit immediate alternative. */
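/* For example, with SHORTFORM set "addl $3, %eax" can use the sign-extended
   8-bit immediate form (length 1), while "addl $1000, %eax" needs the full
   4-byte immediate (length 4).  */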
14594 int
14595 ix86_attr_length_immediate_default (rtx insn, int shortform)
14596 {
14597 int len = 0;
14598 int i;
14599 extract_insn_cached (insn);
14600 for (i = recog_data.n_operands - 1; i >= 0; --i)
14601 if (CONSTANT_P (recog_data.operand[i]))
14602 {
14603 gcc_assert (!len);
14604 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
14605 len = 1;
14606 else
14607 {
14608 switch (get_attr_mode (insn))
14609 {
14610 case MODE_QI:
14611 len+=1;
14612 break;
14613 case MODE_HI:
14614 len+=2;
14615 break;
14616 case MODE_SI:
14617 len+=4;
14618 break;
14619 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
14620 case MODE_DI:
14621 len+=4;
14622 break;
14623 default:
14624 fatal_insn ("unknown insn mode", insn);
14625 }
14626 }
14627 }
14628 return len;
14629 }
14630 /* Compute default value for "length_address" attribute. */
14631 int
14632 ix86_attr_length_address_default (rtx insn)
14633 {
14634 int i;
14635
14636 if (get_attr_type (insn) == TYPE_LEA)
14637 {
14638 rtx set = PATTERN (insn);
14639
14640 if (GET_CODE (set) == PARALLEL)
14641 set = XVECEXP (set, 0, 0);
14642
14643 gcc_assert (GET_CODE (set) == SET);
14644
14645 return memory_address_length (SET_SRC (set));
14646 }
14647
14648 extract_insn_cached (insn);
14649 for (i = recog_data.n_operands - 1; i >= 0; --i)
14650 if (MEM_P (recog_data.operand[i]))
14651 {
14652 return memory_address_length (XEXP (recog_data.operand[i], 0));
14654 }
14655 return 0;
14656 }
14657 \f
14658 /* Return the maximum number of instructions a cpu can issue. */
14659
14660 static int
14661 ix86_issue_rate (void)
14662 {
14663 switch (ix86_tune)
14664 {
14665 case PROCESSOR_PENTIUM:
14666 case PROCESSOR_K6:
14667 return 2;
14668
14669 case PROCESSOR_PENTIUMPRO:
14670 case PROCESSOR_PENTIUM4:
14671 case PROCESSOR_ATHLON:
14672 case PROCESSOR_K8:
14673 case PROCESSOR_NOCONA:
14674 case PROCESSOR_GENERIC32:
14675 case PROCESSOR_GENERIC64:
14676 return 3;
14677
14678 case PROCESSOR_CORE2:
14679 return 4;
14680
14681 default:
14682 return 1;
14683 }
14684 }
14685
14686 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14687 by DEP_INSN and reads nothing else set by DEP_INSN. */
14688
14689 static int
14690 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14691 {
14692 rtx set, set2;
14693
14694 /* Simplify the test for uninteresting insns. */
14695 if (insn_type != TYPE_SETCC
14696 && insn_type != TYPE_ICMOV
14697 && insn_type != TYPE_FCMOV
14698 && insn_type != TYPE_IBR)
14699 return 0;
14700
14701 if ((set = single_set (dep_insn)) != 0)
14702 {
14703 set = SET_DEST (set);
14704 set2 = NULL_RTX;
14705 }
14706 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14707 && XVECLEN (PATTERN (dep_insn), 0) == 2
14708 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14709 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14710 {
14711 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14712 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14713 }
14714 else
14715 return 0;
14716
14717 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
14718 return 0;
14719
14720 /* This test is true if the dependent insn reads the flags but
14721 not any other potentially set register. */
14722 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14723 return 0;
14724
14725 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14726 return 0;
14727
14728 return 1;
14729 }
14730
14731 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14732 address with operands set by DEP_INSN. */
14733
14734 static int
14735 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14736 {
14737 rtx addr;
14738
14739 if (insn_type == TYPE_LEA
14740 && TARGET_PENTIUM)
14741 {
14742 addr = PATTERN (insn);
14743
14744 if (GET_CODE (addr) == PARALLEL)
14745 addr = XVECEXP (addr, 0, 0);
14746
14747 gcc_assert (GET_CODE (addr) == SET);
14748
14749 addr = SET_SRC (addr);
14750 }
14751 else
14752 {
14753 int i;
14754 extract_insn_cached (insn);
14755 for (i = recog_data.n_operands - 1; i >= 0; --i)
14756 if (MEM_P (recog_data.operand[i]))
14757 {
14758 addr = XEXP (recog_data.operand[i], 0);
14759 goto found;
14760 }
14761 return 0;
14762 found:;
14763 }
14764
14765 return modified_in_p (addr, dep_insn);
14766 }
14767
14768 static int
14769 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14770 {
14771 enum attr_type insn_type, dep_insn_type;
14772 enum attr_memory memory;
14773 rtx set, set2;
14774 int dep_insn_code_number;
14775
14776 /* Anti and output dependencies have zero cost on all CPUs. */
14777 if (REG_NOTE_KIND (link) != 0)
14778 return 0;
14779
14780 dep_insn_code_number = recog_memoized (dep_insn);
14781
14782 /* If we can't recognize the insns, we can't really do anything. */
14783 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14784 return cost;
14785
14786 insn_type = get_attr_type (insn);
14787 dep_insn_type = get_attr_type (dep_insn);
14788
14789 switch (ix86_tune)
14790 {
14791 case PROCESSOR_PENTIUM:
14792 /* Address Generation Interlock adds a cycle of latency. */
14793 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14794 cost += 1;
14795
14796 /* ??? Compares pair with jump/setcc. */
14797 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14798 cost = 0;
14799
14800 /* Floating point stores require value to be ready one cycle earlier. */
14801 if (insn_type == TYPE_FMOV
14802 && get_attr_memory (insn) == MEMORY_STORE
14803 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14804 cost += 1;
14805 break;
14806
14807 case PROCESSOR_PENTIUMPRO:
14808 memory = get_attr_memory (insn);
14809
14810 /* INT->FP conversion is expensive. */
14811 if (get_attr_fp_int_src (dep_insn))
14812 cost += 5;
14813
14814 /* There is one cycle extra latency between an FP op and a store. */
14815 if (insn_type == TYPE_FMOV
14816 && (set = single_set (dep_insn)) != NULL_RTX
14817 && (set2 = single_set (insn)) != NULL_RTX
14818 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14819 && MEM_P (SET_DEST (set2)))
14820 cost += 1;
14821
14822 /* Show the ability of the reorder buffer to hide the latency of a load
14823 by executing it in parallel with the previous instruction when the
14824 previous instruction is not needed to compute the address. */
14825 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14826 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14827 {
14828 /* Claim moves take one cycle, as the core can issue one load
14829 at a time and the next load can start a cycle later. */
14830 if (dep_insn_type == TYPE_IMOV
14831 || dep_insn_type == TYPE_FMOV)
14832 cost = 1;
14833 else if (cost > 1)
14834 cost--;
14835 }
14836 break;
14837
14838 case PROCESSOR_K6:
14839 memory = get_attr_memory (insn);
14840
14841 /* The esp dependency is resolved before the instruction is really
14842 finished. */
14843 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14844 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14845 return 1;
14846
14847 /* INT->FP conversion is expensive. */
14848 if (get_attr_fp_int_src (dep_insn))
14849 cost += 5;
14850
14851 /* Show the ability of the reorder buffer to hide the latency of a load
14852 by executing it in parallel with the previous instruction when the
14853 previous instruction is not needed to compute the address. */
14854 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14855 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14856 {
14857 /* Claim moves take one cycle, as the core can issue one load
14858 at a time and the next load can start a cycle later. */
14859 if (dep_insn_type == TYPE_IMOV
14860 || dep_insn_type == TYPE_FMOV)
14861 cost = 1;
14862 else if (cost > 2)
14863 cost -= 2;
14864 else
14865 cost = 1;
14866 }
14867 break;
14868
14869 case PROCESSOR_ATHLON:
14870 case PROCESSOR_K8:
14871 case PROCESSOR_GENERIC32:
14872 case PROCESSOR_GENERIC64:
14873 memory = get_attr_memory (insn);
14874
14875 /* Show the ability of the reorder buffer to hide the latency of a load
14876 by executing it in parallel with the previous instruction when the
14877 previous instruction is not needed to compute the address. */
14878 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14879 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14880 {
14881 enum attr_unit unit = get_attr_unit (insn);
14882 int loadcost = 3;
14883
14884 /* Because of the difference between the length of integer and
14885 floating unit pipeline preparation stages, the memory operands
14886 for floating point are cheaper.
14887
14888 ??? For Athlon the difference is most probably 2. */
14889 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14890 loadcost = 3;
14891 else
14892 loadcost = TARGET_ATHLON ? 2 : 0;
14893
14894 if (cost >= loadcost)
14895 cost -= loadcost;
14896 else
14897 cost = 0;
14898 }
14899
14900 default:
14901 break;
14902 }
14903
14904 return cost;
14905 }
14906
14907 /* How many alternative schedules to try. This should be as wide as the
14908 scheduling freedom in the DFA, but no wider. Making this value too
14909 large results in extra work for the scheduler. */
14910
14911 static int
14912 ia32_multipass_dfa_lookahead (void)
14913 {
14914 if (ix86_tune == PROCESSOR_PENTIUM)
14915 return 2;
14916
14917 if (ix86_tune == PROCESSOR_PENTIUMPRO
14918 || ix86_tune == PROCESSOR_K6)
14919 return 1;
14920
14921 else
14922 return 0;
14923 }
14924
14925 \f
14926 /* Compute the alignment given to a constant that is being placed in memory.
14927 EXP is the constant and ALIGN is the alignment that the object would
14928 ordinarily have.
14929 The value of this function is used instead of that alignment to align
14930 the object. */
14931
14932 int
14933 ix86_constant_alignment (tree exp, int align)
14934 {
14935 if (TREE_CODE (exp) == REAL_CST)
14936 {
14937 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14938 return 64;
14939 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14940 return 128;
14941 }
14942 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14943 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14944 return BITS_PER_WORD;
14945
14946 return align;
14947 }
14948
14949 /* Compute the alignment for a static variable.
14950 TYPE is the data type, and ALIGN is the alignment that
14951 the object would ordinarily have. The value of this function is used
14952 instead of that alignment to align the object. */
14953
14954 int
14955 ix86_data_alignment (tree type, int align)
14956 {
14957 int max_align = optimize_size ? BITS_PER_WORD : 256;
14958
14959 if (AGGREGATE_TYPE_P (type)
14960 && TYPE_SIZE (type)
14961 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14962 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14963 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14964 && align < max_align)
14965 align = max_align;
14966
14967 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14968 to a 16-byte boundary. */
14969 if (TARGET_64BIT)
14970 {
14971 if (AGGREGATE_TYPE_P (type)
14972 && TYPE_SIZE (type)
14973 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14974 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14975 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14976 return 128;
14977 }
14978
14979 if (TREE_CODE (type) == ARRAY_TYPE)
14980 {
14981 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14982 return 64;
14983 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14984 return 128;
14985 }
14986 else if (TREE_CODE (type) == COMPLEX_TYPE)
14987 {
14988
14989 if (TYPE_MODE (type) == DCmode && align < 64)
14990 return 64;
14991 if (TYPE_MODE (type) == XCmode && align < 128)
14992 return 128;
14993 }
14994 else if ((TREE_CODE (type) == RECORD_TYPE
14995 || TREE_CODE (type) == UNION_TYPE
14996 || TREE_CODE (type) == QUAL_UNION_TYPE)
14997 && TYPE_FIELDS (type))
14998 {
14999 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15000 return 64;
15001 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15002 return 128;
15003 }
15004 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15005 || TREE_CODE (type) == INTEGER_TYPE)
15006 {
15007 if (TYPE_MODE (type) == DFmode && align < 64)
15008 return 64;
15009 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15010 return 128;
15011 }
15012
15013 return align;
15014 }
15015
15016 /* Compute the alignment for a local variable.
15017 TYPE is the data type, and ALIGN is the alignment that
15018 the object would ordinarily have. The value of this macro is used
15019 instead of that alignment to align the object. */
15020
15021 int
15022 ix86_local_alignment (tree type, int align)
15023 {
15024 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15025 to a 16-byte boundary. */
15026 if (TARGET_64BIT)
15027 {
15028 if (AGGREGATE_TYPE_P (type)
15029 && TYPE_SIZE (type)
15030 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15031 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15032 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15033 return 128;
15034 }
15035 if (TREE_CODE (type) == ARRAY_TYPE)
15036 {
15037 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15038 return 64;
15039 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15040 return 128;
15041 }
15042 else if (TREE_CODE (type) == COMPLEX_TYPE)
15043 {
15044 if (TYPE_MODE (type) == DCmode && align < 64)
15045 return 64;
15046 if (TYPE_MODE (type) == XCmode && align < 128)
15047 return 128;
15048 }
15049 else if ((TREE_CODE (type) == RECORD_TYPE
15050 || TREE_CODE (type) == UNION_TYPE
15051 || TREE_CODE (type) == QUAL_UNION_TYPE)
15052 && TYPE_FIELDS (type))
15053 {
15054 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15055 return 64;
15056 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15057 return 128;
15058 }
15059 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15060 || TREE_CODE (type) == INTEGER_TYPE)
15061 {
15062
15063 if (TYPE_MODE (type) == DFmode && align < 64)
15064 return 64;
15065 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15066 return 128;
15067 }
15068 return align;
15069 }
15070 \f
15071 /* Emit RTL insns to initialize the variable parts of a trampoline.
15072 FNADDR is an RTX for the address of the function's pure code.
15073 CXT is an RTX for the static chain value for the function. */
15074 void
15075 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15076 {
15077 if (!TARGET_64BIT)
15078 {
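/* The 32-bit trampoline is 10 bytes:
     b9 <cxt>    movl $CXT, %ecx    (static chain value)
     e9 <disp>   jmp  FNADDR        (PC-relative, hence DISP below)  */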
15079 /* Compute offset from the end of the jmp to the target function. */
15080 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15081 plus_constant (tramp, 10),
15082 NULL_RTX, 1, OPTAB_DIRECT);
15083 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15084 gen_int_mode (0xb9, QImode));
15085 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15086 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15087 gen_int_mode (0xe9, QImode));
15088 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15089 }
15090 else
15091 {
15092 int offset = 0;
15093 /* Try to load the address with the shorter movl instead of movabs.
15094 We may want to support movq for kernel mode, but the kernel does not
15095 use trampolines at the moment. */
15096 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15097 {
15098 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15099 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15100 gen_int_mode (0xbb41, HImode));
15101 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15102 gen_lowpart (SImode, fnaddr));
15103 offset += 6;
15104 }
15105 else
15106 {
15107 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15108 gen_int_mode (0xbb49, HImode));
15109 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15110 fnaddr);
15111 offset += 10;
15112 }
15113 /* Load static chain using movabs to r10. */
15114 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15115 gen_int_mode (0xba49, HImode));
15116 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15117 cxt);
15118 offset += 10;
15119 /* Jump to r11. */
15120 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15121 gen_int_mode (0xff49, HImode));
15122 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15123 gen_int_mode (0xe3, QImode));
15124 offset += 3;
15125 gcc_assert (offset <= TRAMPOLINE_SIZE);
15126 }
15127
15128 #ifdef ENABLE_EXECUTE_STACK
15129 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15130 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15131 #endif
15132 }
15133 \f
15134 /* Codes for all the SSE/MMX builtins. */
15135 enum ix86_builtins
15136 {
15137 IX86_BUILTIN_ADDPS,
15138 IX86_BUILTIN_ADDSS,
15139 IX86_BUILTIN_DIVPS,
15140 IX86_BUILTIN_DIVSS,
15141 IX86_BUILTIN_MULPS,
15142 IX86_BUILTIN_MULSS,
15143 IX86_BUILTIN_SUBPS,
15144 IX86_BUILTIN_SUBSS,
15145
15146 IX86_BUILTIN_CMPEQPS,
15147 IX86_BUILTIN_CMPLTPS,
15148 IX86_BUILTIN_CMPLEPS,
15149 IX86_BUILTIN_CMPGTPS,
15150 IX86_BUILTIN_CMPGEPS,
15151 IX86_BUILTIN_CMPNEQPS,
15152 IX86_BUILTIN_CMPNLTPS,
15153 IX86_BUILTIN_CMPNLEPS,
15154 IX86_BUILTIN_CMPNGTPS,
15155 IX86_BUILTIN_CMPNGEPS,
15156 IX86_BUILTIN_CMPORDPS,
15157 IX86_BUILTIN_CMPUNORDPS,
15158 IX86_BUILTIN_CMPEQSS,
15159 IX86_BUILTIN_CMPLTSS,
15160 IX86_BUILTIN_CMPLESS,
15161 IX86_BUILTIN_CMPNEQSS,
15162 IX86_BUILTIN_CMPNLTSS,
15163 IX86_BUILTIN_CMPNLESS,
15164 IX86_BUILTIN_CMPNGTSS,
15165 IX86_BUILTIN_CMPNGESS,
15166 IX86_BUILTIN_CMPORDSS,
15167 IX86_BUILTIN_CMPUNORDSS,
15168
15169 IX86_BUILTIN_COMIEQSS,
15170 IX86_BUILTIN_COMILTSS,
15171 IX86_BUILTIN_COMILESS,
15172 IX86_BUILTIN_COMIGTSS,
15173 IX86_BUILTIN_COMIGESS,
15174 IX86_BUILTIN_COMINEQSS,
15175 IX86_BUILTIN_UCOMIEQSS,
15176 IX86_BUILTIN_UCOMILTSS,
15177 IX86_BUILTIN_UCOMILESS,
15178 IX86_BUILTIN_UCOMIGTSS,
15179 IX86_BUILTIN_UCOMIGESS,
15180 IX86_BUILTIN_UCOMINEQSS,
15181
15182 IX86_BUILTIN_CVTPI2PS,
15183 IX86_BUILTIN_CVTPS2PI,
15184 IX86_BUILTIN_CVTSI2SS,
15185 IX86_BUILTIN_CVTSI642SS,
15186 IX86_BUILTIN_CVTSS2SI,
15187 IX86_BUILTIN_CVTSS2SI64,
15188 IX86_BUILTIN_CVTTPS2PI,
15189 IX86_BUILTIN_CVTTSS2SI,
15190 IX86_BUILTIN_CVTTSS2SI64,
15191
15192 IX86_BUILTIN_MAXPS,
15193 IX86_BUILTIN_MAXSS,
15194 IX86_BUILTIN_MINPS,
15195 IX86_BUILTIN_MINSS,
15196
15197 IX86_BUILTIN_LOADUPS,
15198 IX86_BUILTIN_STOREUPS,
15199 IX86_BUILTIN_MOVSS,
15200
15201 IX86_BUILTIN_MOVHLPS,
15202 IX86_BUILTIN_MOVLHPS,
15203 IX86_BUILTIN_LOADHPS,
15204 IX86_BUILTIN_LOADLPS,
15205 IX86_BUILTIN_STOREHPS,
15206 IX86_BUILTIN_STORELPS,
15207
15208 IX86_BUILTIN_MASKMOVQ,
15209 IX86_BUILTIN_MOVMSKPS,
15210 IX86_BUILTIN_PMOVMSKB,
15211
15212 IX86_BUILTIN_MOVNTPS,
15213 IX86_BUILTIN_MOVNTQ,
15214
15215 IX86_BUILTIN_LOADDQU,
15216 IX86_BUILTIN_STOREDQU,
15217
15218 IX86_BUILTIN_PACKSSWB,
15219 IX86_BUILTIN_PACKSSDW,
15220 IX86_BUILTIN_PACKUSWB,
15221
15222 IX86_BUILTIN_PADDB,
15223 IX86_BUILTIN_PADDW,
15224 IX86_BUILTIN_PADDD,
15225 IX86_BUILTIN_PADDQ,
15226 IX86_BUILTIN_PADDSB,
15227 IX86_BUILTIN_PADDSW,
15228 IX86_BUILTIN_PADDUSB,
15229 IX86_BUILTIN_PADDUSW,
15230 IX86_BUILTIN_PSUBB,
15231 IX86_BUILTIN_PSUBW,
15232 IX86_BUILTIN_PSUBD,
15233 IX86_BUILTIN_PSUBQ,
15234 IX86_BUILTIN_PSUBSB,
15235 IX86_BUILTIN_PSUBSW,
15236 IX86_BUILTIN_PSUBUSB,
15237 IX86_BUILTIN_PSUBUSW,
15238
15239 IX86_BUILTIN_PAND,
15240 IX86_BUILTIN_PANDN,
15241 IX86_BUILTIN_POR,
15242 IX86_BUILTIN_PXOR,
15243
15244 IX86_BUILTIN_PAVGB,
15245 IX86_BUILTIN_PAVGW,
15246
15247 IX86_BUILTIN_PCMPEQB,
15248 IX86_BUILTIN_PCMPEQW,
15249 IX86_BUILTIN_PCMPEQD,
15250 IX86_BUILTIN_PCMPGTB,
15251 IX86_BUILTIN_PCMPGTW,
15252 IX86_BUILTIN_PCMPGTD,
15253
15254 IX86_BUILTIN_PMADDWD,
15255
15256 IX86_BUILTIN_PMAXSW,
15257 IX86_BUILTIN_PMAXUB,
15258 IX86_BUILTIN_PMINSW,
15259 IX86_BUILTIN_PMINUB,
15260
15261 IX86_BUILTIN_PMULHUW,
15262 IX86_BUILTIN_PMULHW,
15263 IX86_BUILTIN_PMULLW,
15264
15265 IX86_BUILTIN_PSADBW,
15266 IX86_BUILTIN_PSHUFW,
15267
15268 IX86_BUILTIN_PSLLW,
15269 IX86_BUILTIN_PSLLD,
15270 IX86_BUILTIN_PSLLQ,
15271 IX86_BUILTIN_PSRAW,
15272 IX86_BUILTIN_PSRAD,
15273 IX86_BUILTIN_PSRLW,
15274 IX86_BUILTIN_PSRLD,
15275 IX86_BUILTIN_PSRLQ,
15276 IX86_BUILTIN_PSLLWI,
15277 IX86_BUILTIN_PSLLDI,
15278 IX86_BUILTIN_PSLLQI,
15279 IX86_BUILTIN_PSRAWI,
15280 IX86_BUILTIN_PSRADI,
15281 IX86_BUILTIN_PSRLWI,
15282 IX86_BUILTIN_PSRLDI,
15283 IX86_BUILTIN_PSRLQI,
15284
15285 IX86_BUILTIN_PUNPCKHBW,
15286 IX86_BUILTIN_PUNPCKHWD,
15287 IX86_BUILTIN_PUNPCKHDQ,
15288 IX86_BUILTIN_PUNPCKLBW,
15289 IX86_BUILTIN_PUNPCKLWD,
15290 IX86_BUILTIN_PUNPCKLDQ,
15291
15292 IX86_BUILTIN_SHUFPS,
15293
15294 IX86_BUILTIN_RCPPS,
15295 IX86_BUILTIN_RCPSS,
15296 IX86_BUILTIN_RSQRTPS,
15297 IX86_BUILTIN_RSQRTSS,
15298 IX86_BUILTIN_SQRTPS,
15299 IX86_BUILTIN_SQRTSS,
15300
15301 IX86_BUILTIN_UNPCKHPS,
15302 IX86_BUILTIN_UNPCKLPS,
15303
15304 IX86_BUILTIN_ANDPS,
15305 IX86_BUILTIN_ANDNPS,
15306 IX86_BUILTIN_ORPS,
15307 IX86_BUILTIN_XORPS,
15308
15309 IX86_BUILTIN_EMMS,
15310 IX86_BUILTIN_LDMXCSR,
15311 IX86_BUILTIN_STMXCSR,
15312 IX86_BUILTIN_SFENCE,
15313
15314 /* 3DNow! Original */
15315 IX86_BUILTIN_FEMMS,
15316 IX86_BUILTIN_PAVGUSB,
15317 IX86_BUILTIN_PF2ID,
15318 IX86_BUILTIN_PFACC,
15319 IX86_BUILTIN_PFADD,
15320 IX86_BUILTIN_PFCMPEQ,
15321 IX86_BUILTIN_PFCMPGE,
15322 IX86_BUILTIN_PFCMPGT,
15323 IX86_BUILTIN_PFMAX,
15324 IX86_BUILTIN_PFMIN,
15325 IX86_BUILTIN_PFMUL,
15326 IX86_BUILTIN_PFRCP,
15327 IX86_BUILTIN_PFRCPIT1,
15328 IX86_BUILTIN_PFRCPIT2,
15329 IX86_BUILTIN_PFRSQIT1,
15330 IX86_BUILTIN_PFRSQRT,
15331 IX86_BUILTIN_PFSUB,
15332 IX86_BUILTIN_PFSUBR,
15333 IX86_BUILTIN_PI2FD,
15334 IX86_BUILTIN_PMULHRW,
15335
15336 /* 3DNow! Athlon Extensions */
15337 IX86_BUILTIN_PF2IW,
15338 IX86_BUILTIN_PFNACC,
15339 IX86_BUILTIN_PFPNACC,
15340 IX86_BUILTIN_PI2FW,
15341 IX86_BUILTIN_PSWAPDSI,
15342 IX86_BUILTIN_PSWAPDSF,
15343
15344 /* SSE2 */
15345 IX86_BUILTIN_ADDPD,
15346 IX86_BUILTIN_ADDSD,
15347 IX86_BUILTIN_DIVPD,
15348 IX86_BUILTIN_DIVSD,
15349 IX86_BUILTIN_MULPD,
15350 IX86_BUILTIN_MULSD,
15351 IX86_BUILTIN_SUBPD,
15352 IX86_BUILTIN_SUBSD,
15353
15354 IX86_BUILTIN_CMPEQPD,
15355 IX86_BUILTIN_CMPLTPD,
15356 IX86_BUILTIN_CMPLEPD,
15357 IX86_BUILTIN_CMPGTPD,
15358 IX86_BUILTIN_CMPGEPD,
15359 IX86_BUILTIN_CMPNEQPD,
15360 IX86_BUILTIN_CMPNLTPD,
15361 IX86_BUILTIN_CMPNLEPD,
15362 IX86_BUILTIN_CMPNGTPD,
15363 IX86_BUILTIN_CMPNGEPD,
15364 IX86_BUILTIN_CMPORDPD,
15365 IX86_BUILTIN_CMPUNORDPD,
15366 IX86_BUILTIN_CMPNEPD,
15367 IX86_BUILTIN_CMPEQSD,
15368 IX86_BUILTIN_CMPLTSD,
15369 IX86_BUILTIN_CMPLESD,
15370 IX86_BUILTIN_CMPNEQSD,
15371 IX86_BUILTIN_CMPNLTSD,
15372 IX86_BUILTIN_CMPNLESD,
15373 IX86_BUILTIN_CMPORDSD,
15374 IX86_BUILTIN_CMPUNORDSD,
15375 IX86_BUILTIN_CMPNESD,
15376
15377 IX86_BUILTIN_COMIEQSD,
15378 IX86_BUILTIN_COMILTSD,
15379 IX86_BUILTIN_COMILESD,
15380 IX86_BUILTIN_COMIGTSD,
15381 IX86_BUILTIN_COMIGESD,
15382 IX86_BUILTIN_COMINEQSD,
15383 IX86_BUILTIN_UCOMIEQSD,
15384 IX86_BUILTIN_UCOMILTSD,
15385 IX86_BUILTIN_UCOMILESD,
15386 IX86_BUILTIN_UCOMIGTSD,
15387 IX86_BUILTIN_UCOMIGESD,
15388 IX86_BUILTIN_UCOMINEQSD,
15389
15390 IX86_BUILTIN_MAXPD,
15391 IX86_BUILTIN_MAXSD,
15392 IX86_BUILTIN_MINPD,
15393 IX86_BUILTIN_MINSD,
15394
15395 IX86_BUILTIN_ANDPD,
15396 IX86_BUILTIN_ANDNPD,
15397 IX86_BUILTIN_ORPD,
15398 IX86_BUILTIN_XORPD,
15399
15400 IX86_BUILTIN_SQRTPD,
15401 IX86_BUILTIN_SQRTSD,
15402
15403 IX86_BUILTIN_UNPCKHPD,
15404 IX86_BUILTIN_UNPCKLPD,
15405
15406 IX86_BUILTIN_SHUFPD,
15407
15408 IX86_BUILTIN_LOADUPD,
15409 IX86_BUILTIN_STOREUPD,
15410 IX86_BUILTIN_MOVSD,
15411
15412 IX86_BUILTIN_LOADHPD,
15413 IX86_BUILTIN_LOADLPD,
15414
15415 IX86_BUILTIN_CVTDQ2PD,
15416 IX86_BUILTIN_CVTDQ2PS,
15417
15418 IX86_BUILTIN_CVTPD2DQ,
15419 IX86_BUILTIN_CVTPD2PI,
15420 IX86_BUILTIN_CVTPD2PS,
15421 IX86_BUILTIN_CVTTPD2DQ,
15422 IX86_BUILTIN_CVTTPD2PI,
15423
15424 IX86_BUILTIN_CVTPI2PD,
15425 IX86_BUILTIN_CVTSI2SD,
15426 IX86_BUILTIN_CVTSI642SD,
15427
15428 IX86_BUILTIN_CVTSD2SI,
15429 IX86_BUILTIN_CVTSD2SI64,
15430 IX86_BUILTIN_CVTSD2SS,
15431 IX86_BUILTIN_CVTSS2SD,
15432 IX86_BUILTIN_CVTTSD2SI,
15433 IX86_BUILTIN_CVTTSD2SI64,
15434
15435 IX86_BUILTIN_CVTPS2DQ,
15436 IX86_BUILTIN_CVTPS2PD,
15437 IX86_BUILTIN_CVTTPS2DQ,
15438
15439 IX86_BUILTIN_MOVNTI,
15440 IX86_BUILTIN_MOVNTPD,
15441 IX86_BUILTIN_MOVNTDQ,
15442
15443 /* SSE2 MMX */
15444 IX86_BUILTIN_MASKMOVDQU,
15445 IX86_BUILTIN_MOVMSKPD,
15446 IX86_BUILTIN_PMOVMSKB128,
15447
15448 IX86_BUILTIN_PACKSSWB128,
15449 IX86_BUILTIN_PACKSSDW128,
15450 IX86_BUILTIN_PACKUSWB128,
15451
15452 IX86_BUILTIN_PADDB128,
15453 IX86_BUILTIN_PADDW128,
15454 IX86_BUILTIN_PADDD128,
15455 IX86_BUILTIN_PADDQ128,
15456 IX86_BUILTIN_PADDSB128,
15457 IX86_BUILTIN_PADDSW128,
15458 IX86_BUILTIN_PADDUSB128,
15459 IX86_BUILTIN_PADDUSW128,
15460 IX86_BUILTIN_PSUBB128,
15461 IX86_BUILTIN_PSUBW128,
15462 IX86_BUILTIN_PSUBD128,
15463 IX86_BUILTIN_PSUBQ128,
15464 IX86_BUILTIN_PSUBSB128,
15465 IX86_BUILTIN_PSUBSW128,
15466 IX86_BUILTIN_PSUBUSB128,
15467 IX86_BUILTIN_PSUBUSW128,
15468
15469 IX86_BUILTIN_PAND128,
15470 IX86_BUILTIN_PANDN128,
15471 IX86_BUILTIN_POR128,
15472 IX86_BUILTIN_PXOR128,
15473
15474 IX86_BUILTIN_PAVGB128,
15475 IX86_BUILTIN_PAVGW128,
15476
15477 IX86_BUILTIN_PCMPEQB128,
15478 IX86_BUILTIN_PCMPEQW128,
15479 IX86_BUILTIN_PCMPEQD128,
15480 IX86_BUILTIN_PCMPGTB128,
15481 IX86_BUILTIN_PCMPGTW128,
15482 IX86_BUILTIN_PCMPGTD128,
15483
15484 IX86_BUILTIN_PMADDWD128,
15485
15486 IX86_BUILTIN_PMAXSW128,
15487 IX86_BUILTIN_PMAXUB128,
15488 IX86_BUILTIN_PMINSW128,
15489 IX86_BUILTIN_PMINUB128,
15490
15491 IX86_BUILTIN_PMULUDQ,
15492 IX86_BUILTIN_PMULUDQ128,
15493 IX86_BUILTIN_PMULHUW128,
15494 IX86_BUILTIN_PMULHW128,
15495 IX86_BUILTIN_PMULLW128,
15496
15497 IX86_BUILTIN_PSADBW128,
15498 IX86_BUILTIN_PSHUFHW,
15499 IX86_BUILTIN_PSHUFLW,
15500 IX86_BUILTIN_PSHUFD,
15501
15502 IX86_BUILTIN_PSLLW128,
15503 IX86_BUILTIN_PSLLD128,
15504 IX86_BUILTIN_PSLLQ128,
15505 IX86_BUILTIN_PSRAW128,
15506 IX86_BUILTIN_PSRAD128,
15507 IX86_BUILTIN_PSRLW128,
15508 IX86_BUILTIN_PSRLD128,
15509 IX86_BUILTIN_PSRLQ128,
15510 IX86_BUILTIN_PSLLDQI128,
15511 IX86_BUILTIN_PSLLWI128,
15512 IX86_BUILTIN_PSLLDI128,
15513 IX86_BUILTIN_PSLLQI128,
15514 IX86_BUILTIN_PSRAWI128,
15515 IX86_BUILTIN_PSRADI128,
15516 IX86_BUILTIN_PSRLDQI128,
15517 IX86_BUILTIN_PSRLWI128,
15518 IX86_BUILTIN_PSRLDI128,
15519 IX86_BUILTIN_PSRLQI128,
15520
15521 IX86_BUILTIN_PUNPCKHBW128,
15522 IX86_BUILTIN_PUNPCKHWD128,
15523 IX86_BUILTIN_PUNPCKHDQ128,
15524 IX86_BUILTIN_PUNPCKHQDQ128,
15525 IX86_BUILTIN_PUNPCKLBW128,
15526 IX86_BUILTIN_PUNPCKLWD128,
15527 IX86_BUILTIN_PUNPCKLDQ128,
15528 IX86_BUILTIN_PUNPCKLQDQ128,
15529
15530 IX86_BUILTIN_CLFLUSH,
15531 IX86_BUILTIN_MFENCE,
15532 IX86_BUILTIN_LFENCE,
15533
15534 /* Prescott New Instructions. */
15535 IX86_BUILTIN_ADDSUBPS,
15536 IX86_BUILTIN_HADDPS,
15537 IX86_BUILTIN_HSUBPS,
15538 IX86_BUILTIN_MOVSHDUP,
15539 IX86_BUILTIN_MOVSLDUP,
15540 IX86_BUILTIN_ADDSUBPD,
15541 IX86_BUILTIN_HADDPD,
15542 IX86_BUILTIN_HSUBPD,
15543 IX86_BUILTIN_LDDQU,
15544
15545 IX86_BUILTIN_MONITOR,
15546 IX86_BUILTIN_MWAIT,
15547
15548 /* SSSE3. */
15549 IX86_BUILTIN_PHADDW,
15550 IX86_BUILTIN_PHADDD,
15551 IX86_BUILTIN_PHADDSW,
15552 IX86_BUILTIN_PHSUBW,
15553 IX86_BUILTIN_PHSUBD,
15554 IX86_BUILTIN_PHSUBSW,
15555 IX86_BUILTIN_PMADDUBSW,
15556 IX86_BUILTIN_PMULHRSW,
15557 IX86_BUILTIN_PSHUFB,
15558 IX86_BUILTIN_PSIGNB,
15559 IX86_BUILTIN_PSIGNW,
15560 IX86_BUILTIN_PSIGND,
15561 IX86_BUILTIN_PALIGNR,
15562 IX86_BUILTIN_PABSB,
15563 IX86_BUILTIN_PABSW,
15564 IX86_BUILTIN_PABSD,
15565
15566 IX86_BUILTIN_PHADDW128,
15567 IX86_BUILTIN_PHADDD128,
15568 IX86_BUILTIN_PHADDSW128,
15569 IX86_BUILTIN_PHSUBW128,
15570 IX86_BUILTIN_PHSUBD128,
15571 IX86_BUILTIN_PHSUBSW128,
15572 IX86_BUILTIN_PMADDUBSW128,
15573 IX86_BUILTIN_PMULHRSW128,
15574 IX86_BUILTIN_PSHUFB128,
15575 IX86_BUILTIN_PSIGNB128,
15576 IX86_BUILTIN_PSIGNW128,
15577 IX86_BUILTIN_PSIGND128,
15578 IX86_BUILTIN_PALIGNR128,
15579 IX86_BUILTIN_PABSB128,
15580 IX86_BUILTIN_PABSW128,
15581 IX86_BUILTIN_PABSD128,
15582
15583 IX86_BUILTIN_VEC_INIT_V2SI,
15584 IX86_BUILTIN_VEC_INIT_V4HI,
15585 IX86_BUILTIN_VEC_INIT_V8QI,
15586 IX86_BUILTIN_VEC_EXT_V2DF,
15587 IX86_BUILTIN_VEC_EXT_V2DI,
15588 IX86_BUILTIN_VEC_EXT_V4SF,
15589 IX86_BUILTIN_VEC_EXT_V4SI,
15590 IX86_BUILTIN_VEC_EXT_V8HI,
15591 IX86_BUILTIN_VEC_EXT_V2SI,
15592 IX86_BUILTIN_VEC_EXT_V4HI,
15593 IX86_BUILTIN_VEC_SET_V8HI,
15594 IX86_BUILTIN_VEC_SET_V4HI,
15595
15596 IX86_BUILTIN_MAX
15597 };
15598
15599 /* Table for the ix86 builtin decls. */
15600 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15601
15602 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
15603 * only if the target_flags include one of MASK. Stores the function decl
15604 * in the ix86_builtins array.
15605 * Returns the function decl, or NULL_TREE if the builtin was not added. */
15606
15607 static inline tree
15608 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
15609 {
15610 tree decl = NULL_TREE;
15611
15612 if (mask & target_flags
15613 && (!(mask & MASK_64BIT) || TARGET_64BIT))
15614 {
15615 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
15616 NULL, NULL_TREE);
15617 ix86_builtins[(int) code] = decl;
15618 }
15619
15620 return decl;
15621 }
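/* For instance, the SSE builtins are registered with calls such as
   def_builtin (MASK_SSE, "__builtin_ia32_loadups", ..., IX86_BUILTIN_LOADUPS),
   so the declaration is only created when -msse (or a superset) is enabled.  */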
15622
15623 /* Like def_builtin, but also marks the function decl "const". */
15624
15625 static inline tree
15626 def_builtin_const (int mask, const char *name, tree type,
15627 enum ix86_builtins code)
15628 {
15629 tree decl = def_builtin (mask, name, type, code);
15630 if (decl)
15631 TREE_READONLY (decl) = 1;
15632 return decl;
15633 }
15634
15635 /* Bits for builtin_description.flag. */
15636
15637 /* Set when we don't support the comparison natively, and should
15638 swap the comparison operands in order to support it. */
15639 #define BUILTIN_DESC_SWAP_OPERANDS 1
15640
15641 struct builtin_description
15642 {
15643 const unsigned int mask;
15644 const enum insn_code icode;
15645 const char *const name;
15646 const enum ix86_builtins code;
15647 const enum rtx_code comparison;
15648 const unsigned int flag;
15649 };
15650
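/* The bdesc_* tables below describe builtins in a uniform way; they are
   walked when the builtins are registered and again when a builtin call is
   expanded, mapping each builtin code to its insn pattern and, for the
   comparison builtins, to the rtx comparison code to use.  */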
15651 static const struct builtin_description bdesc_comi[] =
15652 {
15653 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15654 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15655 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15656 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15657 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15658 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15659 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15660 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15661 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15662 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15663 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15664 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15665 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15666 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15667 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15668 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15669 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15670 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15671 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15672 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15673 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15674 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15675 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15676 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15677 };
15678
15679 static const struct builtin_description bdesc_2arg[] =
15680 {
15681 /* SSE */
15682 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15683 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15684 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15685 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15686 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15687 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15688 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15689 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15690
15691 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15692 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15693 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15694 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15695 BUILTIN_DESC_SWAP_OPERANDS },
15696 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15697 BUILTIN_DESC_SWAP_OPERANDS },
15698 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15699 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15700 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15701 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15702 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15703 BUILTIN_DESC_SWAP_OPERANDS },
15704 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15705 BUILTIN_DESC_SWAP_OPERANDS },
15706 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15707 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15708 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15709 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15710 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15711 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15712 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15713 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15714 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15715 BUILTIN_DESC_SWAP_OPERANDS },
15716 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15717 BUILTIN_DESC_SWAP_OPERANDS },
15718 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
15719
15720 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15721 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15722 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15723 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15724
15725 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15726 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15727 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15728 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15729
15730 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15731 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15732 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15733 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15734 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15735
15736 /* MMX */
15737 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15738 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15739 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15740 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15741 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15742 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15743 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15744 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15745
15746 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15747 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15748 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15749 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15750 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15751 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15752 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15753 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15754
15755 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15756 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15757 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15758
15759 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15760 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15761 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15762 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15763
15764 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15765 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15766
15767 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15768 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15769 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15770 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15771 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15772 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15773
15774 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15775 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15776 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15777 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15778
15779 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15780 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15781 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15782 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15783 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15784 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15785
15786 /* Special. */
15787 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15788 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15789 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15790
15791 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15792 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15793 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15794
15795 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15796 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15797 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15798 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15799 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15800 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15801
15802 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15803 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15804 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15805 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15806 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15807 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15808
15809 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15810 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15811 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15812 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15813
15814 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15815 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15816
15817 /* SSE2 */
15818 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15819 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15820 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15821 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15822 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15823 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15824 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15825 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15826
15827 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15828 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15829 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15830 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15831 BUILTIN_DESC_SWAP_OPERANDS },
15832 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15833 BUILTIN_DESC_SWAP_OPERANDS },
15834 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15835 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15836 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15837 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15838 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15839 BUILTIN_DESC_SWAP_OPERANDS },
15840 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15841 BUILTIN_DESC_SWAP_OPERANDS },
15842 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15843 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15844 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15845 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15846 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15847 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15848 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15849 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15850 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15851
15852 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15853 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15854 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15855 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15856
15857 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15858 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15859 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15860 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15861
15862 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15863 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15864 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15865
15866 /* SSE2 MMX */
15867 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15868 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15869 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15870 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15871 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15872 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15873 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15874 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15875
15876 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15877 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15878 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15879 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15880 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15881 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15882 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15883 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15884
15885 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15886 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15887
15888 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15889 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15890 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15891 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15892
15893 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15894 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15895
15896 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15897 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15898 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15899 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15900 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15901 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15902
15903 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15904 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15905 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15906 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15907
15908 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15909 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15910 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15911 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15912 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15913 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15914 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15915 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15916
15917 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15918 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15919 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15920
15921 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15922 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15923
15924 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15925 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15926
15927 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15928 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15929 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15930
15931 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15932 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15933 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15934
15935 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15936 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15937
15938 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15939
15940 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15941 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15942 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15943 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15944
15945 /* SSE3 MMX */
15946 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15947 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15948 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15949 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15950 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15951 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15952
15953 /* SSSE3 */
15954 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15955 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15956 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15957 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15958 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15959 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15960 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15961 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15962 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15963 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15964 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15965 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15966 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15967 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15968 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15969 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15970 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15971 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15972 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15973 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15974 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15975 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15976 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15977 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15978 };
15979
15980 static const struct builtin_description bdesc_1arg[] =
15981 {
15982 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15983 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15984
15985 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15986 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15987 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15988
15989 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15990 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15991 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15992 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15993 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15994 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15995
15996 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15997 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15998
15999 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16000
16001 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16002 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16003
16004 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16005 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16006 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16007 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16008 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16009
16010 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16011
16012 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16013 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16014 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16015 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16016
16017 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16018 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16019 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16020
16021 /* SSE3 */
16022 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16023 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16024
16025 /* SSSE3 */
16026 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16027 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16028 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16029 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16030 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16031 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16032 };
16033
16034 static void
16035 ix86_init_builtins (void)
16036 {
16037 if (TARGET_MMX)
16038 ix86_init_mmx_sse_builtins ();
16039 }
16040
16041 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16042 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16043 builtins. */
16044 static void
16045 ix86_init_mmx_sse_builtins (void)
16046 {
16047 const struct builtin_description * d;
16048 size_t i;
16049
16050 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16051 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16052 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16053 tree V2DI_type_node
16054 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16055 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16056 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16057 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16058 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16059 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16060 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16061
16062 tree pchar_type_node = build_pointer_type (char_type_node);
16063 tree pcchar_type_node = build_pointer_type (
16064 build_type_variant (char_type_node, 1, 0));
16065 tree pfloat_type_node = build_pointer_type (float_type_node);
16066 tree pcfloat_type_node = build_pointer_type (
16067 build_type_variant (float_type_node, 1, 0));
16068 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16069 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16070 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16071
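/* The helper type trees below are named RET_ftype_ARGS: for example,
   v4sf_ftype_v4sf_int is the type of a function taking a V4SF vector
   and an int and returning a V4SF vector. */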
16072 /* Comparisons. */
16073 tree int_ftype_v4sf_v4sf
16074 = build_function_type_list (integer_type_node,
16075 V4SF_type_node, V4SF_type_node, NULL_TREE);
16076 tree v4si_ftype_v4sf_v4sf
16077 = build_function_type_list (V4SI_type_node,
16078 V4SF_type_node, V4SF_type_node, NULL_TREE);
16079 /* MMX/SSE/integer conversions. */
16080 tree int_ftype_v4sf
16081 = build_function_type_list (integer_type_node,
16082 V4SF_type_node, NULL_TREE);
16083 tree int64_ftype_v4sf
16084 = build_function_type_list (long_long_integer_type_node,
16085 V4SF_type_node, NULL_TREE);
16086 tree int_ftype_v8qi
16087 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16088 tree v4sf_ftype_v4sf_int
16089 = build_function_type_list (V4SF_type_node,
16090 V4SF_type_node, integer_type_node, NULL_TREE);
16091 tree v4sf_ftype_v4sf_int64
16092 = build_function_type_list (V4SF_type_node,
16093 V4SF_type_node, long_long_integer_type_node,
16094 NULL_TREE);
16095 tree v4sf_ftype_v4sf_v2si
16096 = build_function_type_list (V4SF_type_node,
16097 V4SF_type_node, V2SI_type_node, NULL_TREE);
16098
16099 /* Miscellaneous. */
16100 tree v8qi_ftype_v4hi_v4hi
16101 = build_function_type_list (V8QI_type_node,
16102 V4HI_type_node, V4HI_type_node, NULL_TREE);
16103 tree v4hi_ftype_v2si_v2si
16104 = build_function_type_list (V4HI_type_node,
16105 V2SI_type_node, V2SI_type_node, NULL_TREE);
16106 tree v4sf_ftype_v4sf_v4sf_int
16107 = build_function_type_list (V4SF_type_node,
16108 V4SF_type_node, V4SF_type_node,
16109 integer_type_node, NULL_TREE);
16110 tree v2si_ftype_v4hi_v4hi
16111 = build_function_type_list (V2SI_type_node,
16112 V4HI_type_node, V4HI_type_node, NULL_TREE);
16113 tree v4hi_ftype_v4hi_int
16114 = build_function_type_list (V4HI_type_node,
16115 V4HI_type_node, integer_type_node, NULL_TREE);
16116 tree v4hi_ftype_v4hi_di
16117 = build_function_type_list (V4HI_type_node,
16118 V4HI_type_node, long_long_unsigned_type_node,
16119 NULL_TREE);
16120 tree v2si_ftype_v2si_di
16121 = build_function_type_list (V2SI_type_node,
16122 V2SI_type_node, long_long_unsigned_type_node,
16123 NULL_TREE);
16124 tree void_ftype_void
16125 = build_function_type (void_type_node, void_list_node);
16126 tree void_ftype_unsigned
16127 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16128 tree void_ftype_unsigned_unsigned
16129 = build_function_type_list (void_type_node, unsigned_type_node,
16130 unsigned_type_node, NULL_TREE);
16131 tree void_ftype_pcvoid_unsigned_unsigned
16132 = build_function_type_list (void_type_node, const_ptr_type_node,
16133 unsigned_type_node, unsigned_type_node,
16134 NULL_TREE);
16135 tree unsigned_ftype_void
16136 = build_function_type (unsigned_type_node, void_list_node);
16137 tree v2si_ftype_v4sf
16138 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16139 /* Loads/stores. */
16140 tree void_ftype_v8qi_v8qi_pchar
16141 = build_function_type_list (void_type_node,
16142 V8QI_type_node, V8QI_type_node,
16143 pchar_type_node, NULL_TREE);
16144 tree v4sf_ftype_pcfloat
16145 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16146 /* @@@ the type is bogus */
16147 tree v4sf_ftype_v4sf_pv2si
16148 = build_function_type_list (V4SF_type_node,
16149 V4SF_type_node, pv2si_type_node, NULL_TREE);
16150 tree void_ftype_pv2si_v4sf
16151 = build_function_type_list (void_type_node,
16152 pv2si_type_node, V4SF_type_node, NULL_TREE);
16153 tree void_ftype_pfloat_v4sf
16154 = build_function_type_list (void_type_node,
16155 pfloat_type_node, V4SF_type_node, NULL_TREE);
16156 tree void_ftype_pdi_di
16157 = build_function_type_list (void_type_node,
16158 pdi_type_node, long_long_unsigned_type_node,
16159 NULL_TREE);
16160 tree void_ftype_pv2di_v2di
16161 = build_function_type_list (void_type_node,
16162 pv2di_type_node, V2DI_type_node, NULL_TREE);
16163 /* Normal vector unops. */
16164 tree v4sf_ftype_v4sf
16165 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16166 tree v16qi_ftype_v16qi
16167 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16168 tree v8hi_ftype_v8hi
16169 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16170 tree v4si_ftype_v4si
16171 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16172 tree v8qi_ftype_v8qi
16173 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16174 tree v4hi_ftype_v4hi
16175 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16176
16177 /* Normal vector binops. */
16178 tree v4sf_ftype_v4sf_v4sf
16179 = build_function_type_list (V4SF_type_node,
16180 V4SF_type_node, V4SF_type_node, NULL_TREE);
16181 tree v8qi_ftype_v8qi_v8qi
16182 = build_function_type_list (V8QI_type_node,
16183 V8QI_type_node, V8QI_type_node, NULL_TREE);
16184 tree v4hi_ftype_v4hi_v4hi
16185 = build_function_type_list (V4HI_type_node,
16186 V4HI_type_node, V4HI_type_node, NULL_TREE);
16187 tree v2si_ftype_v2si_v2si
16188 = build_function_type_list (V2SI_type_node,
16189 V2SI_type_node, V2SI_type_node, NULL_TREE);
16190 tree di_ftype_di_di
16191 = build_function_type_list (long_long_unsigned_type_node,
16192 long_long_unsigned_type_node,
16193 long_long_unsigned_type_node, NULL_TREE);
16194
16195 tree di_ftype_di_di_int
16196 = build_function_type_list (long_long_unsigned_type_node,
16197 long_long_unsigned_type_node,
16198 long_long_unsigned_type_node,
16199 integer_type_node, NULL_TREE);
16200
16201 tree v2si_ftype_v2sf
16202 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16203 tree v2sf_ftype_v2si
16204 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16205 tree v2si_ftype_v2si
16206 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16207 tree v2sf_ftype_v2sf
16208 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16209 tree v2sf_ftype_v2sf_v2sf
16210 = build_function_type_list (V2SF_type_node,
16211 V2SF_type_node, V2SF_type_node, NULL_TREE);
16212 tree v2si_ftype_v2sf_v2sf
16213 = build_function_type_list (V2SI_type_node,
16214 V2SF_type_node, V2SF_type_node, NULL_TREE);
16215 tree pint_type_node = build_pointer_type (integer_type_node);
16216 tree pdouble_type_node = build_pointer_type (double_type_node);
16217 tree pcdouble_type_node = build_pointer_type (
16218 build_type_variant (double_type_node, 1, 0));
16219 tree int_ftype_v2df_v2df
16220 = build_function_type_list (integer_type_node,
16221 V2DF_type_node, V2DF_type_node, NULL_TREE);
16222
16223 tree void_ftype_pcvoid
16224 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16225 tree v4sf_ftype_v4si
16226 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16227 tree v4si_ftype_v4sf
16228 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16229 tree v2df_ftype_v4si
16230 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16231 tree v4si_ftype_v2df
16232 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16233 tree v2si_ftype_v2df
16234 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16235 tree v4sf_ftype_v2df
16236 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16237 tree v2df_ftype_v2si
16238 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16239 tree v2df_ftype_v4sf
16240 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16241 tree int_ftype_v2df
16242 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16243 tree int64_ftype_v2df
16244 = build_function_type_list (long_long_integer_type_node,
16245 V2DF_type_node, NULL_TREE);
16246 tree v2df_ftype_v2df_int
16247 = build_function_type_list (V2DF_type_node,
16248 V2DF_type_node, integer_type_node, NULL_TREE);
16249 tree v2df_ftype_v2df_int64
16250 = build_function_type_list (V2DF_type_node,
16251 V2DF_type_node, long_long_integer_type_node,
16252 NULL_TREE);
16253 tree v4sf_ftype_v4sf_v2df
16254 = build_function_type_list (V4SF_type_node,
16255 V4SF_type_node, V2DF_type_node, NULL_TREE);
16256 tree v2df_ftype_v2df_v4sf
16257 = build_function_type_list (V2DF_type_node,
16258 V2DF_type_node, V4SF_type_node, NULL_TREE);
16259 tree v2df_ftype_v2df_v2df_int
16260 = build_function_type_list (V2DF_type_node,
16261 V2DF_type_node, V2DF_type_node,
16262 integer_type_node,
16263 NULL_TREE);
16264 tree v2df_ftype_v2df_pcdouble
16265 = build_function_type_list (V2DF_type_node,
16266 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16267 tree void_ftype_pdouble_v2df
16268 = build_function_type_list (void_type_node,
16269 pdouble_type_node, V2DF_type_node, NULL_TREE);
16270 tree void_ftype_pint_int
16271 = build_function_type_list (void_type_node,
16272 pint_type_node, integer_type_node, NULL_TREE);
16273 tree void_ftype_v16qi_v16qi_pchar
16274 = build_function_type_list (void_type_node,
16275 V16QI_type_node, V16QI_type_node,
16276 pchar_type_node, NULL_TREE);
16277 tree v2df_ftype_pcdouble
16278 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16279 tree v2df_ftype_v2df_v2df
16280 = build_function_type_list (V2DF_type_node,
16281 V2DF_type_node, V2DF_type_node, NULL_TREE);
16282 tree v16qi_ftype_v16qi_v16qi
16283 = build_function_type_list (V16QI_type_node,
16284 V16QI_type_node, V16QI_type_node, NULL_TREE);
16285 tree v8hi_ftype_v8hi_v8hi
16286 = build_function_type_list (V8HI_type_node,
16287 V8HI_type_node, V8HI_type_node, NULL_TREE);
16288 tree v4si_ftype_v4si_v4si
16289 = build_function_type_list (V4SI_type_node,
16290 V4SI_type_node, V4SI_type_node, NULL_TREE);
16291 tree v2di_ftype_v2di_v2di
16292 = build_function_type_list (V2DI_type_node,
16293 V2DI_type_node, V2DI_type_node, NULL_TREE);
16294 tree v2di_ftype_v2df_v2df
16295 = build_function_type_list (V2DI_type_node,
16296 V2DF_type_node, V2DF_type_node, NULL_TREE);
16297 tree v2df_ftype_v2df
16298 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16299 tree v2di_ftype_v2di_int
16300 = build_function_type_list (V2DI_type_node,
16301 V2DI_type_node, integer_type_node, NULL_TREE);
16302 tree v2di_ftype_v2di_v2di_int
16303 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16304 V2DI_type_node, integer_type_node, NULL_TREE);
16305 tree v4si_ftype_v4si_int
16306 = build_function_type_list (V4SI_type_node,
16307 V4SI_type_node, integer_type_node, NULL_TREE);
16308 tree v8hi_ftype_v8hi_int
16309 = build_function_type_list (V8HI_type_node,
16310 V8HI_type_node, integer_type_node, NULL_TREE);
16311 tree v8hi_ftype_v8hi_v2di
16312 = build_function_type_list (V8HI_type_node,
16313 V8HI_type_node, V2DI_type_node, NULL_TREE);
16314 tree v4si_ftype_v4si_v2di
16315 = build_function_type_list (V4SI_type_node,
16316 V4SI_type_node, V2DI_type_node, NULL_TREE);
16317 tree v4si_ftype_v8hi_v8hi
16318 = build_function_type_list (V4SI_type_node,
16319 V8HI_type_node, V8HI_type_node, NULL_TREE);
16320 tree di_ftype_v8qi_v8qi
16321 = build_function_type_list (long_long_unsigned_type_node,
16322 V8QI_type_node, V8QI_type_node, NULL_TREE);
16323 tree di_ftype_v2si_v2si
16324 = build_function_type_list (long_long_unsigned_type_node,
16325 V2SI_type_node, V2SI_type_node, NULL_TREE);
16326 tree v2di_ftype_v16qi_v16qi
16327 = build_function_type_list (V2DI_type_node,
16328 V16QI_type_node, V16QI_type_node, NULL_TREE);
16329 tree v2di_ftype_v4si_v4si
16330 = build_function_type_list (V2DI_type_node,
16331 V4SI_type_node, V4SI_type_node, NULL_TREE);
16332 tree int_ftype_v16qi
16333 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16334 tree v16qi_ftype_pcchar
16335 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16336 tree void_ftype_pchar_v16qi
16337 = build_function_type_list (void_type_node,
16338 pchar_type_node, V16QI_type_node, NULL_TREE);
16339
16340 tree float80_type;
16341 tree float128_type;
16342 tree ftype;
16343
16344 /* The __float80 type. */
16345 if (TYPE_MODE (long_double_type_node) == XFmode)
16346 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16347 "__float80");
16348 else
16349 {
16350 /* The __float80 type. */
16351 float80_type = make_node (REAL_TYPE);
16352 TYPE_PRECISION (float80_type) = 80;
16353 layout_type (float80_type);
16354 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16355 }
16356
16357 if (TARGET_64BIT)
16358 {
16359 float128_type = make_node (REAL_TYPE);
16360 TYPE_PRECISION (float128_type) = 128;
16361 layout_type (float128_type);
16362 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16363 }
16364
16365 /* Add all builtins that are more or less simple operations on two
16366 operands. */
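/* For instance, the "__builtin_ia32_pmullw128" entry above uses
   CODE_FOR_mulv8hi3; its operand 1 is a V8HImode vector, so the loop
   below registers it with the v8hi_ftype_v8hi_v8hi signature. */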
16367 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16368 {
16369 /* Use one of the operands; the target can have a different mode for
16370 mask-generating compares. */
16371 enum machine_mode mode;
16372 tree type;
16373
16374 if (d->name == 0)
16375 continue;
16376 mode = insn_data[d->icode].operand[1].mode;
16377
16378 switch (mode)
16379 {
16380 case V16QImode:
16381 type = v16qi_ftype_v16qi_v16qi;
16382 break;
16383 case V8HImode:
16384 type = v8hi_ftype_v8hi_v8hi;
16385 break;
16386 case V4SImode:
16387 type = v4si_ftype_v4si_v4si;
16388 break;
16389 case V2DImode:
16390 type = v2di_ftype_v2di_v2di;
16391 break;
16392 case V2DFmode:
16393 type = v2df_ftype_v2df_v2df;
16394 break;
16395 case V4SFmode:
16396 type = v4sf_ftype_v4sf_v4sf;
16397 break;
16398 case V8QImode:
16399 type = v8qi_ftype_v8qi_v8qi;
16400 break;
16401 case V4HImode:
16402 type = v4hi_ftype_v4hi_v4hi;
16403 break;
16404 case V2SImode:
16405 type = v2si_ftype_v2si_v2si;
16406 break;
16407 case DImode:
16408 type = di_ftype_di_di;
16409 break;
16410
16411 default:
16412 gcc_unreachable ();
16413 }
16414
16415 /* Override for comparisons. */
16416 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16417 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16418 type = v4si_ftype_v4sf_v4sf;
16419
16420 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16421 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16422 type = v2di_ftype_v2df_v2df;
16423
16424 def_builtin (d->mask, d->name, type, d->code);
16425 }
16426
16427 /* Add all builtins that are more or less simple operations on 1 operand. */
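/* For instance, "__builtin_ia32_pabsb128" above uses CODE_FOR_absv16qi2;
   its operand 1 is V16QImode, so it gets the v16qi_ftype_v16qi type. */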

16428 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16429 {
16430 enum machine_mode mode;
16431 tree type;
16432
16433 if (d->name == 0)
16434 continue;
16435 mode = insn_data[d->icode].operand[1].mode;
16436
16437 switch (mode)
16438 {
16439 case V16QImode:
16440 type = v16qi_ftype_v16qi;
16441 break;
16442 case V8HImode:
16443 type = v8hi_ftype_v8hi;
16444 break;
16445 case V4SImode:
16446 type = v4si_ftype_v4si;
16447 break;
16448 case V2DFmode:
16449 type = v2df_ftype_v2df;
16450 break;
16451 case V4SFmode:
16452 type = v4sf_ftype_v4sf;
16453 break;
16454 case V8QImode:
16455 type = v8qi_ftype_v8qi;
16456 break;
16457 case V4HImode:
16458 type = v4hi_ftype_v4hi;
16459 break;
16460 case V2SImode:
16461 type = v2si_ftype_v2si;
16462 break;
16463
16464 default:
16465 gcc_unreachable ();
16466 }
16467
16468 def_builtin (d->mask, d->name, type, d->code);
16469 }
16470
16471 /* Add the remaining MMX insns with somewhat more complicated types. */
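/* The MMX shift-by-register builtins take the shift count as a 64-bit
   value, matching the hardware forms that read the count from an MMX
   register; hence the v4hi_ftype_v4hi_di and v2si_ftype_v2si_di types. */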
16472 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16473 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16474 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16475 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16476
16477 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16478 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16479 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16480
16481 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16482 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16483
16484 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16485 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16486
16487 /* comi/ucomi insns. */
16488 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16489 if (d->mask == MASK_SSE2)
16490 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16491 else
16492 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16493
16494 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16495 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16496 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16497
16498 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16499 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16500 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16501 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16502 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16503 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16504 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16505 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16506 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16507 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16508 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16509
16510 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16511
16512 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16513 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16514
16515 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16516 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16517 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16518 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16519
16520 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16521 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16522 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16523 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
16524
16525 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
16526
16527 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
16528
16529 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
16530 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
16531 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
16532 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
16533 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
16534 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
16535
16536 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
16537
16538 /* Original 3DNow! */
16539 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
16540 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
16541 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
16542 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
16543 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
16544 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
16545 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
16546 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
16547 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
16548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
16549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
16550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
16551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
16552 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
16553 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
16554 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
16555 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
16556 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
16557 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
16558 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
16559
16560 /* 3DNow! extension as used in the Athlon CPU. */
16561 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
16562 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
16563 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
16564 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
16565 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
16566 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
16567
16568 /* SSE2 */
16569 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
16570
16571 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
16572 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
16573
16574 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
16575 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
16576
16577 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
16578 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
16579 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
16580 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
16581 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
16582
16583 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
16584 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
16585 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
16586 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
16587
16588 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
16589 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
16590
16591 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
16592
16593 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
16594 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
16595
16596 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
16597 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
16598 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
16599 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
16600 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
16601
16602 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
16603
16604 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
16605 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
16606 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
16607 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
16608
16609 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
16610 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
16611 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
16612
16613 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
16614 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
16615 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
16616 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
16617
16618 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
16619 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
16620 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
16621
16622 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
16623 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
16624
16625 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
16626 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
16627
16628 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
16629 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
16630 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
16631
16632 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
16633 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
16634 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16635
16636 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
16637 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
16638
16639 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16640 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16641 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16642 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16643
16644 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16645 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16646 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16647 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16648
16649 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16650 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16651
16652 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16653
16654 /* Prescott New Instructions. */
16655 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16656 void_ftype_pcvoid_unsigned_unsigned,
16657 IX86_BUILTIN_MONITOR);
16658 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16659 void_ftype_unsigned_unsigned,
16660 IX86_BUILTIN_MWAIT);
16661 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16662 v4sf_ftype_v4sf,
16663 IX86_BUILTIN_MOVSHDUP);
16664 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16665 v4sf_ftype_v4sf,
16666 IX86_BUILTIN_MOVSLDUP);
16667 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16668 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16669
16670 /* SSSE3. */
16671 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16672 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16673 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16674 IX86_BUILTIN_PALIGNR);
16675
16676 /* Access to the vec_init patterns. */
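/* These builtins build a vector from individual scalar arguments, in
   argument order (see ix86_expand_vec_init_builtin below); they are
   meant to be invoked from the intrinsic headers, e.g. (illustrative)
   __builtin_ia32_vec_init_v2si (i0, i1) yields the V2SI vector {i0, i1}. */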
16677 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16678 integer_type_node, NULL_TREE);
16679 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16680 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16681
16682 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16683 short_integer_type_node,
16684 short_integer_type_node,
16685 short_integer_type_node, NULL_TREE);
16686 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16687 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16688
16689 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16690 char_type_node, char_type_node,
16691 char_type_node, char_type_node,
16692 char_type_node, char_type_node,
16693 char_type_node, NULL_TREE);
16694 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16695 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16696
16697 /* Access to the vec_extract patterns. */
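/* These return one element of a vector, selected by a constant index;
   e.g. __builtin_ia32_vec_ext_v4sf (v, 2) yields element 2 of a V4SF
   vector.  The index must be an integer constant in range (see
   get_element_number below). */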
16698 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16699 integer_type_node, NULL_TREE);
16700 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
16701 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16702
16703 ftype = build_function_type_list (long_long_integer_type_node,
16704 V2DI_type_node, integer_type_node,
16705 NULL_TREE);
16706 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
16707 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16708
16709 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16710 integer_type_node, NULL_TREE);
16711 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16712 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16713
16714 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16715 integer_type_node, NULL_TREE);
16716 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
16717 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16718
16719 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16720 integer_type_node, NULL_TREE);
16721 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
16722 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16723
16724 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16725 integer_type_node, NULL_TREE);
16726 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16727 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16728
16729 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16730 integer_type_node, NULL_TREE);
16731 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16732 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16733
16734 /* Access to the vec_set patterns. */
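/* These return a copy of a vector with the element selected by a
   constant index replaced by a scalar; e.g. __builtin_ia32_vec_set_v8hi
   (v, x, 3) yields v with element 3 set to x. */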
16735 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16736 intHI_type_node,
16737 integer_type_node, NULL_TREE);
16738 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
16739 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16740
16741 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16742 intHI_type_node,
16743 integer_type_node, NULL_TREE);
16744 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16745 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16746 }
16747
16748 /* Errors in the source file can cause expand_expr to return const0_rtx
16749 where we expect a vector. To avoid crashing, use one of the vector
16750 clear instructions. */
16751 static rtx
16752 safe_vector_operand (rtx x, enum machine_mode mode)
16753 {
16754 if (x == const0_rtx)
16755 x = CONST0_RTX (mode);
16756 return x;
16757 }
16758
16759 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16760
16761 static rtx
16762 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16763 {
16764 rtx pat, xops[3];
16765 tree arg0 = TREE_VALUE (arglist);
16766 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16767 rtx op0 = expand_normal (arg0);
16768 rtx op1 = expand_normal (arg1);
16769 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16770 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16771 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16772
16773 if (VECTOR_MODE_P (mode0))
16774 op0 = safe_vector_operand (op0, mode0);
16775 if (VECTOR_MODE_P (mode1))
16776 op1 = safe_vector_operand (op1, mode1);
16777
16778 if (optimize || !target
16779 || GET_MODE (target) != tmode
16780 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16781 target = gen_reg_rtx (tmode);
16782
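/* If the insn wants this operand as a full 128-bit (TImode) value but
   the builtin argument is a plain int, load the int into the low
   element of an XMM register and reinterpret that register as TImode. */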
16783 if (GET_MODE (op1) == SImode && mode1 == TImode)
16784 {
16785 rtx x = gen_reg_rtx (V4SImode);
16786 emit_insn (gen_sse2_loadd (x, op1));
16787 op1 = gen_lowpart (TImode, x);
16788 }
16789
16790 /* The operands must already be in the modes the insn expects
16791 (or VOIDmode, for constants). */
16792 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16793 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16794
16795 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16796 op0 = copy_to_mode_reg (mode0, op0);
16797 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16798 op1 = copy_to_mode_reg (mode1, op1);
16799
16800 /* ??? Using ix86_fixup_binary_operands is problematic when
16801 we've got mismatched modes. Fake it. */
16802
16803 xops[0] = target;
16804 xops[1] = op0;
16805 xops[2] = op1;
16806
16807 if (tmode == mode0 && tmode == mode1)
16808 {
16809 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16810 op0 = xops[1];
16811 op1 = xops[2];
16812 }
16813 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16814 {
16815 op0 = force_reg (mode0, op0);
16816 op1 = force_reg (mode1, op1);
16817 target = gen_reg_rtx (tmode);
16818 }
16819
16820 pat = GEN_FCN (icode) (target, op0, op1);
16821 if (! pat)
16822 return 0;
16823 emit_insn (pat);
16824 return target;
16825 }
16826
16827 /* Subroutine of ix86_expand_builtin to take care of stores. */
16828
16829 static rtx
16830 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16831 {
16832 rtx pat;
16833 tree arg0 = TREE_VALUE (arglist);
16834 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16835 rtx op0 = expand_normal (arg0);
16836 rtx op1 = expand_normal (arg1);
16837 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16838 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16839
16840 if (VECTOR_MODE_P (mode1))
16841 op1 = safe_vector_operand (op1, mode1);
16842
16843 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16844 op1 = copy_to_mode_reg (mode1, op1);
16845
16846 pat = GEN_FCN (icode) (op0, op1);
16847 if (pat)
16848 emit_insn (pat);
16849 return 0;
16850 }
16851
16852 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16853
16854 static rtx
16855 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16856 rtx target, int do_load)
16857 {
16858 rtx pat;
16859 tree arg0 = TREE_VALUE (arglist);
16860 rtx op0 = expand_normal (arg0);
16861 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16862 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16863
16864 if (optimize || !target
16865 || GET_MODE (target) != tmode
16866 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16867 target = gen_reg_rtx (tmode);
16868 if (do_load)
16869 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16870 else
16871 {
16872 if (VECTOR_MODE_P (mode0))
16873 op0 = safe_vector_operand (op0, mode0);
16874
16875 if ((optimize && !register_operand (op0, mode0))
16876 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16877 op0 = copy_to_mode_reg (mode0, op0);
16878 }
16879
16880 pat = GEN_FCN (icode) (target, op0);
16881 if (! pat)
16882 return 0;
16883 emit_insn (pat);
16884 return target;
16885 }
16886
16887 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16888 sqrtss, rsqrtss, rcpss. */
16889
16890 static rtx
16891 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16892 {
16893 rtx pat;
16894 tree arg0 = TREE_VALUE (arglist);
16895 rtx op1, op0 = expand_normal (arg0);
16896 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16897 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16898
16899 if (optimize || !target
16900 || GET_MODE (target) != tmode
16901 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16902 target = gen_reg_rtx (tmode);
16903
16904 if (VECTOR_MODE_P (mode0))
16905 op0 = safe_vector_operand (op0, mode0);
16906
16907 if ((optimize && !register_operand (op0, mode0))
16908 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16909 op0 = copy_to_mode_reg (mode0, op0);
16910
16911 op1 = op0;
16912 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16913 op1 = copy_to_mode_reg (mode0, op1);
16914
16915 pat = GEN_FCN (icode) (target, op0, op1);
16916 if (! pat)
16917 return 0;
16918 emit_insn (pat);
16919 return target;
16920 }
16921
16922 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16923
16924 static rtx
16925 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16926 rtx target)
16927 {
16928 rtx pat;
16929 tree arg0 = TREE_VALUE (arglist);
16930 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16931 rtx op0 = expand_normal (arg0);
16932 rtx op1 = expand_normal (arg1);
16933 rtx op2;
16934 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16935 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16936 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16937 enum rtx_code comparison = d->comparison;
16938
16939 if (VECTOR_MODE_P (mode0))
16940 op0 = safe_vector_operand (op0, mode0);
16941 if (VECTOR_MODE_P (mode1))
16942 op1 = safe_vector_operand (op1, mode1);
16943
16944 /* Swap operands if we have a comparison that isn't available in
16945 hardware. */
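/* For instance, a greater-than compare can be described in the builtin
   table with the corresponding less-than comparison code plus
   BUILTIN_DESC_SWAP_OPERANDS, since SSE provides the LT/LE compares
   directly but not GT/GE. */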
16946 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16947 {
16948 rtx tmp = gen_reg_rtx (mode1);
16949 emit_move_insn (tmp, op1);
16950 op1 = op0;
16951 op0 = tmp;
16952 }
16953
16954 if (optimize || !target
16955 || GET_MODE (target) != tmode
16956 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16957 target = gen_reg_rtx (tmode);
16958
16959 if ((optimize && !register_operand (op0, mode0))
16960 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16961 op0 = copy_to_mode_reg (mode0, op0);
16962 if ((optimize && !register_operand (op1, mode1))
16963 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16964 op1 = copy_to_mode_reg (mode1, op1);
16965
16966 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16967 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16968 if (! pat)
16969 return 0;
16970 emit_insn (pat);
16971 return target;
16972 }
16973
16974 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16975
16976 static rtx
16977 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16978 rtx target)
16979 {
16980 rtx pat;
16981 tree arg0 = TREE_VALUE (arglist);
16982 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16983 rtx op0 = expand_normal (arg0);
16984 rtx op1 = expand_normal (arg1);
16985 rtx op2;
16986 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16987 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16988 enum rtx_code comparison = d->comparison;
16989
16990 if (VECTOR_MODE_P (mode0))
16991 op0 = safe_vector_operand (op0, mode0);
16992 if (VECTOR_MODE_P (mode1))
16993 op1 = safe_vector_operand (op1, mode1);
16994
16995 /* Swap operands if we have a comparison that isn't available in
16996 hardware. */
16997 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16998 {
16999 rtx tmp = op1;
17000 op1 = op0;
17001 op0 = tmp;
17002 }
17003
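/* The result is assembled by clearing an SImode register and then
   storing the flags comparison into its low byte (via the
   strict_low_part set emitted below), leaving the upper bits zero. */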
17004 target = gen_reg_rtx (SImode);
17005 emit_move_insn (target, const0_rtx);
17006 target = gen_rtx_SUBREG (QImode, target, 0);
17007
17008 if ((optimize && !register_operand (op0, mode0))
17009 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17010 op0 = copy_to_mode_reg (mode0, op0);
17011 if ((optimize && !register_operand (op1, mode1))
17012 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17013 op1 = copy_to_mode_reg (mode1, op1);
17014
17015 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17016 pat = GEN_FCN (d->icode) (op0, op1);
17017 if (! pat)
17018 return 0;
17019 emit_insn (pat);
17020 emit_insn (gen_rtx_SET (VOIDmode,
17021 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17022 gen_rtx_fmt_ee (comparison, QImode,
17023 SET_DEST (pat),
17024 const0_rtx)));
17025
17026 return SUBREG_REG (target);
17027 }
17028
17029 /* Return the integer constant in ARG. Constrain it to be in the range
17030 of the subparts of VEC_TYPE; issue an error if not. */
17031
17032 static int
17033 get_element_number (tree vec_type, tree arg)
17034 {
17035 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17036
17037 if (!host_integerp (arg, 1)
17038 || (elt = tree_low_cst (arg, 1), elt > max))
17039 {
17040 error ("selector must be an integer constant in the range 0..%wi", max);
17041 return 0;
17042 }
17043
17044 return elt;
17045 }
17046
17047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17048 ix86_expand_vector_init. We DO have language-level syntax for this, in
17049 the form of (type){ init-list }. Except that since we can't place emms
17050 instructions from inside the compiler, we can't allow the use of MMX
17051 registers unless the user explicitly asks for it. So we do *not* define
17052 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17053 we have builtins invoked by mmintrin.h that gives us license to emit
17054 these sorts of instructions. */
17055
17056 static rtx
17057 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
17058 {
17059 enum machine_mode tmode = TYPE_MODE (type);
17060 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17061 int i, n_elt = GET_MODE_NUNITS (tmode);
17062 rtvec v = rtvec_alloc (n_elt);
17063
17064 gcc_assert (VECTOR_MODE_P (tmode));
17065
17066 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
17067 {
17068 rtx x = expand_normal (TREE_VALUE (arglist));
17069 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17070 }
17071
17072 gcc_assert (arglist == NULL);
17073
17074 if (!target || !register_operand (target, tmode))
17075 target = gen_reg_rtx (tmode);
17076
17077 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17078 return target;
17079 }
17080
17081 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17082 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17083 had a language-level syntax for referencing vector elements. */
17084
17085 static rtx
17086 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
17087 {
17088 enum machine_mode tmode, mode0;
17089 tree arg0, arg1;
17090 int elt;
17091 rtx op0;
17092
17093 arg0 = TREE_VALUE (arglist);
17094 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17095
17096 op0 = expand_normal (arg0);
17097 elt = get_element_number (TREE_TYPE (arg0), arg1);
17098
17099 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17100 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17101 gcc_assert (VECTOR_MODE_P (mode0));
17102
17103 op0 = force_reg (mode0, op0);
17104
17105 if (optimize || !target || !register_operand (target, tmode))
17106 target = gen_reg_rtx (tmode);
17107
17108 ix86_expand_vector_extract (true, target, op0, elt);
17109
17110 return target;
17111 }
17112
17113 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17114 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17115 a language-level syntax for referencing vector elements. */
17116
17117 static rtx
17118 ix86_expand_vec_set_builtin (tree arglist)
17119 {
17120 enum machine_mode tmode, mode1;
17121 tree arg0, arg1, arg2;
17122 int elt;
17123 rtx op0, op1;
17124
17125 arg0 = TREE_VALUE (arglist);
17126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17128
17129 tmode = TYPE_MODE (TREE_TYPE (arg0));
17130 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17131 gcc_assert (VECTOR_MODE_P (tmode));
17132
17133 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17134 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17135 elt = get_element_number (TREE_TYPE (arg0), arg2);
17136
17137 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17138 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17139
17140 op0 = force_reg (tmode, op0);
17141 op1 = force_reg (mode1, op1);
17142
17143 ix86_expand_vector_set (true, op0, op1, elt);
17144
17145 return op0;
17146 }
17147
17148 /* Expand an expression EXP that calls a built-in function,
17149 with result going to TARGET if that's convenient
17150 (and in mode MODE if that's convenient).
17151 SUBTARGET may be used as the target for computing one of EXP's operands.
17152 IGNORE is nonzero if the value is to be ignored. */
17153
17154 static rtx
17155 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17156 enum machine_mode mode ATTRIBUTE_UNUSED,
17157 int ignore ATTRIBUTE_UNUSED)
17158 {
17159 const struct builtin_description *d;
17160 size_t i;
17161 enum insn_code icode;
17162 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
17163 tree arglist = TREE_OPERAND (exp, 1);
17164 tree arg0, arg1, arg2;
17165 rtx op0, op1, op2, pat;
17166 enum machine_mode tmode, mode0, mode1, mode2, mode3;
17167 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17168
17169 switch (fcode)
17170 {
17171 case IX86_BUILTIN_EMMS:
17172 emit_insn (gen_mmx_emms ());
17173 return 0;
17174
17175 case IX86_BUILTIN_SFENCE:
17176 emit_insn (gen_sse_sfence ());
17177 return 0;
17178
17179 case IX86_BUILTIN_MASKMOVQ:
17180 case IX86_BUILTIN_MASKMOVDQU:
17181 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17182 ? CODE_FOR_mmx_maskmovq
17183 : CODE_FOR_sse2_maskmovdqu);
17184 /* Note the arg order is different from the operand order. */
17185 arg1 = TREE_VALUE (arglist);
17186 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
17187 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17188 op0 = expand_normal (arg0);
17189 op1 = expand_normal (arg1);
17190 op2 = expand_normal (arg2);
17191 mode0 = insn_data[icode].operand[0].mode;
17192 mode1 = insn_data[icode].operand[1].mode;
17193 mode2 = insn_data[icode].operand[2].mode;
17194
17195 op0 = force_reg (Pmode, op0);
17196 op0 = gen_rtx_MEM (mode1, op0);
17197
17198 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17199 op0 = copy_to_mode_reg (mode0, op0);
17200 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17201 op1 = copy_to_mode_reg (mode1, op1);
17202 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17203 op2 = copy_to_mode_reg (mode2, op2);
17204 pat = GEN_FCN (icode) (op0, op1, op2);
17205 if (! pat)
17206 return 0;
17207 emit_insn (pat);
17208 return 0;
17209
17210 case IX86_BUILTIN_SQRTSS:
17211 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
17212 case IX86_BUILTIN_RSQRTSS:
17213 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
17214 case IX86_BUILTIN_RCPSS:
17215 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
17216
17217 case IX86_BUILTIN_LOADUPS:
17218 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
17219
17220 case IX86_BUILTIN_STOREUPS:
17221 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
17222
17223 case IX86_BUILTIN_LOADHPS:
17224 case IX86_BUILTIN_LOADLPS:
17225 case IX86_BUILTIN_LOADHPD:
17226 case IX86_BUILTIN_LOADLPD:
17227 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17228 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17229 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17230 : CODE_FOR_sse2_loadlpd);
17231 arg0 = TREE_VALUE (arglist);
17232 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17233 op0 = expand_normal (arg0);
17234 op1 = expand_normal (arg1);
17235 tmode = insn_data[icode].operand[0].mode;
17236 mode0 = insn_data[icode].operand[1].mode;
17237 mode1 = insn_data[icode].operand[2].mode;
17238
17239 op0 = force_reg (mode0, op0);
17240 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17241 if (optimize || target == 0
17242 || GET_MODE (target) != tmode
17243 || !register_operand (target, tmode))
17244 target = gen_reg_rtx (tmode);
17245 pat = GEN_FCN (icode) (target, op0, op1);
17246 if (! pat)
17247 return 0;
17248 emit_insn (pat);
17249 return target;
17250
17251 case IX86_BUILTIN_STOREHPS:
17252 case IX86_BUILTIN_STORELPS:
17253 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17254 : CODE_FOR_sse_storelps);
17255 arg0 = TREE_VALUE (arglist);
17256 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17257 op0 = expand_normal (arg0);
17258 op1 = expand_normal (arg1);
17259 mode0 = insn_data[icode].operand[0].mode;
17260 mode1 = insn_data[icode].operand[1].mode;
17261
17262 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17263 op1 = force_reg (mode1, op1);
17264
17265 pat = GEN_FCN (icode) (op0, op1);
17266 if (! pat)
17267 return 0;
17268 emit_insn (pat);
17269 return const0_rtx;
17270
17271 case IX86_BUILTIN_MOVNTPS:
17272 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
17273 case IX86_BUILTIN_MOVNTQ:
17274 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
17275
17276 case IX86_BUILTIN_LDMXCSR:
17277 op0 = expand_normal (TREE_VALUE (arglist));
17278 target = assign_386_stack_local (SImode, SLOT_TEMP);
17279 emit_move_insn (target, op0);
17280 emit_insn (gen_sse_ldmxcsr (target));
17281 return 0;
17282
17283 case IX86_BUILTIN_STMXCSR:
17284 target = assign_386_stack_local (SImode, SLOT_TEMP);
17285 emit_insn (gen_sse_stmxcsr (target));
17286 return copy_to_mode_reg (SImode, target);
17287
17288 case IX86_BUILTIN_SHUFPS:
17289 case IX86_BUILTIN_SHUFPD:
17290 icode = (fcode == IX86_BUILTIN_SHUFPS
17291 ? CODE_FOR_sse_shufps
17292 : CODE_FOR_sse2_shufpd);
17293 arg0 = TREE_VALUE (arglist);
17294 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17295 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17296 op0 = expand_normal (arg0);
17297 op1 = expand_normal (arg1);
17298 op2 = expand_normal (arg2);
17299 tmode = insn_data[icode].operand[0].mode;
17300 mode0 = insn_data[icode].operand[1].mode;
17301 mode1 = insn_data[icode].operand[2].mode;
17302 mode2 = insn_data[icode].operand[3].mode;
17303
17304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17305 op0 = copy_to_mode_reg (mode0, op0);
17306 if ((optimize && !register_operand (op1, mode1))
17307 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17308 op1 = copy_to_mode_reg (mode1, op1);
17309 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17310 {
17311 /* @@@ better error message */
17312 error ("mask must be an immediate");
17313 return gen_reg_rtx (tmode);
17314 }
17315 if (optimize || target == 0
17316 || GET_MODE (target) != tmode
17317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17318 target = gen_reg_rtx (tmode);
17319 pat = GEN_FCN (icode) (target, op0, op1, op2);
17320 if (! pat)
17321 return 0;
17322 emit_insn (pat);
17323 return target;
17324
17325 case IX86_BUILTIN_PSHUFW:
17326 case IX86_BUILTIN_PSHUFD:
17327 case IX86_BUILTIN_PSHUFHW:
17328 case IX86_BUILTIN_PSHUFLW:
17329 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17330 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17331 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17332 : CODE_FOR_mmx_pshufw);
17333 arg0 = TREE_VALUE (arglist);
17334 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17335 op0 = expand_normal (arg0);
17336 op1 = expand_normal (arg1);
17337 tmode = insn_data[icode].operand[0].mode;
17338 mode1 = insn_data[icode].operand[1].mode;
17339 mode2 = insn_data[icode].operand[2].mode;
17340
17341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17342 op0 = copy_to_mode_reg (mode1, op0);
17343 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17344 {
17345 /* @@@ better error message */
17346 error ("mask must be an immediate");
17347 return const0_rtx;
17348 }
17349 if (target == 0
17350 || GET_MODE (target) != tmode
17351 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17352 target = gen_reg_rtx (tmode);
17353 pat = GEN_FCN (icode) (target, op0, op1);
17354 if (! pat)
17355 return 0;
17356 emit_insn (pat);
17357 return target;
17358
17359 case IX86_BUILTIN_PSLLDQI128:
17360 case IX86_BUILTIN_PSRLDQI128:
17361 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17362 : CODE_FOR_sse2_lshrti3);
17363 arg0 = TREE_VALUE (arglist);
17364 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17365 op0 = expand_normal (arg0);
17366 op1 = expand_normal (arg1);
17367 tmode = insn_data[icode].operand[0].mode;
17368 mode1 = insn_data[icode].operand[1].mode;
17369 mode2 = insn_data[icode].operand[2].mode;
17370
17371 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17372 {
17373 op0 = copy_to_reg (op0);
17374 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17375 }
17376 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17377 {
17378 error ("shift must be an immediate");
17379 return const0_rtx;
17380 }
17381 target = gen_reg_rtx (V2DImode);
17382 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17383 if (! pat)
17384 return 0;
17385 emit_insn (pat);
17386 return target;
17387
17388 case IX86_BUILTIN_FEMMS:
17389 emit_insn (gen_mmx_femms ());
17390 return NULL_RTX;
17391
17392 case IX86_BUILTIN_PAVGUSB:
17393 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
17394
17395 case IX86_BUILTIN_PF2ID:
17396 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
17397
17398 case IX86_BUILTIN_PFACC:
17399 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
17400
17401 case IX86_BUILTIN_PFADD:
17402 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
17403
17404 case IX86_BUILTIN_PFCMPEQ:
17405 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
17406
17407 case IX86_BUILTIN_PFCMPGE:
17408 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
17409
17410 case IX86_BUILTIN_PFCMPGT:
17411 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
17412
17413 case IX86_BUILTIN_PFMAX:
17414 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
17415
17416 case IX86_BUILTIN_PFMIN:
17417 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
17418
17419 case IX86_BUILTIN_PFMUL:
17420 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
17421
17422 case IX86_BUILTIN_PFRCP:
17423 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
17424
17425 case IX86_BUILTIN_PFRCPIT1:
17426 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
17427
17428 case IX86_BUILTIN_PFRCPIT2:
17429 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
17430
17431 case IX86_BUILTIN_PFRSQIT1:
17432 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
17433
17434 case IX86_BUILTIN_PFRSQRT:
17435 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
17436
17437 case IX86_BUILTIN_PFSUB:
17438 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
17439
17440 case IX86_BUILTIN_PFSUBR:
17441 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
17442
17443 case IX86_BUILTIN_PI2FD:
17444 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
17445
17446 case IX86_BUILTIN_PMULHRW:
17447 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
17448
17449 case IX86_BUILTIN_PF2IW:
17450 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
17451
17452 case IX86_BUILTIN_PFNACC:
17453 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
17454
17455 case IX86_BUILTIN_PFPNACC:
17456 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
17457
17458 case IX86_BUILTIN_PI2FW:
17459 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
17460
17461 case IX86_BUILTIN_PSWAPDSI:
17462 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
17463
17464 case IX86_BUILTIN_PSWAPDSF:
17465 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
17466
17467 case IX86_BUILTIN_SQRTSD:
17468 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
17469 case IX86_BUILTIN_LOADUPD:
17470 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
17471 case IX86_BUILTIN_STOREUPD:
17472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
17473
17474 case IX86_BUILTIN_MFENCE:
17475 emit_insn (gen_sse2_mfence ());
17476 return 0;
17477 case IX86_BUILTIN_LFENCE:
17478 emit_insn (gen_sse2_lfence ());
17479 return 0;
17480
17481 case IX86_BUILTIN_CLFLUSH:
17482 arg0 = TREE_VALUE (arglist);
17483 op0 = expand_normal (arg0);
17484 icode = CODE_FOR_sse2_clflush;
17485 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17486 op0 = copy_to_mode_reg (Pmode, op0);
17487
17488 emit_insn (gen_sse2_clflush (op0));
17489 return 0;
17490
17491 case IX86_BUILTIN_MOVNTPD:
17492 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
17493 case IX86_BUILTIN_MOVNTDQ:
17494 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
17495 case IX86_BUILTIN_MOVNTI:
17496 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
17497
17498 case IX86_BUILTIN_LOADDQU:
17499 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
17500 case IX86_BUILTIN_STOREDQU:
17501 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
17502
17503 case IX86_BUILTIN_MONITOR:
17504 arg0 = TREE_VALUE (arglist);
17505 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17506 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17507 op0 = expand_normal (arg0);
17508 op1 = expand_normal (arg1);
17509 op2 = expand_normal (arg2);
17510 if (!REG_P (op0))
17511 op0 = copy_to_mode_reg (Pmode, op0);
17512 if (!REG_P (op1))
17513 op1 = copy_to_mode_reg (SImode, op1);
17514 if (!REG_P (op2))
17515 op2 = copy_to_mode_reg (SImode, op2);
17516 if (!TARGET_64BIT)
17517 emit_insn (gen_sse3_monitor (op0, op1, op2));
17518 else
17519 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17520 return 0;
17521
17522 case IX86_BUILTIN_MWAIT:
17523 arg0 = TREE_VALUE (arglist);
17524 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17525 op0 = expand_normal (arg0);
17526 op1 = expand_normal (arg1);
17527 if (!REG_P (op0))
17528 op0 = copy_to_mode_reg (SImode, op0);
17529 if (!REG_P (op1))
17530 op1 = copy_to_mode_reg (SImode, op1);
17531 emit_insn (gen_sse3_mwait (op0, op1));
17532 return 0;
17533
17534 case IX86_BUILTIN_LDDQU:
17535 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17536 target, 1);
17537
17538 case IX86_BUILTIN_PALIGNR:
17539 case IX86_BUILTIN_PALIGNR128:
17540 if (fcode == IX86_BUILTIN_PALIGNR)
17541 {
17542 icode = CODE_FOR_ssse3_palignrdi;
17543 mode = DImode;
17544 }
17545 else
17546 {
17547 icode = CODE_FOR_ssse3_palignrti;
17548 mode = V2DImode;
17549 }
17550 arg0 = TREE_VALUE (arglist);
17551 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17552 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17553 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17554 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17555 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17556 tmode = insn_data[icode].operand[0].mode;
17557 mode1 = insn_data[icode].operand[1].mode;
17558 mode2 = insn_data[icode].operand[2].mode;
17559 mode3 = insn_data[icode].operand[3].mode;
17560
17561 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17562 {
17563 op0 = copy_to_reg (op0);
17564 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17565 }
17566 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17567 {
17568 op1 = copy_to_reg (op1);
17569 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17570 }
17571 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17572 {
17573 error ("shift must be an immediate");
17574 return const0_rtx;
17575 }
17576 target = gen_reg_rtx (mode);
17577 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17578 op0, op1, op2);
17579 if (! pat)
17580 return 0;
17581 emit_insn (pat);
17582 return target;
17583
17584 case IX86_BUILTIN_VEC_INIT_V2SI:
17585 case IX86_BUILTIN_VEC_INIT_V4HI:
17586 case IX86_BUILTIN_VEC_INIT_V8QI:
17587 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17588
17589 case IX86_BUILTIN_VEC_EXT_V2DF:
17590 case IX86_BUILTIN_VEC_EXT_V2DI:
17591 case IX86_BUILTIN_VEC_EXT_V4SF:
17592 case IX86_BUILTIN_VEC_EXT_V4SI:
17593 case IX86_BUILTIN_VEC_EXT_V8HI:
17594 case IX86_BUILTIN_VEC_EXT_V2SI:
17595 case IX86_BUILTIN_VEC_EXT_V4HI:
17596 return ix86_expand_vec_ext_builtin (arglist, target);
17597
17598 case IX86_BUILTIN_VEC_SET_V8HI:
17599 case IX86_BUILTIN_VEC_SET_V4HI:
17600 return ix86_expand_vec_set_builtin (arglist);
17601
17602 default:
17603 break;
17604 }
17605
17606 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17607 if (d->code == fcode)
17608 {
17609 /* Compares are treated specially. */
17610 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17611 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17612 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17613 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17614 return ix86_expand_sse_compare (d, arglist, target);
17615
17616 return ix86_expand_binop_builtin (d->icode, arglist, target);
17617 }
17618
17619 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17620 if (d->code == fcode)
17621 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17622
17623 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17624 if (d->code == fcode)
17625 return ix86_expand_sse_comi (d, arglist, target);
17626
17627 gcc_unreachable ();
17628 }
17629
17630 /* Returns a function decl for a vectorized version of the builtin function
17631 with builtin function code FN and the result vector type TYPE, or NULL_TREE
17632 if it is not available. */
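   /* For example, once the vectorizer has chosen a V2DF vector type, a
      scalar call to the sqrt builtin can be replaced by the decl recorded
      for IX86_BUILTIN_SQRTPD below.  */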
17633
17634 static tree
17635 ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
17636 {
17637 enum machine_mode el_mode;
17638 int n;
17639
17640 if (TREE_CODE (type) != VECTOR_TYPE)
17641 return NULL_TREE;
17642
17643 el_mode = TYPE_MODE (TREE_TYPE (type));
17644 n = TYPE_VECTOR_SUBPARTS (type);
17645
17646 switch (fn)
17647 {
17648 case BUILT_IN_SQRT:
17649 if (el_mode == DFmode && n == 2)
17650 return ix86_builtins[IX86_BUILTIN_SQRTPD];
17651 return NULL_TREE;
17652
17653 case BUILT_IN_SQRTF:
17654 if (el_mode == SFmode && n == 4)
17655 return ix86_builtins[IX86_BUILTIN_SQRTPS];
17656 return NULL_TREE;
17657
17658 default:
17659 ;
17660 }
17661
17662 return NULL_TREE;
17663 }
17664
17665 /* Store OPERAND to the memory after reload is completed. This means
17666 that we can't easily use assign_stack_local. */
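   /* In outline, mirroring the cases below: with a red zone the value is
      simply stored below the stack pointer; on 64-bit without a red zone
      it is pushed as a DImode word; otherwise it is pushed as one or two
      SImode words.  The returned MEM is always based on the stack
      pointer.  */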
17667 rtx
17668 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17669 {
17670 rtx result;
17671
17672 gcc_assert (reload_completed);
17673 if (TARGET_RED_ZONE)
17674 {
17675 result = gen_rtx_MEM (mode,
17676 gen_rtx_PLUS (Pmode,
17677 stack_pointer_rtx,
17678 GEN_INT (-RED_ZONE_SIZE)));
17679 emit_move_insn (result, operand);
17680 }
17681 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17682 {
17683 switch (mode)
17684 {
17685 case HImode:
17686 case SImode:
17687 operand = gen_lowpart (DImode, operand);
17688 /* FALLTHRU */
17689 case DImode:
17690 emit_insn (
17691 gen_rtx_SET (VOIDmode,
17692 gen_rtx_MEM (DImode,
17693 gen_rtx_PRE_DEC (DImode,
17694 stack_pointer_rtx)),
17695 operand));
17696 break;
17697 default:
17698 gcc_unreachable ();
17699 }
17700 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17701 }
17702 else
17703 {
17704 switch (mode)
17705 {
17706 case DImode:
17707 {
17708 rtx operands[2];
17709 split_di (&operand, 1, operands, operands + 1);
17710 emit_insn (
17711 gen_rtx_SET (VOIDmode,
17712 gen_rtx_MEM (SImode,
17713 gen_rtx_PRE_DEC (Pmode,
17714 stack_pointer_rtx)),
17715 operands[1]));
17716 emit_insn (
17717 gen_rtx_SET (VOIDmode,
17718 gen_rtx_MEM (SImode,
17719 gen_rtx_PRE_DEC (Pmode,
17720 stack_pointer_rtx)),
17721 operands[0]));
17722 }
17723 break;
17724 case HImode:
17725 /* Store HImodes as SImodes. */
17726 operand = gen_lowpart (SImode, operand);
17727 /* FALLTHRU */
17728 case SImode:
17729 emit_insn (
17730 gen_rtx_SET (VOIDmode,
17731 gen_rtx_MEM (GET_MODE (operand),
17732 gen_rtx_PRE_DEC (SImode,
17733 stack_pointer_rtx)),
17734 operand));
17735 break;
17736 default:
17737 gcc_unreachable ();
17738 }
17739 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17740 }
17741 return result;
17742 }
17743
17744 /* Free the stack slot used by ix86_force_to_memory.  */
17745 void
17746 ix86_free_from_memory (enum machine_mode mode)
17747 {
17748 if (!TARGET_RED_ZONE)
17749 {
17750 int size;
17751
17752 if (mode == DImode || TARGET_64BIT)
17753 size = 8;
17754 else
17755 size = 4;
17756 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17757 to a pop or add instruction if registers are available.  */
17758 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17759 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17760 GEN_INT (size))));
17761 }
17762 }
17763
17764 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17765 QImode must go into class Q_REGS.
17766 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17767 movdf to do mem-to-mem moves through integer regs. */
17768 enum reg_class
17769 ix86_preferred_reload_class (rtx x, enum reg_class class)
17770 {
17771 enum machine_mode mode = GET_MODE (x);
17772
17773 /* We're only allowed to return a subclass of CLASS. Many of the
17774 following checks fail for NO_REGS, so eliminate that early. */
17775 if (class == NO_REGS)
17776 return NO_REGS;
17777
17778 /* All classes can load zeros. */
17779 if (x == CONST0_RTX (mode))
17780 return class;
17781
17782 /* Force constants into memory if we are loading a (nonzero) constant into
17783 an MMX or SSE register. This is because there are no MMX/SSE instructions
17784 to load from a constant. */
17785 if (CONSTANT_P (x)
17786 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17787 return NO_REGS;
17788
17789 /* Prefer SSE regs only, if we can use them for math. */
17790 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17791 return SSE_CLASS_P (class) ? class : NO_REGS;
17792
17793 /* Floating-point constants need more complex checks. */
17794 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17795 {
17796 /* General regs can load everything. */
17797 if (reg_class_subset_p (class, GENERAL_REGS))
17798 return class;
17799
17800 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17801 zero above. We only want to wind up preferring 80387 registers if
17802 we plan on doing computation with them. */
17803 if (TARGET_80387
17804 && standard_80387_constant_p (x))
17805 {
17806 /* Limit class to non-sse. */
17807 if (class == FLOAT_SSE_REGS)
17808 return FLOAT_REGS;
17809 if (class == FP_TOP_SSE_REGS)
17810 return FP_TOP_REG;
17811 if (class == FP_SECOND_SSE_REGS)
17812 return FP_SECOND_REG;
17813 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17814 return class;
17815 }
17816
17817 return NO_REGS;
17818 }
17819
17820 /* Generally when we see PLUS here, it's the function invariant
17821 (plus soft-fp const_int), which can only be computed into general
17822 regs. */
17823 if (GET_CODE (x) == PLUS)
17824 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17825
17826 /* QImode constants are easy to load, but non-constant QImode data
17827 must go into Q_REGS. */
17828 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17829 {
17830 if (reg_class_subset_p (class, Q_REGS))
17831 return class;
17832 if (reg_class_subset_p (Q_REGS, class))
17833 return Q_REGS;
17834 return NO_REGS;
17835 }
17836
17837 return class;
17838 }
17839
17840 /* Discourage putting floating-point values in SSE registers unless
17841 SSE math is being used, and likewise for the 387 registers. */
17842 enum reg_class
17843 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17844 {
17845 enum machine_mode mode = GET_MODE (x);
17846
17847 /* Restrict the output reload class to the register bank that we are doing
17848 math on. If we would like not to return a subset of CLASS, reject this
17849 alternative: if reload cannot do this, it will still use its choice. */
17850 mode = GET_MODE (x);
17851 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17852 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17853
17854 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17855 {
17856 if (class == FP_TOP_SSE_REGS)
17857 return FP_TOP_REG;
17858 else if (class == FP_SECOND_SSE_REGS)
17859 return FP_SECOND_REG;
17860 else
17861 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17862 }
17863
17864 return class;
17865 }
17866
17867 /* If we are copying between general and FP registers, we need a memory
17868 location. The same is true for SSE and MMX registers.
17869
17870 The macro can't work reliably when one of the CLASSES is a class containing
17871 registers from multiple units (SSE, MMX, integer). We avoid this by never
17872 combining those units in a single alternative in the machine description.
17873 Ensure that this constraint holds to avoid unexpected surprises.
17874
17875 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17876 enforce these sanity checks. */
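   /* For example, a DFmode copy between FLOAT_REGS and SSE_REGS always
      goes through memory, as does any copy between MMX and integer
      registers (see the comments below).  */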
17877
17878 int
17879 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17880 enum machine_mode mode, int strict)
17881 {
17882 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17883 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17884 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17885 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17886 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17887 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17888 {
17889 gcc_assert (!strict);
17890 return true;
17891 }
17892
17893 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17894 return true;
17895
17896 /* ??? This is a lie. We do have moves between mmx/general, and for
17897 mmx/sse2. But by saying we need secondary memory we discourage the
17898 register allocator from using the mmx registers unless needed. */
17899 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17900 return true;
17901
17902 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17903 {
17904 /* SSE1 doesn't have any direct moves from other classes. */
17905 if (!TARGET_SSE2)
17906 return true;
17907
17908 /* If the target says that inter-unit moves are more expensive
17909 than moving through memory, then don't generate them. */
17910 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17911 return true;
17912
17913 /* Between SSE and general, we have moves no larger than word size. */
17914 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17915 return true;
17916
17917 /* ??? For the cost of one register reformat penalty, we could use
17918 the same instructions to move SFmode and DFmode data, but the
17919 relevant move patterns don't support those alternatives. */
17920 if (mode == SFmode || mode == DFmode)
17921 return true;
17922 }
17923
17924 return false;
17925 }
17926
17927 /* Return true if the registers in CLASS cannot represent the change from
17928 modes FROM to TO. */
17929
17930 bool
17931 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17932 enum reg_class class)
17933 {
17934 if (from == to)
17935 return false;
17936
17937 /* x87 registers can't do subreg at all, as all values are reformatted
17938 to extended precision. */
17939 if (MAYBE_FLOAT_CLASS_P (class))
17940 return true;
17941
17942 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17943 {
17944 /* Vector registers do not support QI or HImode loads. If we don't
17945 disallow a change to these modes, reload will assume it's ok to
17946 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17947 the vec_dupv4hi pattern. */
17948 if (GET_MODE_SIZE (from) < 4)
17949 return true;
17950
17951 /* Vector registers do not support subreg with nonzero offsets, which
17952 are otherwise valid for integer registers. Since we can't see
17953 whether we have a nonzero offset from here, prohibit all
17954 nonparadoxical subregs changing size. */
17955 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17956 return true;
17957 }
17958
17959 return false;
17960 }
17961
17962 /* Return the cost of moving data from a register in class CLASS1 to
17963 one in class CLASS2.
17964
17965 It is not required that the cost always equal 2 when FROM is the same as TO;
17966 on some machines it is expensive to move between registers if they are not
17967 general registers. */
17968
17969 int
17970 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17971 enum reg_class class2)
17972 {
17973 /* In case we require secondary memory, compute cost of the store followed
17974 by load. In order to avoid bad register allocation choices, we need
17975 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17976
17977 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17978 {
17979 int cost = 1;
17980
17981 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17982 MEMORY_MOVE_COST (mode, class1, 1));
17983 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17984 MEMORY_MOVE_COST (mode, class2, 1));
17985
17986 /* In case of copying from a general purpose register we may emit multiple
17987 stores followed by a single load, causing a memory size mismatch stall.
17988 Count this as an arbitrarily high cost of 20.
17989 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17990 cost += 20;
17991
17992 /* In the case of FP/MMX moves, the registers actually overlap, and we
17993 have to switch modes in order to treat them differently. */
17994 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17995 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17996 cost += 20;
17997
17998 return cost;
17999 }
18000
18001 /* Moves between SSE/MMX and integer unit are expensive. */
18002 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18003 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18004 return ix86_cost->mmxsse_to_integer;
18005 if (MAYBE_FLOAT_CLASS_P (class1))
18006 return ix86_cost->fp_move;
18007 if (MAYBE_SSE_CLASS_P (class1))
18008 return ix86_cost->sse_move;
18009 if (MAYBE_MMX_CLASS_P (class1))
18010 return ix86_cost->mmx_move;
18011 return 2;
18012 }
18013
18014 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18015
18016 bool
18017 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18018 {
18019 /* Flags and only flags can only hold CCmode values. */
18020 if (CC_REGNO_P (regno))
18021 return GET_MODE_CLASS (mode) == MODE_CC;
18022 if (GET_MODE_CLASS (mode) == MODE_CC
18023 || GET_MODE_CLASS (mode) == MODE_RANDOM
18024 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18025 return 0;
18026 if (FP_REGNO_P (regno))
18027 return VALID_FP_MODE_P (mode);
18028 if (SSE_REGNO_P (regno))
18029 {
18030 /* We implement the move patterns for all vector modes into and
18031 out of SSE registers, even when no operation instructions
18032 are available. */
18033 return (VALID_SSE_REG_MODE (mode)
18034 || VALID_SSE2_REG_MODE (mode)
18035 || VALID_MMX_REG_MODE (mode)
18036 || VALID_MMX_REG_MODE_3DNOW (mode));
18037 }
18038 if (MMX_REGNO_P (regno))
18039 {
18040 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18041 so if the register is available at all, then we can move data of
18042 the given mode into or out of it. */
18043 return (VALID_MMX_REG_MODE (mode)
18044 || VALID_MMX_REG_MODE_3DNOW (mode));
18045 }
18046
18047 if (mode == QImode)
18048 {
18049 /* Take care for QImode values - they can be in non-QI regs,
18050 but then they do cause partial register stalls. */
18051 if (regno < 4 || TARGET_64BIT)
18052 return 1;
18053 if (!TARGET_PARTIAL_REG_STALL)
18054 return 1;
18055 return reload_in_progress || reload_completed;
18056 }
18057 /* We handle both integer and floats in the general purpose registers. */
18058 else if (VALID_INT_MODE_P (mode))
18059 return 1;
18060 else if (VALID_FP_MODE_P (mode))
18061 return 1;
18062 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18063 on to use that value in smaller contexts, this can easily force a
18064 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18065 supporting DImode, allow it. */
18066 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18067 return 1;
18068
18069 return 0;
18070 }
18071
18072 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18073 tieable integer mode. */
18074
18075 static bool
18076 ix86_tieable_integer_mode_p (enum machine_mode mode)
18077 {
18078 switch (mode)
18079 {
18080 case HImode:
18081 case SImode:
18082 return true;
18083
18084 case QImode:
18085 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18086
18087 case DImode:
18088 return TARGET_64BIT;
18089
18090 default:
18091 return false;
18092 }
18093 }
18094
18095 /* Return true if MODE1 is accessible in a register that can hold MODE2
18096 without copying. That is, all register classes that can hold MODE2
18097 can also hold MODE1. */
18098
18099 bool
18100 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18101 {
18102 if (mode1 == mode2)
18103 return true;
18104
18105 if (ix86_tieable_integer_mode_p (mode1)
18106 && ix86_tieable_integer_mode_p (mode2))
18107 return true;
18108
18109 /* MODE2 being XFmode implies fp stack or general regs, which means we
18110 can tie any smaller floating point modes to it. Note that we do not
18111 tie this with TFmode. */
18112 if (mode2 == XFmode)
18113 return mode1 == SFmode || mode1 == DFmode;
18114
18115 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18116 that we can tie it with SFmode. */
18117 if (mode2 == DFmode)
18118 return mode1 == SFmode;
18119
18120 /* If MODE2 is only appropriate for an SSE register, then tie with
18121 any other mode acceptable to SSE registers. */
18122 if (GET_MODE_SIZE (mode2) >= 8
18123 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18124 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18125
18126 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18127 with any other mode acceptable to MMX registers. */
18128 if (GET_MODE_SIZE (mode2) == 8
18129 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18130 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18131
18132 return false;
18133 }
18134
18135 /* Return the cost of moving data of mode M between a
18136 register and memory. A value of 2 is the default; this cost is
18137 relative to those in `REGISTER_MOVE_COST'.
18138
18139 If moving between registers and memory is more expensive than
18140 between two registers, you should define this macro to express the
18141 relative cost.
18142
18143 Also model the increased cost of moving QImode registers in
18144 non-Q_REGS classes.
18145 */
18146 int
18147 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18148 {
18149 if (FLOAT_CLASS_P (class))
18150 {
18151 int index;
18152 switch (mode)
18153 {
18154 case SFmode:
18155 index = 0;
18156 break;
18157 case DFmode:
18158 index = 1;
18159 break;
18160 case XFmode:
18161 index = 2;
18162 break;
18163 default:
18164 return 100;
18165 }
18166 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18167 }
18168 if (SSE_CLASS_P (class))
18169 {
18170 int index;
18171 switch (GET_MODE_SIZE (mode))
18172 {
18173 case 4:
18174 index = 0;
18175 break;
18176 case 8:
18177 index = 1;
18178 break;
18179 case 16:
18180 index = 2;
18181 break;
18182 default:
18183 return 100;
18184 }
18185 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18186 }
18187 if (MMX_CLASS_P (class))
18188 {
18189 int index;
18190 switch (GET_MODE_SIZE (mode))
18191 {
18192 case 4:
18193 index = 0;
18194 break;
18195 case 8:
18196 index = 1;
18197 break;
18198 default:
18199 return 100;
18200 }
18201 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18202 }
18203 switch (GET_MODE_SIZE (mode))
18204 {
18205 case 1:
18206 if (in)
18207 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18208 : ix86_cost->movzbl_load);
18209 else
18210 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18211 : ix86_cost->int_store[0] + 4);
18212 break;
18213 case 2:
18214 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18215 default:
18216 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18217 if (mode == TFmode)
18218 mode = XFmode;
18219 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18220 * (((int) GET_MODE_SIZE (mode)
18221 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18222 }
18223 }
18224
18225 /* Compute a (partial) cost for rtx X. Return true if the complete
18226 cost has been computed, and false if subexpressions should be
18227 scanned. In either case, *TOTAL contains the cost result. */
18228
18229 static bool
18230 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18231 {
18232 enum machine_mode mode = GET_MODE (x);
18233
18234 switch (code)
18235 {
18236 case CONST_INT:
18237 case CONST:
18238 case LABEL_REF:
18239 case SYMBOL_REF:
18240 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18241 *total = 3;
18242 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18243 *total = 2;
18244 else if (flag_pic && SYMBOLIC_CONST (x)
18245 && (!TARGET_64BIT
18246 || (GET_CODE (x) != LABEL_REF
18247 && (GET_CODE (x) != SYMBOL_REF
18248 || !SYMBOL_REF_LOCAL_P (x)))))
18249 *total = 1;
18250 else
18251 *total = 0;
18252 return true;
18253
18254 case CONST_DOUBLE:
18255 if (mode == VOIDmode)
18256 *total = 0;
18257 else
18258 switch (standard_80387_constant_p (x))
18259 {
18260 case 1: /* 0.0 */
18261 *total = 1;
18262 break;
18263 default: /* Other constants */
18264 *total = 2;
18265 break;
18266 case 0:
18267 case -1:
18268 /* Start with (MEM (SYMBOL_REF)), since that's where
18269 it'll probably end up. Add a penalty for size. */
18270 *total = (COSTS_N_INSNS (1)
18271 + (flag_pic != 0 && !TARGET_64BIT)
18272 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18273 break;
18274 }
18275 return true;
18276
18277 case ZERO_EXTEND:
18278 /* The zero extension is often completely free on x86_64, so make
18279 it as cheap as possible. */
18280 if (TARGET_64BIT && mode == DImode
18281 && GET_MODE (XEXP (x, 0)) == SImode)
18282 *total = 1;
18283 else if (TARGET_ZERO_EXTEND_WITH_AND)
18284 *total = ix86_cost->add;
18285 else
18286 *total = ix86_cost->movzx;
18287 return false;
18288
18289 case SIGN_EXTEND:
18290 *total = ix86_cost->movsx;
18291 return false;
18292
18293 case ASHIFT:
18294 if (CONST_INT_P (XEXP (x, 1))
18295 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18296 {
18297 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18298 if (value == 1)
18299 {
18300 *total = ix86_cost->add;
18301 return false;
18302 }
18303 if ((value == 2 || value == 3)
18304 && ix86_cost->lea <= ix86_cost->shift_const)
18305 {
18306 *total = ix86_cost->lea;
18307 return false;
18308 }
18309 }
18310 /* FALLTHRU */
18311
18312 case ROTATE:
18313 case ASHIFTRT:
18314 case LSHIFTRT:
18315 case ROTATERT:
18316 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18317 {
18318 if (CONST_INT_P (XEXP (x, 1)))
18319 {
18320 if (INTVAL (XEXP (x, 1)) > 32)
18321 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18322 else
18323 *total = ix86_cost->shift_const * 2;
18324 }
18325 else
18326 {
18327 if (GET_CODE (XEXP (x, 1)) == AND)
18328 *total = ix86_cost->shift_var * 2;
18329 else
18330 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18331 }
18332 }
18333 else
18334 {
18335 if (CONST_INT_P (XEXP (x, 1)))
18336 *total = ix86_cost->shift_const;
18337 else
18338 *total = ix86_cost->shift_var;
18339 }
18340 return false;
18341
18342 case MULT:
18343 if (FLOAT_MODE_P (mode))
18344 {
18345 *total = ix86_cost->fmul;
18346 return false;
18347 }
18348 else
18349 {
18350 rtx op0 = XEXP (x, 0);
18351 rtx op1 = XEXP (x, 1);
18352 int nbits;
18353 if (CONST_INT_P (XEXP (x, 1)))
18354 {
18355 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18356 for (nbits = 0; value != 0; value &= value - 1)
18357 nbits++;
18358 }
18359 else
18360 /* This is arbitrary. */
18361 nbits = 7;
18362
18363 /* Compute costs correctly for widening multiplication. */
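	      /* E.g. (mult:DI (sign_extend:DI (reg:SI))
			       (sign_extend:DI (reg:SI)))
		 is a single widening imul, so cost it as an SImode multiply
		 by switching MODE to the narrower mode below.  */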
18364 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
18365 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
18366 == GET_MODE_SIZE (mode))
18367 {
18368 int is_mulwiden = 0;
18369 enum machine_mode inner_mode = GET_MODE (op0);
18370
18371 if (GET_CODE (op0) == GET_CODE (op1))
18372 is_mulwiden = 1, op1 = XEXP (op1, 0);
18373 else if (CONST_INT_P (op1))
18374 {
18375 if (GET_CODE (op0) == SIGN_EXTEND)
18376 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
18377 == INTVAL (op1);
18378 else
18379 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
18380 }
18381
18382 if (is_mulwiden)
18383 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
18384 }
18385
18386 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
18387 + nbits * ix86_cost->mult_bit
18388 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
18389
18390 return true;
18391 }
18392
18393 case DIV:
18394 case UDIV:
18395 case MOD:
18396 case UMOD:
18397 if (FLOAT_MODE_P (mode))
18398 *total = ix86_cost->fdiv;
18399 else
18400 *total = ix86_cost->divide[MODE_INDEX (mode)];
18401 return false;
18402
18403 case PLUS:
18404 if (FLOAT_MODE_P (mode))
18405 *total = ix86_cost->fadd;
18406 else if (GET_MODE_CLASS (mode) == MODE_INT
18407 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
18408 {
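	  /* These shapes correspond to a single lea; e.g.
	     (plus (plus (mult reg1 4) reg2) 8) matches the address
	     8(reg2,reg1,4) and is costed as one lea plus its operands.  */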
18409 if (GET_CODE (XEXP (x, 0)) == PLUS
18410 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
18411 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
18412 && CONSTANT_P (XEXP (x, 1)))
18413 {
18414 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
18415 if (val == 2 || val == 4 || val == 8)
18416 {
18417 *total = ix86_cost->lea;
18418 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18419 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
18420 outer_code);
18421 *total += rtx_cost (XEXP (x, 1), outer_code);
18422 return true;
18423 }
18424 }
18425 else if (GET_CODE (XEXP (x, 0)) == MULT
18426 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
18427 {
18428 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
18429 if (val == 2 || val == 4 || val == 8)
18430 {
18431 *total = ix86_cost->lea;
18432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18433 *total += rtx_cost (XEXP (x, 1), outer_code);
18434 return true;
18435 }
18436 }
18437 else if (GET_CODE (XEXP (x, 0)) == PLUS)
18438 {
18439 *total = ix86_cost->lea;
18440 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18441 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18442 *total += rtx_cost (XEXP (x, 1), outer_code);
18443 return true;
18444 }
18445 }
18446 /* FALLTHRU */
18447
18448 case MINUS:
18449 if (FLOAT_MODE_P (mode))
18450 {
18451 *total = ix86_cost->fadd;
18452 return false;
18453 }
18454 /* FALLTHRU */
18455
18456 case AND:
18457 case IOR:
18458 case XOR:
18459 if (!TARGET_64BIT && mode == DImode)
18460 {
18461 *total = (ix86_cost->add * 2
18462 + (rtx_cost (XEXP (x, 0), outer_code)
18463 << (GET_MODE (XEXP (x, 0)) != DImode))
18464 + (rtx_cost (XEXP (x, 1), outer_code)
18465 << (GET_MODE (XEXP (x, 1)) != DImode)));
18466 return true;
18467 }
18468 /* FALLTHRU */
18469
18470 case NEG:
18471 if (FLOAT_MODE_P (mode))
18472 {
18473 *total = ix86_cost->fchs;
18474 return false;
18475 }
18476 /* FALLTHRU */
18477
18478 case NOT:
18479 if (!TARGET_64BIT && mode == DImode)
18480 *total = ix86_cost->add * 2;
18481 else
18482 *total = ix86_cost->add;
18483 return false;
18484
18485 case COMPARE:
18486 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18487 && XEXP (XEXP (x, 0), 1) == const1_rtx
18488 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
18489 && XEXP (x, 1) == const0_rtx)
18490 {
18491 /* This kind of construct is implemented using test[bwl].
18492 Treat it as if we had an AND. */
18493 *total = (ix86_cost->add
18494 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18495 + rtx_cost (const1_rtx, outer_code));
18496 return true;
18497 }
18498 return false;
18499
18500 case FLOAT_EXTEND:
18501 if (!TARGET_SSE_MATH
18502 || mode == XFmode
18503 || (mode == DFmode && !TARGET_SSE2))
18504 *total = 0;
18505 return false;
18506
18507 case ABS:
18508 if (FLOAT_MODE_P (mode))
18509 *total = ix86_cost->fabs;
18510 return false;
18511
18512 case SQRT:
18513 if (FLOAT_MODE_P (mode))
18514 *total = ix86_cost->fsqrt;
18515 return false;
18516
18517 case UNSPEC:
18518 if (XINT (x, 1) == UNSPEC_TP)
18519 *total = 0;
18520 return false;
18521
18522 default:
18523 return false;
18524 }
18525 }
18526
18527 #if TARGET_MACHO
18528
18529 static int current_machopic_label_num;
18530
18531 /* Given a symbol name and its associated stub, write out the
18532 definition of the stub. */
18533
18534 void
18535 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18536 {
18537 unsigned int length;
18538 char *binder_name, *symbol_name, lazy_ptr_name[32];
18539 int label = ++current_machopic_label_num;
18540
18541 /* For 64-bit we shouldn't get here. */
18542 gcc_assert (!TARGET_64BIT);
18543
18544 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18545 symb = (*targetm.strip_name_encoding) (symb);
18546
18547 length = strlen (stub);
18548 binder_name = alloca (length + 32);
18549 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18550
18551 length = strlen (symb);
18552 symbol_name = alloca (length + 32);
18553 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18554
18555 sprintf (lazy_ptr_name, "L%d$lz", label);
18556
18557 if (MACHOPIC_PURE)
18558 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18559 else
18560 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18561
18562 fprintf (file, "%s:\n", stub);
18563 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18564
18565 if (MACHOPIC_PURE)
18566 {
18567 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18568 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18569 fprintf (file, "\tjmp\t*%%edx\n");
18570 }
18571 else
18572 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18573
18574 fprintf (file, "%s:\n", binder_name);
18575
18576 if (MACHOPIC_PURE)
18577 {
18578 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18579 fprintf (file, "\tpushl\t%%eax\n");
18580 }
18581 else
18582 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18583
18584 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18585
18586 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18587 fprintf (file, "%s:\n", lazy_ptr_name);
18588 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18589 fprintf (file, "\t.long %s\n", binder_name);
18590 }
18591
18592 void
18593 darwin_x86_file_end (void)
18594 {
18595 darwin_file_end ();
18596 ix86_file_end ();
18597 }
18598 #endif /* TARGET_MACHO */
18599
18600 /* Order the registers for register allocator. */
18601
18602 void
18603 x86_order_regs_for_local_alloc (void)
18604 {
18605 int pos = 0;
18606 int i;
18607
18608 /* First allocate the local general purpose registers. */
18609 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18610 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18611 reg_alloc_order [pos++] = i;
18612
18613 /* Global general purpose registers. */
18614 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18615 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18616 reg_alloc_order [pos++] = i;
18617
18618 /* x87 registers come first in case we are doing FP math
18619 using them. */
18620 if (!TARGET_SSE_MATH)
18621 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18622 reg_alloc_order [pos++] = i;
18623
18624 /* SSE registers. */
18625 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18626 reg_alloc_order [pos++] = i;
18627 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18628 reg_alloc_order [pos++] = i;
18629
18630 /* x87 registers. */
18631 if (TARGET_SSE_MATH)
18632 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18633 reg_alloc_order [pos++] = i;
18634
18635 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18636 reg_alloc_order [pos++] = i;
18637
18638 /* Initialize the rest of the array, as we do not allocate some registers
18639 at all. */
18640 while (pos < FIRST_PSEUDO_REGISTER)
18641 reg_alloc_order [pos++] = 0;
18642 }
18643
18644 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18645 struct attribute_spec.handler. */
18646 static tree
18647 ix86_handle_struct_attribute (tree *node, tree name,
18648 tree args ATTRIBUTE_UNUSED,
18649 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18650 {
18651 tree *type = NULL;
18652 if (DECL_P (*node))
18653 {
18654 if (TREE_CODE (*node) == TYPE_DECL)
18655 type = &TREE_TYPE (*node);
18656 }
18657 else
18658 type = node;
18659
18660 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18661 || TREE_CODE (*type) == UNION_TYPE)))
18662 {
18663 warning (OPT_Wattributes, "%qs attribute ignored",
18664 IDENTIFIER_POINTER (name));
18665 *no_add_attrs = true;
18666 }
18667
18668 else if ((is_attribute_p ("ms_struct", name)
18669 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18670 || ((is_attribute_p ("gcc_struct", name)
18671 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18672 {
18673 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18674 IDENTIFIER_POINTER (name));
18675 *no_add_attrs = true;
18676 }
18677
18678 return NULL_TREE;
18679 }
18680
18681 static bool
18682 ix86_ms_bitfield_layout_p (tree record_type)
18683 {
18684 return (TARGET_MS_BITFIELD_LAYOUT &&
18685 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18686 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18687 }
18688
18689 /* Returns an expression indicating where the this parameter is
18690 located on entry to the FUNCTION. */
18691
18692 static rtx
18693 x86_this_parameter (tree function)
18694 {
18695 tree type = TREE_TYPE (function);
18696
18697 if (TARGET_64BIT)
18698 {
18699 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18700 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18701 }
18702
18703 if (ix86_function_regparm (type, function) > 0)
18704 {
18705 tree parm;
18706
18707 parm = TYPE_ARG_TYPES (type);
18708 /* Figure out whether or not the function has a variable number of
18709 arguments. */
18710 for (; parm; parm = TREE_CHAIN (parm))
18711 if (TREE_VALUE (parm) == void_type_node)
18712 break;
18713 /* If not, the this parameter is in the first argument. */
18714 if (parm)
18715 {
18716 int regno = 0;
18717 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18718 regno = 2;
18719 return gen_rtx_REG (SImode, regno);
18720 }
18721 }
18722
18723 if (aggregate_value_p (TREE_TYPE (type), type))
18724 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18725 else
18726 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18727 }
18728
18729 /* Determine whether x86_output_mi_thunk can succeed. */
18730
18731 static bool
18732 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18733 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18734 HOST_WIDE_INT vcall_offset, tree function)
18735 {
18736 /* 64-bit can handle anything. */
18737 if (TARGET_64BIT)
18738 return true;
18739
18740 /* For 32-bit, everything's fine if we have one free register. */
18741 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18742 return true;
18743
18744 /* Need a free register for vcall_offset. */
18745 if (vcall_offset)
18746 return false;
18747
18748 /* Need a free register for GOT references. */
18749 if (flag_pic && !(*targetm.binds_local_p) (function))
18750 return false;
18751
18752 /* Otherwise ok. */
18753 return true;
18754 }
18755
18756 /* Output the assembler code for a thunk function. THUNK_DECL is the
18757 declaration for the thunk function itself, FUNCTION is the decl for
18758 the target function. DELTA is an immediate constant offset to be
18759 added to THIS. If VCALL_OFFSET is nonzero, the word at
18760 *(*this + vcall_offset) should be added to THIS. */
18761
18762 static void
18763 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18764 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18765 HOST_WIDE_INT vcall_offset, tree function)
18766 {
18767 rtx xops[3];
18768 rtx this = x86_this_parameter (function);
18769 rtx this_reg, tmp;
18770
18771 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18772 pull it in now and let DELTA benefit. */
18773 if (REG_P (this))
18774 this_reg = this;
18775 else if (vcall_offset)
18776 {
18777 /* Put the this parameter into %eax. */
18778 xops[0] = this;
18779 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18780 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18781 }
18782 else
18783 this_reg = NULL_RTX;
18784
18785 /* Adjust the this parameter by a fixed constant. */
18786 if (delta)
18787 {
18788 xops[0] = GEN_INT (delta);
18789 xops[1] = this_reg ? this_reg : this;
18790 if (TARGET_64BIT)
18791 {
18792 if (!x86_64_general_operand (xops[0], DImode))
18793 {
18794 tmp = gen_rtx_REG (DImode, R10_REG);
18795 xops[1] = tmp;
18796 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18797 xops[0] = tmp;
18798 xops[1] = this;
18799 }
18800 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18801 }
18802 else
18803 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18804 }
18805
18806 /* Adjust the this parameter by a value stored in the vtable. */
18807 if (vcall_offset)
18808 {
18809 if (TARGET_64BIT)
18810 tmp = gen_rtx_REG (DImode, R10_REG);
18811 else
18812 {
18813 int tmp_regno = 2 /* ECX */;
18814 if (lookup_attribute ("fastcall",
18815 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18816 tmp_regno = 0 /* EAX */;
18817 tmp = gen_rtx_REG (SImode, tmp_regno);
18818 }
18819
18820 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18821 xops[1] = tmp;
18822 if (TARGET_64BIT)
18823 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18824 else
18825 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18826
18827 /* Adjust the this parameter. */
18828 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18829 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18830 {
18831 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
18832 xops[0] = GEN_INT (vcall_offset);
18833 xops[1] = tmp2;
18834 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18835 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18836 }
18837 xops[1] = this_reg;
18838 if (TARGET_64BIT)
18839 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18840 else
18841 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18842 }
18843
18844 /* If necessary, drop THIS back to its stack slot. */
18845 if (this_reg && this_reg != this)
18846 {
18847 xops[0] = this_reg;
18848 xops[1] = this;
18849 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18850 }
18851
18852 xops[0] = XEXP (DECL_RTL (function), 0);
18853 if (TARGET_64BIT)
18854 {
18855 if (!flag_pic || (*targetm.binds_local_p) (function))
18856 output_asm_insn ("jmp\t%P0", xops);
18857 else
18858 {
18859 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18860 tmp = gen_rtx_CONST (Pmode, tmp);
18861 tmp = gen_rtx_MEM (QImode, tmp);
18862 xops[0] = tmp;
18863 output_asm_insn ("jmp\t%A0", xops);
18864 }
18865 }
18866 else
18867 {
18868 if (!flag_pic || (*targetm.binds_local_p) (function))
18869 output_asm_insn ("jmp\t%P0", xops);
18870 else
18871 #if TARGET_MACHO
18872 if (TARGET_MACHO)
18873 {
18874 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18875 tmp = (gen_rtx_SYMBOL_REF
18876 (Pmode,
18877 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18878 tmp = gen_rtx_MEM (QImode, tmp);
18879 xops[0] = tmp;
18880 output_asm_insn ("jmp\t%0", xops);
18881 }
18882 else
18883 #endif /* TARGET_MACHO */
18884 {
18885 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18886 output_set_got (tmp, NULL_RTX);
18887
18888 xops[1] = tmp;
18889 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18890 output_asm_insn ("jmp\t{*}%1", xops);
18891 }
18892 }
18893 }
18894
18895 static void
18896 x86_file_start (void)
18897 {
18898 default_file_start ();
18899 #if TARGET_MACHO
18900 darwin_file_start ();
18901 #endif
18902 if (X86_FILE_START_VERSION_DIRECTIVE)
18903 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18904 if (X86_FILE_START_FLTUSED)
18905 fputs ("\t.global\t__fltused\n", asm_out_file);
18906 if (ix86_asm_dialect == ASM_INTEL)
18907 fputs ("\t.intel_syntax\n", asm_out_file);
18908 }
18909
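 /* Cap the alignment of structure fields: on 32-bit targets without
    -malign-double, double, long long and similar fields are aligned to at
    most 32 bits.  */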
18910 int
18911 x86_field_alignment (tree field, int computed)
18912 {
18913 enum machine_mode mode;
18914 tree type = TREE_TYPE (field);
18915
18916 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18917 return computed;
18918 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18919 ? get_inner_array_type (type) : type);
18920 if (mode == DFmode || mode == DCmode
18921 || GET_MODE_CLASS (mode) == MODE_INT
18922 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18923 return MIN (32, computed);
18924 return computed;
18925 }
18926
18927 /* Output assembler code to FILE to increment profiler label # LABELNO
18928 for profiling a function entry. */
18929 void
18930 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18931 {
18932 if (TARGET_64BIT)
18933 if (flag_pic)
18934 {
18935 #ifndef NO_PROFILE_COUNTERS
18936 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18937 #endif
18938 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18939 }
18940 else
18941 {
18942 #ifndef NO_PROFILE_COUNTERS
18943 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18944 #endif
18945 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18946 }
18947 else if (flag_pic)
18948 {
18949 #ifndef NO_PROFILE_COUNTERS
18950 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18951 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18952 #endif
18953 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18954 }
18955 else
18956 {
18957 #ifndef NO_PROFILE_COUNTERS
18958 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18959 PROFILE_COUNT_REGISTER);
18960 #endif
18961 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18962 }
18963 }
18964
18965 /* We don't have exact information about the insn sizes, but we may assume
18966 quite safely that we are informed about all 1 byte insns and memory
18967 address sizes. This is enough to eliminate unnecessary padding in
18968 99% of cases. */
18969
18970 static int
18971 min_insn_size (rtx insn)
18972 {
18973 int l = 0;
18974
18975 if (!INSN_P (insn) || !active_insn_p (insn))
18976 return 0;
18977
 18978   /* Discard alignments we've emitted ourselves and jump tables.  */
18979 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18980 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18981 return 0;
18982 if (JUMP_P (insn)
18983 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18984 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18985 return 0;
18986
 18987   /* Important case: calls are always 5 bytes.
 18988      It is common to have many calls in a row.  */
18989 if (CALL_P (insn)
18990 && symbolic_reference_mentioned_p (PATTERN (insn))
18991 && !SIBLING_CALL_P (insn))
18992 return 5;
18993 if (get_attr_length (insn) <= 1)
18994 return 1;
18995
18996 /* For normal instructions we may rely on the sizes of addresses
18997 and the presence of symbol to require 4 bytes of encoding.
18998 This is not the case for jumps where references are PC relative. */
18999 if (!JUMP_P (insn))
19000 {
19001 l = get_attr_length_address (insn);
19002 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19003 l = 4;
19004 }
19005 if (l)
19006 return 1+l;
19007 else
19008 return 2;
19009 }
19010
19011 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19012 window. */
19013
19014 static void
19015 ix86_avoid_jump_misspredicts (void)
19016 {
19017 rtx insn, start = get_insns ();
19018 int nbytes = 0, njumps = 0;
19019 int isjump = 0;
19020
 19021   /* Look for all minimal intervals of instructions containing 4 jumps.
 19022      The intervals are bounded by START and INSN.  NBYTES is the total
 19023      size of the instructions in the interval, including INSN and not
 19024      including START.  When NBYTES is smaller than 16, it is possible
 19025      that the end of START and INSN end up in the same 16-byte window.
 19026
 19027      The smallest offset in the window at which INSN can start is the
 19028      case where START ends at offset 0; the offset of INSN is then
 19029      NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
 19030      maxskip 17 - NBYTES + sizeof (INSN).  */
19031 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19032 {
19033
19034 nbytes += min_insn_size (insn);
19035 if (dump_file)
19036 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19037 INSN_UID (insn), min_insn_size (insn));
19038 if ((JUMP_P (insn)
19039 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19040 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19041 || CALL_P (insn))
19042 njumps++;
19043 else
19044 continue;
19045
19046 while (njumps > 3)
19047 {
19048 start = NEXT_INSN (start);
19049 if ((JUMP_P (start)
19050 && GET_CODE (PATTERN (start)) != ADDR_VEC
19051 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19052 || CALL_P (start))
19053 njumps--, isjump = 1;
19054 else
19055 isjump = 0;
19056 nbytes -= min_insn_size (start);
19057 }
19058 gcc_assert (njumps >= 0);
19059 if (dump_file)
19060 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19061 INSN_UID (start), INSN_UID (insn), nbytes);
19062
19063 if (njumps == 3 && isjump && nbytes < 16)
19064 {
19065 int padsize = 15 - nbytes + min_insn_size (insn);
19066
19067 if (dump_file)
19068 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19069 INSN_UID (insn), padsize);
19070 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19071 }
19072 }
19073 }
19074
 19075 /* The AMD Athlon works faster when a RET is not the destination of a
 19076    conditional jump and is not directly preceded by another jump
 19077    instruction.  We avoid the penalty by inserting a NOP just before the
 19078    RET instruction in such cases.  */
19079 static void
19080 ix86_pad_returns (void)
19081 {
19082 edge e;
19083 edge_iterator ei;
19084
19085 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19086 {
19087 basic_block bb = e->src;
19088 rtx ret = BB_END (bb);
19089 rtx prev;
19090 bool replace = false;
19091
19092 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19093 || !maybe_hot_bb_p (bb))
19094 continue;
19095 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19096 if (active_insn_p (prev) || LABEL_P (prev))
19097 break;
19098 if (prev && LABEL_P (prev))
19099 {
19100 edge e;
19101 edge_iterator ei;
19102
19103 FOR_EACH_EDGE (e, ei, bb->preds)
19104 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19105 && !(e->flags & EDGE_FALLTHRU))
19106 replace = true;
19107 }
19108 if (!replace)
19109 {
19110 prev = prev_active_insn (ret);
19111 if (prev
19112 && ((JUMP_P (prev) && any_condjump_p (prev))
19113 || CALL_P (prev)))
19114 replace = true;
 19115 	  /* Empty functions get a branch mispredict even when the jump
 19116 	     destination is not visible to us.  */
19117 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19118 replace = true;
19119 }
19120 if (replace)
19121 {
19122 emit_insn_before (gen_return_internal_long (), ret);
19123 delete_insn (ret);
19124 }
19125 }
19126 }
19127
 19128 /* Implement machine specific optimizations.  We implement padding of
 19129    returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
19130 static void
19131 ix86_reorg (void)
19132 {
19133 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19134 ix86_pad_returns ();
19135 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19136 ix86_avoid_jump_misspredicts ();
19137 }
19138
 19139 /* Return nonzero when a QImode register that must be represented via a
 19140    REX prefix is used.  */
19141 bool
19142 x86_extended_QIreg_mentioned_p (rtx insn)
19143 {
19144 int i;
19145 extract_insn_cached (insn);
19146 for (i = 0; i < recog_data.n_operands; i++)
19147 if (REG_P (recog_data.operand[i])
19148 && REGNO (recog_data.operand[i]) >= 4)
19149 return true;
19150 return false;
19151 }
19152
 19153 /* Return nonzero when P points to a register encoded via a REX prefix.
 19154    Called via for_each_rtx.  */
19155 static int
19156 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19157 {
19158 unsigned int regno;
19159 if (!REG_P (*p))
19160 return 0;
19161 regno = REGNO (*p);
19162 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19163 }
19164
19165 /* Return true when INSN mentions register that must be encoded using REX
19166 prefix. */
19167 bool
19168 x86_extended_reg_mentioned_p (rtx insn)
19169 {
19170 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19171 }
19172
19173 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19174 optabs would emit if we didn't have TFmode patterns. */
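 /* A rough C sketch of the sequence emitted below (illustrative only):

        if ((signed) x >= 0)
          result = (FP) x;
        else
          {
            u = (x >> 1) | (x & 1);     -- keep the low bit so doubling rounds right
            result = (FP) u + (FP) u;
          }  */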
19175
19176 void
19177 x86_emit_floatuns (rtx operands[2])
19178 {
19179 rtx neglab, donelab, i0, i1, f0, in, out;
19180 enum machine_mode mode, inmode;
19181
19182 inmode = GET_MODE (operands[1]);
19183 gcc_assert (inmode == SImode || inmode == DImode);
19184
19185 out = operands[0];
19186 in = force_reg (inmode, operands[1]);
19187 mode = GET_MODE (out);
19188 neglab = gen_label_rtx ();
19189 donelab = gen_label_rtx ();
19190 i1 = gen_reg_rtx (Pmode);
19191 f0 = gen_reg_rtx (mode);
19192
19193 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
19194
19195 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
19196 emit_jump_insn (gen_jump (donelab));
19197 emit_barrier ();
19198
19199 emit_label (neglab);
19200
19201 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19202 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19203 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19204 expand_float (f0, i0, 0);
19205 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19206
19207 emit_label (donelab);
19208 }
19209 \f
19210 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19211 with all elements equal to VAR. Return true if successful. */
19212
19213 static bool
19214 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19215 rtx target, rtx val)
19216 {
19217 enum machine_mode smode, wsmode, wvmode;
19218 rtx x;
19219
19220 switch (mode)
19221 {
19222 case V2SImode:
19223 case V2SFmode:
19224 if (!mmx_ok)
19225 return false;
19226 /* FALLTHRU */
19227
19228 case V2DFmode:
19229 case V2DImode:
19230 case V4SFmode:
19231 case V4SImode:
19232 val = force_reg (GET_MODE_INNER (mode), val);
19233 x = gen_rtx_VEC_DUPLICATE (mode, val);
19234 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19235 return true;
19236
19237 case V4HImode:
19238 if (!mmx_ok)
19239 return false;
19240 if (TARGET_SSE || TARGET_3DNOW_A)
19241 {
19242 val = gen_lowpart (SImode, val);
19243 x = gen_rtx_TRUNCATE (HImode, val);
19244 x = gen_rtx_VEC_DUPLICATE (mode, x);
19245 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19246 return true;
19247 }
19248 else
19249 {
19250 smode = HImode;
19251 wsmode = SImode;
19252 wvmode = V2SImode;
19253 goto widen;
19254 }
19255
19256 case V8QImode:
19257 if (!mmx_ok)
19258 return false;
19259 smode = QImode;
19260 wsmode = HImode;
19261 wvmode = V4HImode;
19262 goto widen;
19263 case V8HImode:
19264 if (TARGET_SSE2)
19265 {
19266 rtx tmp1, tmp2;
19267 /* Extend HImode to SImode using a paradoxical SUBREG. */
19268 tmp1 = gen_reg_rtx (SImode);
19269 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19270 /* Insert the SImode value as low element of V4SImode vector. */
19271 tmp2 = gen_reg_rtx (V4SImode);
19272 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19273 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19274 CONST0_RTX (V4SImode),
19275 const1_rtx);
19276 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19277 /* Cast the V4SImode vector back to a V8HImode vector. */
19278 tmp1 = gen_reg_rtx (V8HImode);
19279 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19280 /* Duplicate the low short through the whole low SImode word. */
19281 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19282 /* Cast the V8HImode vector back to a V4SImode vector. */
19283 tmp2 = gen_reg_rtx (V4SImode);
19284 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19285 /* Replicate the low element of the V4SImode vector. */
19286 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 19287 	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
19288 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19289 return true;
19290 }
19291 smode = HImode;
19292 wsmode = SImode;
19293 wvmode = V4SImode;
19294 goto widen;
19295 case V16QImode:
19296 if (TARGET_SSE2)
19297 {
19298 rtx tmp1, tmp2;
19299 /* Extend QImode to SImode using a paradoxical SUBREG. */
19300 tmp1 = gen_reg_rtx (SImode);
19301 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19302 /* Insert the SImode value as low element of V4SImode vector. */
19303 tmp2 = gen_reg_rtx (V4SImode);
19304 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19305 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19306 CONST0_RTX (V4SImode),
19307 const1_rtx);
19308 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19309 /* Cast the V4SImode vector back to a V16QImode vector. */
19310 tmp1 = gen_reg_rtx (V16QImode);
19311 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19312 /* Duplicate the low byte through the whole low SImode word. */
19313 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19314 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19315 /* Cast the V16QImode vector back to a V4SImode vector. */
19316 tmp2 = gen_reg_rtx (V4SImode);
19317 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19318 /* Replicate the low element of the V4SImode vector. */
19319 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 19320 	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
19321 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19322 return true;
19323 }
19324 smode = QImode;
19325 wsmode = HImode;
19326 wvmode = V8HImode;
19327 goto widen;
19328 widen:
19329 /* Replicate the value once into the next wider mode and recurse. */
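      /* For example, a QImode value 0xab first becomes the HImode value
         0xabab; the recursive call then broadcasts that wider value, so the
         final vector, viewed in the original mode, holds 0xab in every
         element.  (Illustrative example.)  */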
19330 val = convert_modes (wsmode, smode, val, true);
19331 x = expand_simple_binop (wsmode, ASHIFT, val,
19332 GEN_INT (GET_MODE_BITSIZE (smode)),
19333 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19334 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19335
19336 x = gen_reg_rtx (wvmode);
19337 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19338 gcc_unreachable ();
19339 emit_move_insn (target, gen_lowpart (mode, x));
19340 return true;
19341
19342 default:
19343 return false;
19344 }
19345 }
19346
19347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19348 whose ONE_VAR element is VAR, and other elements are zero. Return true
19349 if successful. */
19350
19351 static bool
19352 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19353 rtx target, rtx var, int one_var)
19354 {
19355 enum machine_mode vsimode;
19356 rtx new_target;
19357 rtx x, tmp;
19358
19359 switch (mode)
19360 {
19361 case V2SFmode:
19362 case V2SImode:
19363 if (!mmx_ok)
19364 return false;
19365 /* FALLTHRU */
19366
19367 case V2DFmode:
19368 case V2DImode:
19369 if (one_var != 0)
19370 return false;
19371 var = force_reg (GET_MODE_INNER (mode), var);
19372 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
19373 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19374 return true;
19375
19376 case V4SFmode:
19377 case V4SImode:
19378 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
19379 new_target = gen_reg_rtx (mode);
19380 else
19381 new_target = target;
19382 var = force_reg (GET_MODE_INNER (mode), var);
19383 x = gen_rtx_VEC_DUPLICATE (mode, var);
19384 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
19385 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
19386 if (one_var != 0)
19387 {
19388 /* We need to shuffle the value to the correct position, so
19389 create a new pseudo to store the intermediate result. */
19390
19391 /* With SSE2, we can use the integer shuffle insns. */
19392 if (mode != V4SFmode && TARGET_SSE2)
19393 {
19394 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
19395 GEN_INT (1),
19396 GEN_INT (one_var == 1 ? 0 : 1),
19397 GEN_INT (one_var == 2 ? 0 : 1),
19398 GEN_INT (one_var == 3 ? 0 : 1)));
19399 if (target != new_target)
19400 emit_move_insn (target, new_target);
19401 return true;
19402 }
19403
19404 /* Otherwise convert the intermediate result to V4SFmode and
19405 use the SSE1 shuffle instructions. */
19406 if (mode != V4SFmode)
19407 {
19408 tmp = gen_reg_rtx (V4SFmode);
19409 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
19410 }
19411 else
19412 tmp = new_target;
19413
19414 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
19415 GEN_INT (1),
19416 GEN_INT (one_var == 1 ? 0 : 1),
19417 GEN_INT (one_var == 2 ? 0+4 : 1+4),
19418 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
19419
19420 if (mode != V4SFmode)
19421 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
19422 else if (tmp != target)
19423 emit_move_insn (target, tmp);
19424 }
19425 else if (target != new_target)
19426 emit_move_insn (target, new_target);
19427 return true;
19428
19429 case V8HImode:
19430 case V16QImode:
19431 vsimode = V4SImode;
19432 goto widen;
19433 case V4HImode:
19434 case V8QImode:
19435 if (!mmx_ok)
19436 return false;
19437 vsimode = V2SImode;
19438 goto widen;
19439 widen:
19440 if (one_var != 0)
19441 return false;
19442
19443 /* Zero extend the variable element to SImode and recurse. */
19444 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19445
19446 x = gen_reg_rtx (vsimode);
19447 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19448 var, one_var))
19449 gcc_unreachable ();
19450
19451 emit_move_insn (target, gen_lowpart (mode, x));
19452 return true;
19453
19454 default:
19455 return false;
19456 }
19457 }
19458
19459 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19460 consisting of the values in VALS. It is known that all elements
19461 except ONE_VAR are constants. Return true if successful. */
19462
19463 static bool
19464 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19465 rtx target, rtx vals, int one_var)
19466 {
19467 rtx var = XVECEXP (vals, 0, one_var);
19468 enum machine_mode wmode;
19469 rtx const_vec, x;
19470
19471 const_vec = copy_rtx (vals);
19472 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19473 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19474
19475 switch (mode)
19476 {
19477 case V2DFmode:
19478 case V2DImode:
19479 case V2SFmode:
19480 case V2SImode:
19481 /* For the two element vectors, it's just as easy to use
19482 the general case. */
19483 return false;
19484
19485 case V4SFmode:
19486 case V4SImode:
19487 case V8HImode:
19488 case V4HImode:
19489 break;
19490
19491 case V16QImode:
19492 wmode = V8HImode;
19493 goto widen;
19494 case V8QImode:
19495 wmode = V4HImode;
19496 goto widen;
19497 widen:
19498 /* There's no way to set one QImode entry easily. Combine
19499 the variable value with its adjacent constant value, and
19500 promote to an HImode set. */
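      /* For example, with ONE_VAR == 5 the variable byte is paired with
         constant element 4; the combined HImode value (var << 8) | elt4 is
         then written as HImode element 2 of the wider vector (x86 is
         little-endian, so the odd byte is the high half).  (Illustrative
         example.)  */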
19501 x = XVECEXP (vals, 0, one_var ^ 1);
19502 if (one_var & 1)
19503 {
19504 var = convert_modes (HImode, QImode, var, true);
19505 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19506 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19507 x = GEN_INT (INTVAL (x) & 0xff);
19508 }
19509 else
19510 {
19511 var = convert_modes (HImode, QImode, var, true);
19512 x = gen_int_mode (INTVAL (x) << 8, HImode);
19513 }
19514 if (x != const0_rtx)
19515 var = expand_simple_binop (HImode, IOR, var, x, var,
19516 1, OPTAB_LIB_WIDEN);
19517
19518 x = gen_reg_rtx (wmode);
19519 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19520 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19521
19522 emit_move_insn (target, gen_lowpart (mode, x));
19523 return true;
19524
19525 default:
19526 return false;
19527 }
19528
19529 emit_move_insn (target, const_vec);
19530 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19531 return true;
19532 }
19533
19534 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19535 all values variable, and none identical. */
19536
19537 static void
19538 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19539 rtx target, rtx vals)
19540 {
19541 enum machine_mode half_mode = GET_MODE_INNER (mode);
19542 rtx op0 = NULL, op1 = NULL;
19543 bool use_vec_concat = false;
19544
19545 switch (mode)
19546 {
19547 case V2SFmode:
19548 case V2SImode:
19549 if (!mmx_ok && !TARGET_SSE)
19550 break;
19551 /* FALLTHRU */
19552
19553 case V2DFmode:
19554 case V2DImode:
19555 /* For the two element vectors, we always implement VEC_CONCAT. */
19556 op0 = XVECEXP (vals, 0, 0);
19557 op1 = XVECEXP (vals, 0, 1);
19558 use_vec_concat = true;
19559 break;
19560
19561 case V4SFmode:
19562 half_mode = V2SFmode;
19563 goto half;
19564 case V4SImode:
19565 half_mode = V2SImode;
19566 goto half;
19567 half:
19568 {
19569 rtvec v;
19570
19571 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19572 Recurse to load the two halves. */
19573
19574 op0 = gen_reg_rtx (half_mode);
19575 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19576 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19577
19578 op1 = gen_reg_rtx (half_mode);
19579 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19580 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19581
19582 use_vec_concat = true;
19583 }
19584 break;
19585
19586 case V8HImode:
19587 case V16QImode:
19588 case V4HImode:
19589 case V8QImode:
19590 break;
19591
19592 default:
19593 gcc_unreachable ();
19594 }
19595
19596 if (use_vec_concat)
19597 {
19598 if (!register_operand (op0, half_mode))
19599 op0 = force_reg (half_mode, op0);
19600 if (!register_operand (op1, half_mode))
19601 op1 = force_reg (half_mode, op1);
19602
19603 emit_insn (gen_rtx_SET (VOIDmode, target,
19604 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19605 }
19606 else
19607 {
19608 int i, j, n_elts, n_words, n_elt_per_word;
19609 enum machine_mode inner_mode;
19610 rtx words[4], shift;
19611
19612 inner_mode = GET_MODE_INNER (mode);
19613 n_elts = GET_MODE_NUNITS (mode);
19614 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19615 n_elt_per_word = n_elts / n_words;
19616 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19617
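      /* Build each word with the lowest-indexed element in its low bits;
         e.g. for V8HImode on a 32-bit target this forms four SImode words,
         word I being (elt[2*I+1] << 16) | elt[2*I].  (Illustrative
         example.)  */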
19618 for (i = 0; i < n_words; ++i)
19619 {
19620 rtx word = NULL_RTX;
19621
19622 for (j = 0; j < n_elt_per_word; ++j)
19623 {
19624 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19625 elt = convert_modes (word_mode, inner_mode, elt, true);
19626
19627 if (j == 0)
19628 word = elt;
19629 else
19630 {
19631 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19632 word, 1, OPTAB_LIB_WIDEN);
19633 word = expand_simple_binop (word_mode, IOR, word, elt,
19634 word, 1, OPTAB_LIB_WIDEN);
19635 }
19636 }
19637
19638 words[i] = word;
19639 }
19640
19641 if (n_words == 1)
19642 emit_move_insn (target, gen_lowpart (mode, words[0]));
19643 else if (n_words == 2)
19644 {
19645 rtx tmp = gen_reg_rtx (mode);
19646 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19647 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19648 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19649 emit_move_insn (target, tmp);
19650 }
19651 else if (n_words == 4)
19652 {
19653 rtx tmp = gen_reg_rtx (V4SImode);
19654 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19655 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19656 emit_move_insn (target, gen_lowpart (mode, tmp));
19657 }
19658 else
19659 gcc_unreachable ();
19660 }
19661 }
19662
19663 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19664 instructions unless MMX_OK is true. */
19665
19666 void
19667 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19668 {
19669 enum machine_mode mode = GET_MODE (target);
19670 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19671 int n_elts = GET_MODE_NUNITS (mode);
19672 int n_var = 0, one_var = -1;
19673 bool all_same = true, all_const_zero = true;
19674 int i;
19675 rtx x;
19676
19677 for (i = 0; i < n_elts; ++i)
19678 {
19679 x = XVECEXP (vals, 0, i);
19680 if (!CONSTANT_P (x))
19681 n_var++, one_var = i;
19682 else if (x != CONST0_RTX (inner_mode))
19683 all_const_zero = false;
19684 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19685 all_same = false;
19686 }
19687
19688 /* Constants are best loaded from the constant pool. */
19689 if (n_var == 0)
19690 {
19691 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19692 return;
19693 }
19694
19695 /* If all values are identical, broadcast the value. */
19696 if (all_same
19697 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19698 XVECEXP (vals, 0, 0)))
19699 return;
19700
19701 /* Values where only one field is non-constant are best loaded from
19702 the pool and overwritten via move later. */
19703 if (n_var == 1)
19704 {
19705 if (all_const_zero
19706 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19707 XVECEXP (vals, 0, one_var),
19708 one_var))
19709 return;
19710
19711 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19712 return;
19713 }
19714
19715 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19716 }
19717
19718 void
19719 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19720 {
19721 enum machine_mode mode = GET_MODE (target);
19722 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19723 bool use_vec_merge = false;
19724 rtx tmp;
19725
19726 switch (mode)
19727 {
19728 case V2SFmode:
19729 case V2SImode:
19730 if (mmx_ok)
19731 {
19732 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19733 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19734 if (elt == 0)
19735 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19736 else
19737 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19738 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19739 return;
19740 }
19741 break;
19742
19743 case V2DFmode:
19744 case V2DImode:
19745 {
19746 rtx op0, op1;
19747
19748 /* For the two element vectors, we implement a VEC_CONCAT with
19749 the extraction of the other element. */
19750
19751 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19752 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19753
19754 if (elt == 0)
19755 op0 = val, op1 = tmp;
19756 else
19757 op0 = tmp, op1 = val;
19758
19759 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19760 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19761 }
19762 return;
19763
19764 case V4SFmode:
19765 switch (elt)
19766 {
19767 case 0:
19768 use_vec_merge = true;
19769 break;
19770
19771 case 1:
19772 /* tmp = target = A B C D */
19773 tmp = copy_to_reg (target);
19774 /* target = A A B B */
19775 emit_insn (gen_sse_unpcklps (target, target, target));
19776 /* target = X A B B */
19777 ix86_expand_vector_set (false, target, val, 0);
19778 /* target = A X C D */
19779 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19780 GEN_INT (1), GEN_INT (0),
19781 GEN_INT (2+4), GEN_INT (3+4)));
19782 return;
19783
19784 case 2:
19785 /* tmp = target = A B C D */
19786 tmp = copy_to_reg (target);
19787 /* tmp = X B C D */
19788 ix86_expand_vector_set (false, tmp, val, 0);
19789 /* target = A B X D */
19790 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19791 GEN_INT (0), GEN_INT (1),
19792 GEN_INT (0+4), GEN_INT (3+4)));
19793 return;
19794
19795 case 3:
19796 /* tmp = target = A B C D */
19797 tmp = copy_to_reg (target);
19798 /* tmp = X B C D */
19799 ix86_expand_vector_set (false, tmp, val, 0);
19800 /* target = A B X D */
19801 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19802 GEN_INT (0), GEN_INT (1),
19803 GEN_INT (2+4), GEN_INT (0+4)));
19804 return;
19805
19806 default:
19807 gcc_unreachable ();
19808 }
19809 break;
19810
19811 case V4SImode:
19812 /* Element 0 handled by vec_merge below. */
19813 if (elt == 0)
19814 {
19815 use_vec_merge = true;
19816 break;
19817 }
19818
19819 if (TARGET_SSE2)
19820 {
19821 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19822 store into element 0, then shuffle them back. */
19823
19824 rtx order[4];
19825
19826 order[0] = GEN_INT (elt);
19827 order[1] = const1_rtx;
19828 order[2] = const2_rtx;
19829 order[3] = GEN_INT (3);
19830 order[elt] = const0_rtx;
19831
19832 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19833 order[1], order[2], order[3]));
19834
19835 ix86_expand_vector_set (false, target, val, 0);
19836
19837 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19838 order[1], order[2], order[3]));
19839 }
19840 else
19841 {
19842 /* For SSE1, we have to reuse the V4SF code. */
19843 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19844 gen_lowpart (SFmode, val), elt);
19845 }
19846 return;
19847
19848 case V8HImode:
19849 use_vec_merge = TARGET_SSE2;
19850 break;
19851 case V4HImode:
19852 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19853 break;
19854
19855 case V16QImode:
19856 case V8QImode:
19857 default:
19858 break;
19859 }
19860
19861 if (use_vec_merge)
19862 {
19863 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19864 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19865 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19866 }
19867 else
19868 {
19869 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19870
19871 emit_move_insn (mem, target);
19872
19873 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19874 emit_move_insn (tmp, val);
19875
19876 emit_move_insn (target, mem);
19877 }
19878 }
19879
19880 void
19881 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19882 {
19883 enum machine_mode mode = GET_MODE (vec);
19884 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19885 bool use_vec_extr = false;
19886 rtx tmp;
19887
19888 switch (mode)
19889 {
19890 case V2SImode:
19891 case V2SFmode:
19892 if (!mmx_ok)
19893 break;
19894 /* FALLTHRU */
19895
19896 case V2DFmode:
19897 case V2DImode:
19898 use_vec_extr = true;
19899 break;
19900
19901 case V4SFmode:
19902 switch (elt)
19903 {
19904 case 0:
19905 tmp = vec;
19906 break;
19907
19908 case 1:
19909 case 3:
19910 tmp = gen_reg_rtx (mode);
19911 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19912 GEN_INT (elt), GEN_INT (elt),
19913 GEN_INT (elt+4), GEN_INT (elt+4)));
19914 break;
19915
19916 case 2:
19917 tmp = gen_reg_rtx (mode);
19918 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19919 break;
19920
19921 default:
19922 gcc_unreachable ();
19923 }
19924 vec = tmp;
19925 use_vec_extr = true;
19926 elt = 0;
19927 break;
19928
19929 case V4SImode:
19930 if (TARGET_SSE2)
19931 {
19932 switch (elt)
19933 {
19934 case 0:
19935 tmp = vec;
19936 break;
19937
19938 case 1:
19939 case 3:
19940 tmp = gen_reg_rtx (mode);
19941 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19942 GEN_INT (elt), GEN_INT (elt),
19943 GEN_INT (elt), GEN_INT (elt)));
19944 break;
19945
19946 case 2:
19947 tmp = gen_reg_rtx (mode);
19948 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19949 break;
19950
19951 default:
19952 gcc_unreachable ();
19953 }
19954 vec = tmp;
19955 use_vec_extr = true;
19956 elt = 0;
19957 }
19958 else
19959 {
19960 /* For SSE1, we have to reuse the V4SF code. */
19961 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19962 gen_lowpart (V4SFmode, vec), elt);
19963 return;
19964 }
19965 break;
19966
19967 case V8HImode:
19968 use_vec_extr = TARGET_SSE2;
19969 break;
19970 case V4HImode:
19971 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19972 break;
19973
19974 case V16QImode:
19975 case V8QImode:
19976 /* ??? Could extract the appropriate HImode element and shift. */
19977 default:
19978 break;
19979 }
19980
19981 if (use_vec_extr)
19982 {
19983 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19984 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19985
19986 /* Let the rtl optimizers know about the zero extension performed. */
19987 if (inner_mode == HImode)
19988 {
19989 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19990 target = gen_lowpart (SImode, target);
19991 }
19992
19993 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19994 }
19995 else
19996 {
19997 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19998
19999 emit_move_insn (mem, vec);
20000
20001 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20002 emit_move_insn (target, tmp);
20003 }
20004 }
20005
20006 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20007 pattern to reduce; DEST is the destination; IN is the input vector. */
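 /* For IN = [a b c d] the sequence below combines a with c and b with d,
    then combines the two partial results, leaving the reduction in element
    0 of DEST; the upper elements are don't-cares.  (Illustrative note; FN
    is assumed commutative, e.g. min, max or plus.)  */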
20008
20009 void
20010 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20011 {
20012 rtx tmp1, tmp2, tmp3;
20013
20014 tmp1 = gen_reg_rtx (V4SFmode);
20015 tmp2 = gen_reg_rtx (V4SFmode);
20016 tmp3 = gen_reg_rtx (V4SFmode);
20017
20018 emit_insn (gen_sse_movhlps (tmp1, in, in));
20019 emit_insn (fn (tmp2, tmp1, in));
20020
20021 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20022 GEN_INT (1), GEN_INT (1),
20023 GEN_INT (1+4), GEN_INT (1+4)));
20024 emit_insn (fn (dest, tmp2, tmp3));
20025 }
20026 \f
20027 /* Target hook for scalar_mode_supported_p. */
20028 static bool
20029 ix86_scalar_mode_supported_p (enum machine_mode mode)
20030 {
20031 if (DECIMAL_FLOAT_MODE_P (mode))
20032 return true;
20033 else
20034 return default_scalar_mode_supported_p (mode);
20035 }
20036
20037 /* Implements target hook vector_mode_supported_p. */
20038 static bool
20039 ix86_vector_mode_supported_p (enum machine_mode mode)
20040 {
20041 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20042 return true;
20043 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20044 return true;
20045 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20046 return true;
20047 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20048 return true;
20049 return false;
20050 }
20051
20052 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20053
20054 We do this in the new i386 backend to maintain source compatibility
20055 with the old cc0-based compiler. */
20056
20057 static tree
20058 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20059 tree inputs ATTRIBUTE_UNUSED,
20060 tree clobbers)
20061 {
20062 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20063 clobbers);
20064 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20065 clobbers);
20066 return clobbers;
20067 }
20068
 20069 /* Return true if this goes in large data/bss.  */
20070
20071 static bool
20072 ix86_in_large_data_p (tree exp)
20073 {
20074 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20075 return false;
20076
20077 /* Functions are never large data. */
20078 if (TREE_CODE (exp) == FUNCTION_DECL)
20079 return false;
20080
20081 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20082 {
20083 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20084 if (strcmp (section, ".ldata") == 0
20085 || strcmp (section, ".lbss") == 0)
20086 return true;
20087 return false;
20088 }
20089 else
20090 {
20091 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20092
20093 /* If this is an incomplete type with size 0, then we can't put it
20094 in data because it might be too big when completed. */
20095 if (!size || size > ix86_section_threshold)
20096 return true;
20097 }
20098
20099 return false;
20100 }
20101 static void
20102 ix86_encode_section_info (tree decl, rtx rtl, int first)
20103 {
20104 default_encode_section_info (decl, rtl, first);
20105
20106 if (TREE_CODE (decl) == VAR_DECL
20107 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20108 && ix86_in_large_data_p (decl))
20109 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20110 }
20111
20112 /* Worker function for REVERSE_CONDITION. */
20113
20114 enum rtx_code
20115 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20116 {
20117 return (mode != CCFPmode && mode != CCFPUmode
20118 ? reverse_condition (code)
20119 : reverse_condition_maybe_unordered (code));
20120 }
20121
20122 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20123 to OPERANDS[0]. */
20124
20125 const char *
20126 output_387_reg_move (rtx insn, rtx *operands)
20127 {
20128 if (REG_P (operands[1])
20129 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20130 {
20131 if (REGNO (operands[0]) == FIRST_STACK_REG)
20132 return output_387_ffreep (operands, 0);
20133 return "fstp\t%y0";
20134 }
20135 if (STACK_TOP_P (operands[0]))
20136 return "fld%z1\t%y1";
20137 return "fst\t%y0";
20138 }
20139
 20140 /* Output code to perform a conditional jump to LABEL if the C2 flag in
 20141    the FP status register is set.  */
20142
20143 void
20144 ix86_emit_fp_unordered_jump (rtx label)
20145 {
20146 rtx reg = gen_reg_rtx (HImode);
20147 rtx temp;
20148
20149 emit_insn (gen_x86_fnstsw_1 (reg));
20150
20151 if (TARGET_USE_SAHF)
20152 {
20153 emit_insn (gen_x86_sahf_1 (reg));
20154
20155 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20156 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20157 }
20158 else
20159 {
20160 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20161
20162 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20163 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20164 }
20165
20166 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20167 gen_rtx_LABEL_REF (VOIDmode, label),
20168 pc_rtx);
20169 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20170 emit_jump_insn (temp);
20171 }
20172
20173 /* Output code to perform a log1p XFmode calculation. */
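 /* The comparison threshold used below is approximately 1 - sqrt(2)/2.
    fyl2xp1 is only specified for arguments of smaller magnitude, so larger
    arguments are handled via fyl2x on (1 + op1) instead.  (Explanatory
    note on the constant used below.)  */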
20174
20175 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20176 {
20177 rtx label1 = gen_label_rtx ();
20178 rtx label2 = gen_label_rtx ();
20179
20180 rtx tmp = gen_reg_rtx (XFmode);
20181 rtx tmp2 = gen_reg_rtx (XFmode);
20182
20183 emit_insn (gen_absxf2 (tmp, op1));
20184 emit_insn (gen_cmpxf (tmp,
20185 CONST_DOUBLE_FROM_REAL_VALUE (
20186 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20187 XFmode)));
20188 emit_jump_insn (gen_bge (label1));
20189
20190 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20191 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20192 emit_jump (label2);
20193
20194 emit_label (label1);
20195 emit_move_insn (tmp, CONST1_RTX (XFmode));
20196 emit_insn (gen_addxf3 (tmp, op1, tmp));
20197 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20198 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20199
20200 emit_label (label2);
20201 }
20202
20203 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20204
20205 static void
20206 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20207 tree decl)
20208 {
20209 /* With Binutils 2.15, the "@unwind" marker must be specified on
20210 every occurrence of the ".eh_frame" section, not just the first
20211 one. */
20212 if (TARGET_64BIT
20213 && strcmp (name, ".eh_frame") == 0)
20214 {
20215 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20216 flags & SECTION_WRITE ? "aw" : "a");
20217 return;
20218 }
20219 default_elf_asm_named_section (name, flags, decl);
20220 }
20221
20222 /* Return the mangling of TYPE if it is an extended fundamental type. */
20223
20224 static const char *
20225 ix86_mangle_fundamental_type (tree type)
20226 {
20227 switch (TYPE_MODE (type))
20228 {
20229 case TFmode:
20230 /* __float128 is "g". */
20231 return "g";
20232 case XFmode:
20233 /* "long double" or __float80 is "e". */
20234 return "e";
20235 default:
20236 return NULL;
20237 }
20238 }
20239
 20240 /* For 32-bit code we can save PIC register setup by using the
 20241    __stack_chk_fail_local hidden function instead of calling
 20242    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
 20243    register, so it is better to call __stack_chk_fail directly.  */
20244
20245 static tree
20246 ix86_stack_protect_fail (void)
20247 {
20248 return TARGET_64BIT
20249 ? default_external_stack_protect_fail ()
20250 : default_hidden_stack_protect_fail ();
20251 }
20252
20253 /* Select a format to encode pointers in exception handling data. CODE
20254 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20255 true if the symbol may be affected by dynamic relocations.
20256
20257 ??? All x86 object file formats are capable of representing this.
20258 After all, the relocation needed is the same as for the call insn.
20259 Whether or not a particular assembler allows us to enter such, I
20260 guess we'll have to see. */
20261 int
20262 asm_preferred_eh_data_format (int code, int global)
20263 {
20264 if (flag_pic)
20265 {
20266 int type = DW_EH_PE_sdata8;
20267 if (!TARGET_64BIT
20268 || ix86_cmodel == CM_SMALL_PIC
20269 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20270 type = DW_EH_PE_sdata4;
20271 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20272 }
20273 if (ix86_cmodel == CM_SMALL
20274 || (ix86_cmodel == CM_MEDIUM && code))
20275 return DW_EH_PE_udata4;
20276 return DW_EH_PE_absptr;
20277 }
20278 \f
20279 /* Expand copysign from SIGN to the positive value ABS_VALUE
20280 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20281 the sign-bit. */
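 /* Conceptually RESULT = ABS_VALUE | (SIGN & SIGNBIT), where SIGNBIT is the
    sign-bit mask of MODE (e.g. 0x8000000000000000 for DFmode).
    (Illustrative formula.)  */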
20282 static void
20283 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20284 {
20285 enum machine_mode mode = GET_MODE (sign);
20286 rtx sgn = gen_reg_rtx (mode);
20287 if (mask == NULL_RTX)
20288 {
20289 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20290 if (!VECTOR_MODE_P (mode))
20291 {
20292 /* We need to generate a scalar mode mask in this case. */
20293 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20294 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20295 mask = gen_reg_rtx (mode);
20296 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20297 }
20298 }
20299 else
20300 mask = gen_rtx_NOT (mode, mask);
20301 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20302 gen_rtx_AND (mode, mask, sign)));
20303 emit_insn (gen_rtx_SET (VOIDmode, result,
20304 gen_rtx_IOR (mode, abs_value, sgn)));
20305 }
20306
20307 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20308 mask for masking out the sign-bit is stored in *SMASK, if that is
20309 non-null. */
20310 static rtx
20311 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20312 {
20313 enum machine_mode mode = GET_MODE (op0);
20314 rtx xa, mask;
20315
20316 xa = gen_reg_rtx (mode);
20317 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20318 if (!VECTOR_MODE_P (mode))
20319 {
20320 /* We need to generate a scalar mode mask in this case. */
20321 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20322 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20323 mask = gen_reg_rtx (mode);
20324 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20325 }
20326 emit_insn (gen_rtx_SET (VOIDmode, xa,
20327 gen_rtx_AND (mode, op0, mask)));
20328
20329 if (smask)
20330 *smask = mask;
20331
20332 return xa;
20333 }
20334
20335 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20336 swapping the operands if SWAP_OPERANDS is true. The expanded
20337 code is a forward jump to a newly created label in case the
20338 comparison is true. The generated label rtx is returned. */
20339 static rtx
20340 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20341 bool swap_operands)
20342 {
20343 rtx label, tmp;
20344
20345 if (swap_operands)
20346 {
20347 tmp = op0;
20348 op0 = op1;
20349 op1 = tmp;
20350 }
20351
20352 label = gen_label_rtx ();
20353 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20354 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20355 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
20356 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
20357 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20358 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
20359 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20360 JUMP_LABEL (tmp) = label;
20361
20362 return label;
20363 }
20364
20365 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20366 using comparison code CODE. Operands are swapped for the comparison if
20367 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20368 static rtx
20369 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
20370 bool swap_operands)
20371 {
20372 enum machine_mode mode = GET_MODE (op0);
20373 rtx mask = gen_reg_rtx (mode);
20374
20375 if (swap_operands)
20376 {
20377 rtx tmp = op0;
20378 op0 = op1;
20379 op1 = tmp;
20380 }
20381
20382 if (mode == DFmode)
20383 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
20384 gen_rtx_fmt_ee (code, mode, op0, op1)));
20385 else
20386 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
20387 gen_rtx_fmt_ee (code, mode, op0, op1)));
20388
20389 return mask;
20390 }
20391
20392 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20393 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
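 /* For DFmode this is 2**52, for SFmode 2**23.  Adding and then subtracting
    this constant from a nonnegative value smaller than it rounds the value
    to an integer in the current rounding mode, since the addition leaves no
    mantissa bits for the fractional part; the expanders below rely on this
    trick.  */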
20394 static rtx
20395 ix86_gen_TWO52 (enum machine_mode mode)
20396 {
20397 REAL_VALUE_TYPE TWO52r;
20398 rtx TWO52;
20399
20400 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
20401 TWO52 = const_double_from_real_value (TWO52r, mode);
20402 TWO52 = force_reg (mode, TWO52);
20403
20404 return TWO52;
20405 }
20406
20407 /* Expand SSE sequence for computing lround from OP1 storing
20408 into OP0. */
20409 void
20410 ix86_expand_lround (rtx op0, rtx op1)
20411 {
20412 /* C code for the stuff we're doing below:
20413 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20414 return (long)tmp;
20415 */
20416 enum machine_mode mode = GET_MODE (op1);
20417 const struct real_format *fmt;
20418 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20419 rtx adj;
20420
20421 /* load nextafter (0.5, 0.0) */
20422 fmt = REAL_MODE_FORMAT (mode);
20423 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20424 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
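   /* PRED_HALF is the largest representable value below 0.5.  Adding it
      instead of exactly 0.5 keeps values just under 0.5 from reaching 1.0
      before the truncating conversion, while halfway cases such as 0.5 and
      1.5 still end up rounded away from zero.  (Explanatory note.)  */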
20425
20426 /* adj = copysign (0.5, op1) */
20427 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
20428 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
20429
20430 /* adj = op1 + adj */
20431 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
20432
20433 /* op0 = (imode)adj */
20434 expand_fix (op0, adj, 0);
20435 }
20436
 20437 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
 20438    DO_FLOOR) from OPERAND1, storing into OPERAND0.  */
20439 void
20440 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
20441 {
20442 /* C code for the stuff we're doing below (for do_floor):
20443 xi = (long)op1;
20444 xi -= (double)xi > op1 ? 1 : 0;
20445 return xi;
20446 */
20447 enum machine_mode fmode = GET_MODE (op1);
20448 enum machine_mode imode = GET_MODE (op0);
20449 rtx ireg, freg, label, tmp;
20450
20451 /* reg = (long)op1 */
20452 ireg = gen_reg_rtx (imode);
20453 expand_fix (ireg, op1, 0);
20454
20455 /* freg = (double)reg */
20456 freg = gen_reg_rtx (fmode);
20457 expand_float (freg, ireg, 0);
20458
20459 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20460 label = ix86_expand_sse_compare_and_jump (UNLE,
20461 freg, op1, !do_floor);
20462 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
20463 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
20464 emit_move_insn (ireg, tmp);
20465
20466 emit_label (label);
20467 LABEL_NUSES (label) = 1;
20468
20469 emit_move_insn (op0, ireg);
20470 }
20471
20472 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20473 result in OPERAND0. */
20474 void
20475 ix86_expand_rint (rtx operand0, rtx operand1)
20476 {
20477 /* C code for the stuff we're doing below:
20478 xa = fabs (operand1);
20479 if (!isless (xa, 2**52))
20480 return operand1;
20481 xa = xa + 2**52 - 2**52;
20482 return copysign (xa, operand1);
20483 */
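   /* As in ix86_expand_rounddf_32 below, working on the absolute value and
      copying the sign back at the end keeps -0.0 -> -0.0 correct.  */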
20484 enum machine_mode mode = GET_MODE (operand0);
20485 rtx res, xa, label, TWO52, mask;
20486
20487 res = gen_reg_rtx (mode);
20488 emit_move_insn (res, operand1);
20489
20490 /* xa = abs (operand1) */
20491 xa = ix86_expand_sse_fabs (res, &mask);
20492
20493 /* if (!isless (xa, TWO52)) goto label; */
20494 TWO52 = ix86_gen_TWO52 (mode);
20495 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20496
20497 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20498 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20499
20500 ix86_sse_copysign_to_positive (res, xa, res, mask);
20501
20502 emit_label (label);
20503 LABEL_NUSES (label) = 1;
20504
20505 emit_move_insn (operand0, res);
20506 }
20507
20508 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20509 into OPERAND0. */
20510 void
20511 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
20512 {
20513 /* C code for the stuff we expand below.
20514 double xa = fabs (x), x2;
20515 if (!isless (xa, TWO52))
20516 return x;
20517 xa = xa + TWO52 - TWO52;
20518 x2 = copysign (xa, x);
20519 Compensate. Floor:
20520 if (x2 > x)
20521 x2 -= 1;
20522 Compensate. Ceil:
20523 if (x2 < x)
20524 x2 -= -1;
20525 return x2;
20526 */
20527 enum machine_mode mode = GET_MODE (operand0);
20528 rtx xa, TWO52, tmp, label, one, res, mask;
20529
20530 TWO52 = ix86_gen_TWO52 (mode);
20531
20532 /* Temporary for holding the result, initialized to the input
20533 operand to ease control flow. */
20534 res = gen_reg_rtx (mode);
20535 emit_move_insn (res, operand1);
20536
20537 /* xa = abs (operand1) */
20538 xa = ix86_expand_sse_fabs (res, &mask);
20539
20540 /* if (!isless (xa, TWO52)) goto label; */
20541 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20542
20543 /* xa = xa + TWO52 - TWO52; */
20544 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20545 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20546
20547 /* xa = copysign (xa, operand1) */
20548 ix86_sse_copysign_to_positive (xa, xa, res, mask);
20549
20550 /* generate 1.0 or -1.0 */
20551 one = force_reg (mode,
20552 const_double_from_real_value (do_floor
20553 ? dconst1 : dconstm1, mode));
20554
20555 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20556 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20557 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20558 gen_rtx_AND (mode, one, tmp)));
20559 /* We always need to subtract here to preserve signed zero. */
20560 tmp = expand_simple_binop (mode, MINUS,
20561 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20562 emit_move_insn (res, tmp);
20563
20564 emit_label (label);
20565 LABEL_NUSES (label) = 1;
20566
20567 emit_move_insn (operand0, res);
20568 }
20569
20570 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20571 into OPERAND0. */
20572 void
20573 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
20574 {
20575 /* C code for the stuff we expand below.
20576 double xa = fabs (x), x2;
20577 if (!isless (xa, TWO52))
20578 return x;
20579 x2 = (double)(long)x;
20580 Compensate. Floor:
20581 if (x2 > x)
20582 x2 -= 1;
20583 Compensate. Ceil:
20584 if (x2 < x)
20585 x2 += 1;
20586 if (HONOR_SIGNED_ZEROS (mode))
20587 return copysign (x2, x);
20588 return x2;
20589 */
20590 enum machine_mode mode = GET_MODE (operand0);
20591 rtx xa, xi, TWO52, tmp, label, one, res, mask;
20592
20593 TWO52 = ix86_gen_TWO52 (mode);
20594
20595 /* Temporary for holding the result, initialized to the input
20596 operand to ease control flow. */
20597 res = gen_reg_rtx (mode);
20598 emit_move_insn (res, operand1);
20599
20600 /* xa = abs (operand1) */
20601 xa = ix86_expand_sse_fabs (res, &mask);
20602
20603 /* if (!isless (xa, TWO52)) goto label; */
20604 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20605
20606 /* xa = (double)(long)x */
20607 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20608 expand_fix (xi, res, 0);
20609 expand_float (xa, xi, 0);
20610
20611 /* generate 1.0 */
20612 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20613
20614 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20615 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20616 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20617 gen_rtx_AND (mode, one, tmp)));
20618 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
20619 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20620 emit_move_insn (res, tmp);
20621
20622 if (HONOR_SIGNED_ZEROS (mode))
20623 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20624
20625 emit_label (label);
20626 LABEL_NUSES (label) = 1;
20627
20628 emit_move_insn (operand0, res);
20629 }
20630
 20631 /* Expand SSE sequence for computing round from OPERAND1, storing
 20632    into OPERAND0.  This is a sequence that works without relying on DImode
 20633    truncation via cvttsd2siq, which is only available on 64-bit targets.  */
20634 void
20635 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
20636 {
20637 /* C code for the stuff we expand below.
20638 double xa = fabs (x), xa2, x2;
20639 if (!isless (xa, TWO52))
20640 return x;
20641 Using the absolute value and copying back sign makes
20642 -0.0 -> -0.0 correct.
20643 xa2 = xa + TWO52 - TWO52;
20644 Compensate.
20645 dxa = xa2 - xa;
20646 if (dxa <= -0.5)
20647 xa2 += 1;
20648 else if (dxa > 0.5)
20649 xa2 -= 1;
20650 x2 = copysign (xa2, x);
20651 return x2;
20652 */
20653 enum machine_mode mode = GET_MODE (operand0);
20654 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
20655
20656 TWO52 = ix86_gen_TWO52 (mode);
20657
20658 /* Temporary for holding the result, initialized to the input
20659 operand to ease control flow. */
20660 res = gen_reg_rtx (mode);
20661 emit_move_insn (res, operand1);
20662
20663 /* xa = abs (operand1) */
20664 xa = ix86_expand_sse_fabs (res, &mask);
20665
20666 /* if (!isless (xa, TWO52)) goto label; */
20667 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20668
20669 /* xa2 = xa + TWO52 - TWO52; */
20670 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20671 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
20672
20673 /* dxa = xa2 - xa; */
20674 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
20675
20676 /* generate 0.5, 1.0 and -0.5 */
20677 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
20678 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
20679 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
20680 0, OPTAB_DIRECT);
20681
20682 /* Compensate. */
20683 tmp = gen_reg_rtx (mode);
20684 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
20685 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
20686 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20687 gen_rtx_AND (mode, one, tmp)));
20688 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20689 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
20690 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
20691 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20692 gen_rtx_AND (mode, one, tmp)));
20693 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20694
20695 /* res = copysign (xa2, operand1) */
20696 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
20697
20698 emit_label (label);
20699 LABEL_NUSES (label) = 1;
20700
20701 emit_move_insn (operand0, res);
20702 }
20703
20704 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20705 into OPERAND0. */
20706 void
20707 ix86_expand_trunc (rtx operand0, rtx operand1)
20708 {
20709 /* C code for SSE variant we expand below.
20710 double xa = fabs (x), x2;
20711 if (!isless (xa, TWO52))
20712 return x;
20713 x2 = (double)(long)x;
20714 if (HONOR_SIGNED_ZEROS (mode))
20715 return copysign (x2, x);
20716 return x2;
20717 */
20718 enum machine_mode mode = GET_MODE (operand0);
20719 rtx xa, xi, TWO52, label, res, mask;
20720
20721 TWO52 = ix86_gen_TWO52 (mode);
20722
20723 /* Temporary for holding the result, initialized to the input
20724 operand to ease control flow. */
20725 res = gen_reg_rtx (mode);
20726 emit_move_insn (res, operand1);
20727
20728 /* xa = abs (operand1) */
20729 xa = ix86_expand_sse_fabs (res, &mask);
20730
20731 /* if (!isless (xa, TWO52)) goto label; */
20732 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20733
20734 /* x = (double)(long)x */
20735 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20736 expand_fix (xi, res, 0);
20737 expand_float (res, xi, 0);
20738
20739 if (HONOR_SIGNED_ZEROS (mode))
20740 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20741
20742 emit_label (label);
20743 LABEL_NUSES (label) = 1;
20744
20745 emit_move_insn (operand0, res);
20746 }
20747
20748 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20749 into OPERAND0. */
20750 void
20751 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
20752 {
20753 enum machine_mode mode = GET_MODE (operand0);
20754 rtx xa, mask, TWO52, label, one, res, smask, tmp;
20755
20756 /* C code for SSE variant we expand below.
20757 double xa = fabs (x), x2;
20758 if (!isless (xa, TWO52))
20759 return x;
20760 xa2 = xa + TWO52 - TWO52;
20761 Compensate:
20762 if (xa2 > xa)
20763 xa2 -= 1.0;
20764 x2 = copysign (xa2, x);
20765 return x2;
20766 */
20767
20768 TWO52 = ix86_gen_TWO52 (mode);
20769
20770 /* Temporary for holding the result, initialized to the input
20771 operand to ease control flow. */
20772 res = gen_reg_rtx (mode);
20773 emit_move_insn (res, operand1);
20774
20775 /* xa = abs (operand1) */
20776 xa = ix86_expand_sse_fabs (res, &smask);
20777
20778 /* if (!isless (xa, TWO52)) goto label; */
20779 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20780
20781 /* res = xa + TWO52 - TWO52; */
20782 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20783 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
20784 emit_move_insn (res, tmp);
20785
20786 /* generate 1.0 */
20787 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20788
20789 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
20790 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
20791 emit_insn (gen_rtx_SET (VOIDmode, mask,
20792 gen_rtx_AND (mode, mask, one)));
20793 tmp = expand_simple_binop (mode, MINUS,
20794 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
20795 emit_move_insn (res, tmp);
20796
20797 /* res = copysign (res, operand1) */
20798 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
20799
20800 emit_label (label);
20801 LABEL_NUSES (label) = 1;
20802
20803 emit_move_insn (operand0, res);
20804 }
20805
20806 /* Expand SSE sequence for computing round from OPERAND1 storing
20807 into OPERAND0. */
20808 void
20809 ix86_expand_round (rtx operand0, rtx operand1)
20810 {
20811 /* C code for the stuff we're doing below:
20812 double xa = fabs (x);
20813 if (!isless (xa, TWO52))
20814 return x;
20815 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20816 return copysign (xa, x);
20817 */
20818 enum machine_mode mode = GET_MODE (operand0);
20819 rtx res, TWO52, xa, label, xi, half, mask;
20820 const struct real_format *fmt;
20821 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20822
20823 /* Temporary for holding the result, initialized to the input
20824 operand to ease control flow. */
20825 res = gen_reg_rtx (mode);
20826 emit_move_insn (res, operand1);
20827
20828 TWO52 = ix86_gen_TWO52 (mode);
20829 xa = ix86_expand_sse_fabs (res, &mask);
20830 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20831
20832 /* load nextafter (0.5, 0.0) */
20833 fmt = REAL_MODE_FORMAT (mode);
20834 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20835 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20836
20837 /* xa = xa + 0.5 */
20838 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20839 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20840
20841 /* xa = (double)(int64_t)xa */
20842 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20843 expand_fix (xi, xa, 0);
20844 expand_float (xa, xi, 0);
20845
20846 /* res = copysign (xa, operand1) */
20847 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20848
20849 emit_label (label);
20850 LABEL_NUSES (label) = 1;
20851
20852 emit_move_insn (operand0, res);
20853 }
20854
20855 #include "gt-i386.h"