1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
   59 /* Return the index of the given mode in the multiply and divide cost tables.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
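/* Illustration (not part of the original file): a minimal sketch of how
   MODE_INDEX is meant to be used.  The multiply and divide cost tables in
   the processor_costs structures below have five entries, ordered QI, HI,
   SI, DI and "other", and MODE_INDEX maps a machine mode to the matching
   slot.  The table name mult_cost used here is hypothetical.  */

static inline int
example_mult_cost (const int mult_cost[5], enum machine_mode mode)
{
  /* QImode -> slot 0, HImode -> 1, SImode -> 2, DImode -> 3, others -> 4.  */
  return mult_cost[MODE_INDEX (mode)];
}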
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
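/* Illustration (not part of the original file): under the assumption above,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add costs the same
   in the size-tuned table below as a single-insn add does in the speed-tuned
   tables, keeping both kinds of tables on a common scale.  */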
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
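/* Illustration (not part of the original file): the two trailing initializers
   in each cost structure below are stringop algorithm tables, one pair for
   memcpy and one for memset, each with a 32-bit and a 64-bit variant.  A
   table has the rough shape {algorithm for unknown block size, {{max_size,
   algorithm}, ...}}, where a max_size of -1 terminates the list.  Below is a
   minimal sketch of how such a table could be consulted for a known size;
   the field names unknown_size, size, max and alg are taken from struct
   stringop_algs in i386.h and should be treated as assumptions here.  */

static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *algs,
			   HOST_WIDE_INT nbytes)
{
  int i;

  /* Skip entries whose upper bound is below the block size; stop at the
     -1 terminator, which covers all remaining sizes.  */
  for (i = 0; algs->size[i].max != -1 && algs->size[i].max < nbytes; i++)
    ;
  return algs->size[i].alg;
}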
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  353 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
  354 the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
  355 blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
  356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
  357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
  533 than K8 does.  Alignment becomes important after 8 bytes for memcpy and
  534 128 bytes for memset.  */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
  584 /* New AMD processors never drop prefetches; if they cannot be performed
  585 immediately, they are queued.  We set the number of simultaneous prefetches
  586 to a large constant to reflect this (it is probably not a good idea to leave
  587 the number of prefetches completely unlimited, as their execution also takes
  588 some time).  */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
  598 blocks it is better to use a loop.  For large blocks, a libcall can do
  599 non-temporal accesses and beat inline code considerably.  */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
  657 /* New AMD processors never drop prefetches; if they cannot be performed
  658 immediately, they are queued.  We set the number of simultaneous prefetches
  659 to a large constant to reflect this (it is probably not a good idea to leave
  660 the number of prefetches completely unlimited, as their execution also takes
  661 some time).  */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
  671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
  672 very small blocks it is better to use a loop.  For large blocks, a libcall can
  673 do non-temporal accesses and beat inline code considerably.  */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
  828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
  862 /* On all chips taken into consideration lea is 2 cycles or more.  With
  863 this cost, however, our current implementation of synth_mult results in
  864 the use of unnecessary temporary registers, causing regressions on several
  865 SPECfp benchmarks.  */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
  907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
  908 is increased to the perhaps more appropriate value of 5.  */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
 1003 /* Generic instruction choice should be a common subset of the supported CPUs
 1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
 1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
 1010 negatively, so enabling it for Generic64 seems like a good code-size
 1011 tradeoff.  We can't enable it for 32-bit generic because it does not
 1012 work well with PPro-based chips.  */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1030
 1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into P4 based
 1032 on simulation results.  But after P4 was made, no performance benefit
 1033 was observed from branch hints; they also increase code size.
 1034 As a result, icc never generates branch hints.  */
1035 0,
1036
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1039
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1043
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1048
 1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
 1050 register stalls on the Generic32 compilation setting as well.  However,
 1051 in the current implementation partial register stalls are not eliminated
 1052 very well - they can be introduced via subregs synthesized by combine
 1053 and can happen in caller/callee saving sequences.  Because this option
 1054 pays back little on PPro-based chips and conflicts with the partial register
 1055 dependencies used by Athlon/P4-based chips, it is better to leave it off
 1056 for generic32 for now.  */
1057 m_PPRO,
1058
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1061
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1064
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1067
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1070
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1073
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1076
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1079
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1082
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1085
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1089
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1092
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1095
1096 /* X86_TUNE_QIMODE_MATH */
1097 ~0,
1098
 1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
 1100 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
 1101 might be considered for Generic32 if our scheme for avoiding partial
 1102 stalls were more effective.  */
1103 ~m_PPRO,
1104
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1106 0,
1107
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1110
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1113
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1117
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1120
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1124
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1129
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1132
 1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
 1134 conflict here between PPro/Pentium4-based chips that treat 128-bit
 1135 SSE registers as single units and K8-based chips that divide SSE
 1136 registers into two 64-bit halves.  This knob promotes all store destinations
 1137 to be 128-bit to allow register renaming on 128-bit SSE units, but usually
 1138 results in one extra microop on 64-bit SSE units.  Experimental results
 1139 show that disabling this option on P4 brings over a 20% SPECfp regression,
 1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
 1141 masked by careful scheduling of moves.  */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1143
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1146
 1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
 1148 are resolved on SSE register parts instead of whole registers, so we may
 1149 maintain just the lower part of scalar values in the proper format, leaving the
 1150 upper part undefined.  */
1151 m_ATHLON_K8,
1152
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1155
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1158
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1161
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1164
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1170
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1173
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1176
 1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
 1178 than 4 branch instructions in a 16-byte window.  */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1180
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1183
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1186
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1189
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1192
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1195
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1198
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1201
 1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
 1203 and SImode multiplies, but the 386 and 486 do HImode multiplies faster.  */
1204 ~(m_386 | m_486),
1205
 1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
 1207 a vector path on AMD machines.  */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1209
 1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD
 1211 machines.  */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1213
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1215 than a MOV. */
1216 m_PENT,
1217
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1221
 1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector-decoded with a memory
 1223 operand that cannot be represented using a modRM byte.  The XOR
 1224 replacement is long-decoded, so this split helps here as well.  */
1225 m_K6,
1226 };
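/* Illustration (not part of the original file): each entry above is a bitmask
   of the m_* processor bits, so a tuning flag applies to the CPU currently
   being tuned for when the bit selected by ix86_tune is set in that entry
   (the ix86_arch_features array below is tested against ix86_arch in the
   same way).  A minimal sketch of such a test; the real TARGET_* tuning
   macros live in i386.h, and example_tune_p is hypothetical.  */

static inline int
example_tune_p (unsigned int feature_mask, enum processor_type tune)
{
  /* E.g. example_tune_p (ix86_tune_features[X86_TUNE_USE_LEAVE], ix86_tune).  */
  return (feature_mask & (1u << tune)) != 0;
}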
1227
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1231 ~(m_386 | m_486 | m_PENT | m_K6),
1232
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1235
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1238
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1241
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1245
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1248
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1252
1253 static enum stringop_alg stringop_alg = no_stringop;
1254
 1255 /* In case the average insn count for a single function invocation is
 1256 lower than this constant, emit fast (but longer) prologue and
 1257 epilogue code.  */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1259
 1260 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1264
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1267
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 /* SSE registers */
1282 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1283 SSE_REGS, SSE_REGS,
1284 /* MMX registers */
1285 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1286 MMX_REGS, MMX_REGS,
1287 /* REX registers */
1288 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 /* SSE REX registers */
1291 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1292 SSE_REGS, SSE_REGS,
1293 };
1294
1295 /* The "default" register map used in 32bit mode. */
1296
1297 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1298 {
1299 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1300 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1301 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1302 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1303 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1304 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1306 };
1307
1308 static int const x86_64_int_parameter_registers[6] =
1309 {
1310 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1311 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1312 };
1313
1314 static int const x86_64_ms_abi_int_parameter_registers[4] =
1315 {
1316 2 /*RCX*/, 1 /*RDX*/,
1317 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1318 };
1319
1320 static int const x86_64_int_return_registers[4] =
1321 {
1322 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1323 };
1324
1325 /* The "default" register map used in 64bit mode. */
1326 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1327 {
1328 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1329 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1330 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1331 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1332 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1333 8,9,10,11,12,13,14,15, /* extended integer registers */
1334 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1335 };
1336
1337 /* Define the register numbers to be used in Dwarf debugging information.
1338 The SVR4 reference port C compiler uses the following register numbers
1339 in its Dwarf output code:
1340 0 for %eax (gcc regno = 0)
1341 1 for %ecx (gcc regno = 2)
1342 2 for %edx (gcc regno = 1)
1343 3 for %ebx (gcc regno = 3)
1344 4 for %esp (gcc regno = 7)
1345 5 for %ebp (gcc regno = 6)
1346 6 for %esi (gcc regno = 4)
1347 7 for %edi (gcc regno = 5)
1348 The following three DWARF register numbers are never generated by
1349 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1350 believes these numbers have these meanings.
1351 8 for %eip (no gcc equivalent)
1352 9 for %eflags (gcc regno = 17)
1353 10 for %trapno (no gcc equivalent)
1354 It is not at all clear how we should number the FP stack registers
1355 for the x86 architecture. If the version of SDB on x86/svr4 were
1356 a bit less brain dead with respect to floating-point then we would
1357 have a precedent to follow with respect to DWARF register numbers
1358 for x86 FP registers, but the SDB on x86/svr4 is so completely
1359 broken with respect to FP registers that it is hardly worth thinking
1360 of it as something to strive for compatibility with.
1361 The version of x86/svr4 SDB I have at the moment does (partially)
1362 seem to believe that DWARF register number 11 is associated with
1363 the x86 register %st(0), but that's about all. Higher DWARF
1364 register numbers don't seem to be associated with anything in
1365 particular, and even for DWARF regno 11, SDB only seems to under-
1366 stand that it should say that a variable lives in %st(0) (when
1367 asked via an `=' command) if we said it was in DWARF regno 11,
1368 but SDB still prints garbage when asked for the value of the
1369 variable in question (via a `/' command).
1370 (Also note that the labels SDB prints for various FP stack regs
1371 when doing an `x' command are all wrong.)
1372 Note that these problems generally don't affect the native SVR4
1373 C compiler because it doesn't allow the use of -O with -g and
1374 because when it is *not* optimizing, it allocates a memory
1375 location for each floating-point variable, and the memory
1376 location is what gets described in the DWARF AT_location
1377 attribute for the variable in question.
1378 Regardless of the severe mental illness of the x86/svr4 SDB, we
1379 do something sensible here and we use the following DWARF
1380 register numbers. Note that these are all stack-top-relative
1381 numbers.
1382 11 for %st(0) (gcc regno = 8)
1383 12 for %st(1) (gcc regno = 9)
1384 13 for %st(2) (gcc regno = 10)
1385 14 for %st(3) (gcc regno = 11)
1386 15 for %st(4) (gcc regno = 12)
1387 16 for %st(5) (gcc regno = 13)
1388 17 for %st(6) (gcc regno = 14)
1389 18 for %st(7) (gcc regno = 15)
1390 */
1391 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1392 {
1393 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1394 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1395 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1396 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1397 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1398 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1400 };
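/* Reading the table above: indexing by the gcc register number yields the
   DWARF/DBX number expected by SVR4 tools.  For instance, gcc regno 1
   (%edx) maps to DWARF regno 2 and gcc regno 7 (%esp) maps to DWARF
   regno 4, while the extended registers report -1 (no encoding here).  */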
1401
1402 /* Test and compare insns in i386.md store the information needed to
1403 generate branch and scc insns here. */
1404
1405 rtx ix86_compare_op0 = NULL_RTX;
1406 rtx ix86_compare_op1 = NULL_RTX;
1407 rtx ix86_compare_emitted = NULL_RTX;
1408
1409 /* Size of the register save area. */
1410 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
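/* A quick check of the arithmetic, assuming the usual 64-bit values
   (REGPARM_MAX == 6, UNITS_PER_WORD == 8, SSE_REGPARM_MAX == 8): the area
   is 6*8 + 8*16 = 176 bytes, matching the register save area the x86-64
   psABI lays out for varargs functions.  */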
1411
1412 /* Define the structure for the machine field in struct function. */
1413
1414 struct stack_local_entry GTY(())
1415 {
1416 unsigned short mode;
1417 unsigned short n;
1418 rtx rtl;
1419 struct stack_local_entry *next;
1420 };
1421
1422 /* Structure describing stack frame layout.
1423 Stack grows downward:
1424
1425 [arguments]
1426 <- ARG_POINTER
1427 saved pc
1428
1429 saved frame pointer if frame_pointer_needed
1430 <- HARD_FRAME_POINTER
1431 [saved regs]
1432
1433 [padding1] \
1434 )
1435 [va_arg registers] (
1436 > to_allocate <- FRAME_POINTER
1437 [frame] (
1438 )
1439 [padding2] /
1440 */
1441 struct ix86_frame
1442 {
1443 int nregs;
1444 int padding1;
1445 int va_arg_size;
1446 HOST_WIDE_INT frame;
1447 int padding2;
1448 int outgoing_arguments_size;
1449 int red_zone_size;
1450
1451 HOST_WIDE_INT to_allocate;
1452 /* The offsets relative to ARG_POINTER. */
1453 HOST_WIDE_INT frame_pointer_offset;
1454 HOST_WIDE_INT hard_frame_pointer_offset;
1455 HOST_WIDE_INT stack_pointer_offset;
1456
1457 /* When save_regs_using_mov is set, emit prologue using
1458 move instead of push instructions. */
1459 bool save_regs_using_mov;
1460 };
1461
1462 /* Code model option. */
1463 enum cmodel ix86_cmodel;
1464 /* Asm dialect. */
1465 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1466 /* TLS dialects. */
1467 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1468
1469 /* Which unit we are generating floating point math for. */
1470 enum fpmath_unit ix86_fpmath;
1471
1472 /* Which cpu are we scheduling for. */
1473 enum processor_type ix86_tune;
1474
1475 /* Which instruction set architecture to use. */
1476 enum processor_type ix86_arch;
1477
1478 /* True if the SSE prefetch instruction is not a NOP. */
1479 int x86_prefetch_sse;
1480
1481 /* ix86_regparm_string as a number */
1482 static int ix86_regparm;
1483
1484 /* -mstackrealign option */
1485 extern int ix86_force_align_arg_pointer;
1486 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1487
1488 /* Preferred alignment for stack boundary in bits. */
1489 unsigned int ix86_preferred_stack_boundary;
1490
1491 /* Values 1-5: see jump.c */
1492 int ix86_branch_cost;
1493
1494 /* Variables which are this size or smaller are put in the data/bss
1495 or ldata/lbss sections. */
1496
1497 int ix86_section_threshold = 65536;
1498
1499 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1500 char internal_label_prefix[16];
1501 int internal_label_prefix_len;
1502
1503 /* Register class used for passing a given 64-bit part of the argument.
1504 These represent classes as documented by the psABI, with the exception
1505 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1506 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1507 
1508 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1509 whenever possible (the upper half is only padding). */
1510 enum x86_64_reg_class
1511 {
1512 X86_64_NO_CLASS,
1513 X86_64_INTEGER_CLASS,
1514 X86_64_INTEGERSI_CLASS,
1515 X86_64_SSE_CLASS,
1516 X86_64_SSESF_CLASS,
1517 X86_64_SSEDF_CLASS,
1518 X86_64_SSEUP_CLASS,
1519 X86_64_X87_CLASS,
1520 X86_64_X87UP_CLASS,
1521 X86_64_COMPLEX_X87_CLASS,
1522 X86_64_MEMORY_CLASS
1523 };
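/* As an illustration of these classes (hypothetical aggregate): a
   struct { double d; int i; } occupies two eightbytes, classified as
   X86_64_SSEDF_CLASS and X86_64_INTEGERSI_CLASS respectively, so it is
   passed in one SSE register plus one integer register.  The real
   classification work is done later in this file by classify_argument.  */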
1524 static const char * const x86_64_reg_class_name[] =
1525 {
1526 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1527 "sseup", "x87", "x87up", "cplx87", "no"
1528 };
1529
1530 #define MAX_CLASSES 4
1531
1532 /* Table of constants used by fldpi, fldln2, etc.... */
1533 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1534 static bool ext_80387_constants_init = 0;
1535
1536 \f
1537 static struct machine_function * ix86_init_machine_status (void);
1538 static rtx ix86_function_value (tree, tree, bool);
1539 static int ix86_function_regparm (tree, tree);
1540 static void ix86_compute_frame_layout (struct ix86_frame *);
1541 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1542 rtx, rtx, int);
1543
1544 \f
1545 /* The svr4 ABI for the i386 says that records and unions are returned
1546 in memory. */
1547 #ifndef DEFAULT_PCC_STRUCT_RETURN
1548 #define DEFAULT_PCC_STRUCT_RETURN 1
1549 #endif
1550
1551 /* Bit flags that specify the ISA we are compiling for. */
1552 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1553
1554 /* A mask of ix86_isa_flags that includes bit X if X
1555 was set or cleared on the command line. */
1556 static int ix86_isa_flags_explicit;
1557
1558 /* Define, for each ISA, the set of ISAs that become unavailable when that
1559 ISA is disabled. MMX and SSE ISAs are handled separately. */
1560
1561 #define OPTION_MASK_ISA_MMX_UNSET \
1562 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1563 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1564
1565 #define OPTION_MASK_ISA_SSE_UNSET \
1566 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1567 #define OPTION_MASK_ISA_SSE2_UNSET \
1568 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1569 #define OPTION_MASK_ISA_SSE3_UNSET \
1570 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1571 #define OPTION_MASK_ISA_SSSE3_UNSET \
1572 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1573 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1574 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1575 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1576
1577 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1578 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1579 #define OPTION_MASK_ISA_SSE4 \
1580 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1581 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1582
1583 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
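/* For example, OPTION_MASK_ISA_SSE2_UNSET above expands transitively to
   SSE3 | SSSE3 | SSE4.1 | SSE4.2 | SSE4A, so disabling SSE2 also drags down
   every later SSE extension; the option handlers below rely on this
   cascade.  */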
1584
1585 /* Implement TARGET_HANDLE_OPTION. */
1586
1587 static bool
1588 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1589 {
1590 switch (code)
1591 {
1592 case OPT_mmmx:
1593 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1594 if (!value)
1595 {
1596 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1598 }
1599 return true;
1600
1601 case OPT_m3dnow:
1602 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1603 if (!value)
1604 {
1605 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1607 }
1608 return true;
1609
1610 case OPT_m3dnowa:
1611 return false;
1612
1613 case OPT_msse:
1614 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1615 if (!value)
1616 {
1617 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1619 }
1620 return true;
1621
1622 case OPT_msse2:
1623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1624 if (!value)
1625 {
1626 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1628 }
1629 return true;
1630
1631 case OPT_msse3:
1632 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1633 if (!value)
1634 {
1635 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1637 }
1638 return true;
1639
1640 case OPT_mssse3:
1641 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1642 if (!value)
1643 {
1644 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1646 }
1647 return true;
1648
1649 case OPT_msse4_1:
1650 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1651 if (!value)
1652 {
1653 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1655 }
1656 return true;
1657
1658 case OPT_msse4_2:
1659 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1660 if (!value)
1661 {
1662 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1664 }
1665 return true;
1666
1667 case OPT_msse4:
1668 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1669 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1670 return true;
1671
1672 case OPT_mno_sse4:
1673 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1674 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1675 return true;
1676
1677 case OPT_msse4a:
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1679 if (!value)
1680 {
1681 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1683 }
1684 return true;
1685
1686 default:
1687 return true;
1688 }
1689 }
1690
1691 /* Sometimes certain combinations of command options do not make
1692 sense on a particular target machine. You can define a macro
1693 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1694 defined, is executed once just after all the command options have
1695 been parsed.
1696
1697 Don't use this macro to turn on various extra optimizations for
1698 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1699
1700 void
1701 override_options (void)
1702 {
1703 int i;
1704 int ix86_tune_defaulted = 0;
1705 unsigned int ix86_arch_mask, ix86_tune_mask;
1706
1707 /* Comes from final.c -- no real reason to change it. */
1708 #define MAX_CODE_ALIGN 16
1709
1710 static struct ptt
1711 {
1712 const struct processor_costs *cost; /* Processor costs */
1713 const int align_loop; /* Default alignments. */
1714 const int align_loop_max_skip;
1715 const int align_jump;
1716 const int align_jump_max_skip;
1717 const int align_func;
1718 }
1719 const processor_target_table[PROCESSOR_max] =
1720 {
1721 {&i386_cost, 4, 3, 4, 3, 4},
1722 {&i486_cost, 16, 15, 16, 15, 16},
1723 {&pentium_cost, 16, 7, 16, 7, 16},
1724 {&pentiumpro_cost, 16, 15, 16, 7, 16},
1725 {&geode_cost, 0, 0, 0, 0, 0},
1726 {&k6_cost, 32, 7, 32, 7, 32},
1727 {&athlon_cost, 16, 7, 16, 7, 16},
1728 {&pentium4_cost, 0, 0, 0, 0, 0},
1729 {&k8_cost, 16, 7, 16, 7, 16},
1730 {&nocona_cost, 0, 0, 0, 0, 0},
1731 {&core2_cost, 16, 7, 16, 7, 16},
1732 {&generic32_cost, 16, 7, 16, 7, 16},
1733 {&generic64_cost, 16, 7, 16, 7, 16},
1734 {&amdfam10_cost, 32, 24, 32, 7, 32}
1735 };
1736
1737 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1738 enum pta_flags
1739 {
1740 PTA_SSE = 1 << 0,
1741 PTA_SSE2 = 1 << 1,
1742 PTA_SSE3 = 1 << 2,
1743 PTA_MMX = 1 << 3,
1744 PTA_PREFETCH_SSE = 1 << 4,
1745 PTA_3DNOW = 1 << 5,
1746 PTA_3DNOW_A = 1 << 6,
1747 PTA_64BIT = 1 << 7,
1748 PTA_SSSE3 = 1 << 8,
1749 PTA_CX16 = 1 << 9,
1750 PTA_POPCNT = 1 << 10,
1751 PTA_ABM = 1 << 11,
1752 PTA_SSE4A = 1 << 12,
1753 PTA_NO_SAHF = 1 << 13,
1754 PTA_SSE4_1 = 1 << 14,
1755 PTA_SSE4_2 = 1 << 15
1756 };
1757
1758 static struct pta
1759 {
1760 const char *const name; /* processor name or nickname. */
1761 const enum processor_type processor;
1762 const unsigned /*enum pta_flags*/ flags;
1763 }
1764 const processor_alias_table[] =
1765 {
1766 {"i386", PROCESSOR_I386, 0},
1767 {"i486", PROCESSOR_I486, 0},
1768 {"i586", PROCESSOR_PENTIUM, 0},
1769 {"pentium", PROCESSOR_PENTIUM, 0},
1770 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1771 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1772 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1773 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1774 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1775 {"i686", PROCESSOR_PENTIUMPRO, 0},
1776 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1777 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1778 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1779 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1780 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1781 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1782 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1783 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1784 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1785 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1786 | PTA_CX16 | PTA_NO_SAHF)},
1787 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1788 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1789 | PTA_SSSE3
1790 | PTA_CX16)},
1791 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1792 | PTA_PREFETCH_SSE)},
1793 {"k6", PROCESSOR_K6, PTA_MMX},
1794 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1795 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1796 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1797 | PTA_PREFETCH_SSE)},
1798 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1799 | PTA_PREFETCH_SSE)},
1800 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1801 | PTA_SSE)},
1802 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1803 | PTA_SSE)},
1804 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1805 | PTA_SSE)},
1806 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1807 | PTA_MMX | PTA_SSE | PTA_SSE2
1808 | PTA_NO_SAHF)},
1809 {"k8", PROCESSOR_K8, (PTA_64BIT
1810 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1811 | PTA_SSE | PTA_SSE2
1812 | PTA_NO_SAHF)},
1813 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1814 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1815 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1816 | PTA_NO_SAHF)},
1817 {"opteron", PROCESSOR_K8, (PTA_64BIT
1818 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1819 | PTA_SSE | PTA_SSE2
1820 | PTA_NO_SAHF)},
1821 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1822 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1823 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1824 | PTA_NO_SAHF)},
1825 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1826 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1827 | PTA_SSE | PTA_SSE2
1828 | PTA_NO_SAHF)},
1829 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1830 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1831 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1832 | PTA_NO_SAHF)},
1833 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1834 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1835 | PTA_SSE | PTA_SSE2
1836 | PTA_NO_SAHF)},
1837 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1838 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1839 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1840 | PTA_SSE4A
1841 | PTA_CX16 | PTA_ABM)},
1842 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1843 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1845 | PTA_SSE4A
1846 | PTA_CX16 | PTA_ABM)},
1847 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1848 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1849 };
1850
1851 int const pta_size = ARRAY_SIZE (processor_alias_table);
1852
1853 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1854 SUBTARGET_OVERRIDE_OPTIONS;
1855 #endif
1856
1857 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1858 SUBSUBTARGET_OVERRIDE_OPTIONS;
1859 #endif
1860
1861 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
1862 if (TARGET_MACHO && TARGET_64BIT)
1863 flag_pic = 2;
1864
1865 /* Set the default values for switches whose default depends on TARGET_64BIT
1866 in case they weren't overwritten by command line options. */
1867 if (TARGET_64BIT)
1868 {
1869 /* Mach-O doesn't support omitting the frame pointer for now. */
1870 if (flag_omit_frame_pointer == 2)
1871 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1872 if (flag_asynchronous_unwind_tables == 2)
1873 flag_asynchronous_unwind_tables = 1;
1874 if (flag_pcc_struct_return == 2)
1875 flag_pcc_struct_return = 0;
1876 }
1877 else
1878 {
1879 if (flag_omit_frame_pointer == 2)
1880 flag_omit_frame_pointer = 0;
1881 if (flag_asynchronous_unwind_tables == 2)
1882 flag_asynchronous_unwind_tables = 0;
1883 if (flag_pcc_struct_return == 2)
1884 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1885 }
1886
1887 /* Need to check -mtune=generic first. */
1888 if (ix86_tune_string)
1889 {
1890 if (!strcmp (ix86_tune_string, "generic")
1891 || !strcmp (ix86_tune_string, "i686")
1892 /* As special support for cross compilers we read -mtune=native
1893 as -mtune=generic. With native compilers we won't see the
1894 -mtune=native, as it was changed by the driver. */
1895 || !strcmp (ix86_tune_string, "native"))
1896 {
1897 if (TARGET_64BIT)
1898 ix86_tune_string = "generic64";
1899 else
1900 ix86_tune_string = "generic32";
1901 }
1902 else if (!strncmp (ix86_tune_string, "generic", 7))
1903 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1904 }
1905 else
1906 {
1907 if (ix86_arch_string)
1908 ix86_tune_string = ix86_arch_string;
1909 if (!ix86_tune_string)
1910 {
1911 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1912 ix86_tune_defaulted = 1;
1913 }
1914
1915 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1916 need to use a sensible tune option. */
1917 if (!strcmp (ix86_tune_string, "generic")
1918 || !strcmp (ix86_tune_string, "x86-64")
1919 || !strcmp (ix86_tune_string, "i686"))
1920 {
1921 if (TARGET_64BIT)
1922 ix86_tune_string = "generic64";
1923 else
1924 ix86_tune_string = "generic32";
1925 }
1926 }
1927 if (ix86_stringop_string)
1928 {
1929 if (!strcmp (ix86_stringop_string, "rep_byte"))
1930 stringop_alg = rep_prefix_1_byte;
1931 else if (!strcmp (ix86_stringop_string, "libcall"))
1932 stringop_alg = libcall;
1933 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1934 stringop_alg = rep_prefix_4_byte;
1935 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1936 stringop_alg = rep_prefix_8_byte;
1937 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1938 stringop_alg = loop_1_byte;
1939 else if (!strcmp (ix86_stringop_string, "loop"))
1940 stringop_alg = loop;
1941 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1942 stringop_alg = unrolled_loop;
1943 else
1944 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1945 }
1946 if (!strcmp (ix86_tune_string, "x86-64"))
1947 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1948 "-mtune=generic instead as appropriate.");
1949
1950 if (!ix86_arch_string)
1951 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1952 if (!strcmp (ix86_arch_string, "generic"))
1953 error ("generic CPU can be used only for -mtune= switch");
1954 if (!strncmp (ix86_arch_string, "generic", 7))
1955 error ("bad value (%s) for -march= switch", ix86_arch_string);
1956
1957 if (ix86_cmodel_string != 0)
1958 {
1959 if (!strcmp (ix86_cmodel_string, "small"))
1960 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1961 else if (!strcmp (ix86_cmodel_string, "medium"))
1962 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1963 else if (!strcmp (ix86_cmodel_string, "large"))
1964 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1965 else if (flag_pic)
1966 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1967 else if (!strcmp (ix86_cmodel_string, "32"))
1968 ix86_cmodel = CM_32;
1969 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1970 ix86_cmodel = CM_KERNEL;
1971 else
1972 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1973 }
1974 else
1975 {
1976 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1977 use of rip-relative addressing. This eliminates fixups that
1978 would otherwise be needed if this object is to be placed in a
1979 DLL, and is essentially just as efficient as direct addressing. */
1980 if (TARGET_64BIT_MS_ABI)
1981 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1982 else if (TARGET_64BIT)
1983 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1984 else
1985 ix86_cmodel = CM_32;
1986 }
1987 if (ix86_asm_string != 0)
1988 {
1989 if (! TARGET_MACHO
1990 && !strcmp (ix86_asm_string, "intel"))
1991 ix86_asm_dialect = ASM_INTEL;
1992 else if (!strcmp (ix86_asm_string, "att"))
1993 ix86_asm_dialect = ASM_ATT;
1994 else
1995 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1996 }
1997 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1998 error ("code model %qs not supported in the %s bit mode",
1999 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2000 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2001 sorry ("%i-bit mode not compiled in",
2002 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2003
2004 for (i = 0; i < pta_size; i++)
2005 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2006 {
2007 ix86_arch = processor_alias_table[i].processor;
2008 /* Default cpu tuning to the architecture. */
2009 ix86_tune = ix86_arch;
2010
2011 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2012 error ("CPU you selected does not support x86-64 "
2013 "instruction set");
2014
2015 if (processor_alias_table[i].flags & PTA_MMX
2016 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2017 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2018 if (processor_alias_table[i].flags & PTA_3DNOW
2019 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2020 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2021 if (processor_alias_table[i].flags & PTA_3DNOW_A
2022 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2023 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2024 if (processor_alias_table[i].flags & PTA_SSE
2025 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2026 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2027 if (processor_alias_table[i].flags & PTA_SSE2
2028 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2029 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2030 if (processor_alias_table[i].flags & PTA_SSE3
2031 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2032 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2033 if (processor_alias_table[i].flags & PTA_SSSE3
2034 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2035 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2036 if (processor_alias_table[i].flags & PTA_SSE4_1
2037 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2038 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2039 if (processor_alias_table[i].flags & PTA_SSE4_2
2040 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2041 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2042 if (processor_alias_table[i].flags & PTA_SSE4A
2043 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2044 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2045
2046 if (processor_alias_table[i].flags & PTA_ABM)
2047 x86_abm = true;
2048 if (processor_alias_table[i].flags & PTA_CX16)
2049 x86_cmpxchg16b = true;
2050 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2051 x86_popcnt = true;
2052 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2053 x86_prefetch_sse = true;
2054 if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
2055 x86_sahf = true;
2056
2057 break;
2058 }
2059
2060 if (i == pta_size)
2061 error ("bad value (%s) for -march= switch", ix86_arch_string);
2062
2063 ix86_arch_mask = 1u << ix86_arch;
2064 for (i = 0; i < X86_ARCH_LAST; ++i)
2065 ix86_arch_features[i] &= ix86_arch_mask;
2066
2067 for (i = 0; i < pta_size; i++)
2068 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2069 {
2070 ix86_tune = processor_alias_table[i].processor;
2071 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2072 {
2073 if (ix86_tune_defaulted)
2074 {
2075 ix86_tune_string = "x86-64";
2076 for (i = 0; i < pta_size; i++)
2077 if (! strcmp (ix86_tune_string,
2078 processor_alias_table[i].name))
2079 break;
2080 ix86_tune = processor_alias_table[i].processor;
2081 }
2082 else
2083 error ("CPU you selected does not support x86-64 "
2084 "instruction set");
2085 }
2086 /* Intel CPUs have always interpreted SSE prefetch instructions as
2087 NOPs; so, we can enable SSE prefetch instructions even when
2088 -mtune (rather than -march) points us to a processor that has them.
2089 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2090 higher processors. */
2091 if (TARGET_CMOVE
2092 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2093 x86_prefetch_sse = true;
2094 break;
2095 }
2096 if (i == pta_size)
2097 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2098
2099 ix86_tune_mask = 1u << ix86_tune;
2100 for (i = 0; i < X86_TUNE_LAST; ++i)
2101 ix86_tune_features[i] &= ix86_tune_mask;
2102
2103 if (optimize_size)
2104 ix86_cost = &size_cost;
2105 else
2106 ix86_cost = processor_target_table[ix86_tune].cost;
2107
2108 /* Arrange to set up i386_stack_locals for all functions. */
2109 init_machine_status = ix86_init_machine_status;
2110
2111 /* Validate -mregparm= value. */
2112 if (ix86_regparm_string)
2113 {
2114 if (TARGET_64BIT)
2115 warning (0, "-mregparm is ignored in 64-bit mode");
2116 i = atoi (ix86_regparm_string);
2117 if (i < 0 || i > REGPARM_MAX)
2118 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2119 else
2120 ix86_regparm = i;
2121 }
2122 if (TARGET_64BIT)
2123 ix86_regparm = REGPARM_MAX;
2124
2125 /* If the user has provided any of the -malign-* options,
2126 warn and use that value only if -falign-* is not set.
2127 Remove this code in GCC 3.2 or later. */
2128 if (ix86_align_loops_string)
2129 {
2130 warning (0, "-malign-loops is obsolete, use -falign-loops");
2131 if (align_loops == 0)
2132 {
2133 i = atoi (ix86_align_loops_string);
2134 if (i < 0 || i > MAX_CODE_ALIGN)
2135 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2136 else
2137 align_loops = 1 << i;
2138 }
2139 }
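  /* For instance, -malign-loops=4 (with -falign-loops unset) yields
     align_loops = 1 << 4, i.e. a 16-byte loop alignment.  */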
2140
2141 if (ix86_align_jumps_string)
2142 {
2143 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2144 if (align_jumps == 0)
2145 {
2146 i = atoi (ix86_align_jumps_string);
2147 if (i < 0 || i > MAX_CODE_ALIGN)
2148 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2149 else
2150 align_jumps = 1 << i;
2151 }
2152 }
2153
2154 if (ix86_align_funcs_string)
2155 {
2156 warning (0, "-malign-functions is obsolete, use -falign-functions");
2157 if (align_functions == 0)
2158 {
2159 i = atoi (ix86_align_funcs_string);
2160 if (i < 0 || i > MAX_CODE_ALIGN)
2161 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2162 else
2163 align_functions = 1 << i;
2164 }
2165 }
2166
2167 /* Default align_* from the processor table. */
2168 if (align_loops == 0)
2169 {
2170 align_loops = processor_target_table[ix86_tune].align_loop;
2171 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2172 }
2173 if (align_jumps == 0)
2174 {
2175 align_jumps = processor_target_table[ix86_tune].align_jump;
2176 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2177 }
2178 if (align_functions == 0)
2179 {
2180 align_functions = processor_target_table[ix86_tune].align_func;
2181 }
2182
2183 /* Validate -mbranch-cost= value, or provide default. */
2184 ix86_branch_cost = ix86_cost->branch_cost;
2185 if (ix86_branch_cost_string)
2186 {
2187 i = atoi (ix86_branch_cost_string);
2188 if (i < 0 || i > 5)
2189 error ("-mbranch-cost=%d is not between 0 and 5", i);
2190 else
2191 ix86_branch_cost = i;
2192 }
2193 if (ix86_section_threshold_string)
2194 {
2195 i = atoi (ix86_section_threshold_string);
2196 if (i < 0)
2197 error ("-mlarge-data-threshold=%d is negative", i);
2198 else
2199 ix86_section_threshold = i;
2200 }
2201
2202 if (ix86_tls_dialect_string)
2203 {
2204 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2205 ix86_tls_dialect = TLS_DIALECT_GNU;
2206 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2207 ix86_tls_dialect = TLS_DIALECT_GNU2;
2208 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2209 ix86_tls_dialect = TLS_DIALECT_SUN;
2210 else
2211 error ("bad value (%s) for -mtls-dialect= switch",
2212 ix86_tls_dialect_string);
2213 }
2214
2215 if (ix87_precision_string)
2216 {
2217 i = atoi (ix87_precision_string);
2218 if (i != 32 && i != 64 && i != 80)
2219 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2220 }
2221
2222 /* Keep nonleaf frame pointers. */
2223 if (flag_omit_frame_pointer)
2224 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2225 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2226 flag_omit_frame_pointer = 1;
2227
2228 /* If we're doing fast math, we don't care about comparison order
2229 wrt NaNs. This lets us use a shorter comparison sequence. */
2230 if (flag_finite_math_only)
2231 target_flags &= ~MASK_IEEE_FP;
2232
2233 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2234 since the insns won't need emulation. */
2235 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2236 target_flags &= ~MASK_NO_FANCY_MATH_387;
2237
2238 /* Likewise, if the target doesn't have a 387, or we've specified
2239 software floating point, don't use 387 inline intrinsics. */
2240 if (!TARGET_80387)
2241 target_flags |= MASK_NO_FANCY_MATH_387;
2242
2243 /* Turn on SSE4.1 builtins and popcnt instruction for -msse4.2. */
2244 if (TARGET_SSE4_2)
2245 {
2246 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2247 x86_popcnt = true;
2248 }
2249
2250 /* Turn on SSSE3 builtins for -msse4.1. */
2251 if (TARGET_SSE4_1)
2252 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2253
2254 /* Turn on SSE3 builtins for -mssse3. */
2255 if (TARGET_SSSE3)
2256 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2257
2258 /* Turn on SSE3 builtins for -msse4a. */
2259 if (TARGET_SSE4A)
2260 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2261
2262 /* Turn on SSE2 builtins for -msse3. */
2263 if (TARGET_SSE3)
2264 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2265
2266 /* Turn on SSE builtins for -msse2. */
2267 if (TARGET_SSE2)
2268 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2269
2270 /* Turn on MMX builtins for -msse. */
2271 if (TARGET_SSE)
2272 {
2273 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2274 x86_prefetch_sse = true;
2275 }
2276
2277 /* Turn on MMX builtins for 3Dnow. */
2278 if (TARGET_3DNOW)
2279 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2280
2281 /* Turn on POPCNT builtins for -mabm. */
2282 if (TARGET_ABM)
2283 x86_popcnt = true;
2284
2285 if (TARGET_64BIT)
2286 {
2287 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2288
2289 /* Enable by default the SSE and MMX builtins. Do allow the user to
2290 explicitly disable any of these. In particular, disabling SSE and
2291 MMX for kernel code is extremely useful. */
2292 ix86_isa_flags
2293 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2294 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2295
2296 if (TARGET_RTD)
2297 warning (0, "-mrtd is ignored in 64bit mode");
2298 }
2299 else
2300 {
2301 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2302
2303 ix86_isa_flags
2304 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2305
2306 /* The i386 ABI does not specify a red zone. It still makes sense to use
2307 one when the programmer takes care to keep the stack from being destroyed. */
2308 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2309 target_flags |= MASK_NO_RED_ZONE;
2310 }
2311
2312 /* Validate -mpreferred-stack-boundary= value, or provide default.
2313 The default of 128 bits is for Pentium III's SSE __m128. We can't
2314 change it because of optimize_size. Otherwise, we can't mix object
2315 files compiled with -Os and -On. */
2316 ix86_preferred_stack_boundary = 128;
2317 if (ix86_preferred_stack_boundary_string)
2318 {
2319 i = atoi (ix86_preferred_stack_boundary_string);
2320 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2321 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2322 TARGET_64BIT ? 4 : 2);
2323 else
2324 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2325 }
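  /* E.g. -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT = 128
     bits, the 16-byte alignment that SSE __m128 values rely on.  */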
2326
2327 /* Accept -msseregparm only if at least SSE support is enabled. */
2328 if (TARGET_SSEREGPARM
2329 && ! TARGET_SSE)
2330 error ("-msseregparm used without SSE enabled");
2331
2332 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2333 if (ix86_fpmath_string != 0)
2334 {
2335 if (! strcmp (ix86_fpmath_string, "387"))
2336 ix86_fpmath = FPMATH_387;
2337 else if (! strcmp (ix86_fpmath_string, "sse"))
2338 {
2339 if (!TARGET_SSE)
2340 {
2341 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2342 ix86_fpmath = FPMATH_387;
2343 }
2344 else
2345 ix86_fpmath = FPMATH_SSE;
2346 }
2347 else if (! strcmp (ix86_fpmath_string, "387,sse")
2348 || ! strcmp (ix86_fpmath_string, "sse,387"))
2349 {
2350 if (!TARGET_SSE)
2351 {
2352 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2353 ix86_fpmath = FPMATH_387;
2354 }
2355 else if (!TARGET_80387)
2356 {
2357 warning (0, "387 instruction set disabled, using SSE arithmetics");
2358 ix86_fpmath = FPMATH_SSE;
2359 }
2360 else
2361 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2362 }
2363 else
2364 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2365 }
2366
2367 /* If the i387 is disabled, then do not return values in it. */
2368 if (!TARGET_80387)
2369 target_flags &= ~MASK_FLOAT_RETURNS;
2370
2371 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2372 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2373 && !optimize_size)
2374 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2375
2376 /* ??? Unwind info is not correct around the CFG unless either a frame
2377 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2378 unwind info generation to be aware of the CFG and propagating states
2379 around edges. */
2380 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2381 || flag_exceptions || flag_non_call_exceptions)
2382 && flag_omit_frame_pointer
2383 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2384 {
2385 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2386 warning (0, "unwind tables currently require either a frame pointer "
2387 "or -maccumulate-outgoing-args for correctness");
2388 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2389 }
2390
2391 /* For sane SSE instruction set generation we need the fcomi instruction.
2392 It is safe to enable all CMOVE instructions. */
2393 if (TARGET_SSE)
2394 TARGET_CMOVE = 1;
2395
2396 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2397 {
2398 char *p;
2399 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2400 p = strchr (internal_label_prefix, 'X');
2401 internal_label_prefix_len = p - internal_label_prefix;
2402 *p = '\0';
2403 }
2404
2405 /* When scheduling description is not available, disable scheduler pass
2406 so it won't slow down the compilation and make x87 code slower. */
2407 if (!TARGET_SCHEDULE)
2408 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2409
2410 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2411 set_param_value ("simultaneous-prefetches",
2412 ix86_cost->simultaneous_prefetches);
2413 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2414 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2415 }
2416 \f
2417 /* Return true if this goes in large data/bss. */
2418
2419 static bool
2420 ix86_in_large_data_p (tree exp)
2421 {
2422 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2423 return false;
2424
2425 /* Functions are never large data. */
2426 if (TREE_CODE (exp) == FUNCTION_DECL)
2427 return false;
2428
2429 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2430 {
2431 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2432 if (strcmp (section, ".ldata") == 0
2433 || strcmp (section, ".lbss") == 0)
2434 return true;
2435 return false;
2436 }
2437 else
2438 {
2439 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2440
2441 /* If this is an incomplete type with size 0, then we can't put it
2442 in data because it might be too big when completed. */
2443 if (!size || size > ix86_section_threshold)
2444 return true;
2445 }
2446
2447 return false;
2448 }
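/* A sketch of the effect, with a hypothetical object: under -mcmodel=medium
   and the default 65536-byte threshold,
 
     static char big_table[100000];
 
   is considered large data (size > ix86_section_threshold) and ends up in
   .lbss via the section hooks below, while small scalars stay in plain
   .bss/.data.  */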
2449
2450 /* Switch to the appropriate section for output of DECL.
2451 DECL is either a `VAR_DECL' node or a constant of some sort.
2452 RELOC indicates whether forming the initial value of DECL requires
2453 link-time relocations. */
2454
2455 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2456 ATTRIBUTE_UNUSED;
2457
2458 static section *
2459 x86_64_elf_select_section (tree decl, int reloc,
2460 unsigned HOST_WIDE_INT align)
2461 {
2462 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2463 && ix86_in_large_data_p (decl))
2464 {
2465 const char *sname = NULL;
2466 unsigned int flags = SECTION_WRITE;
2467 switch (categorize_decl_for_section (decl, reloc))
2468 {
2469 case SECCAT_DATA:
2470 sname = ".ldata";
2471 break;
2472 case SECCAT_DATA_REL:
2473 sname = ".ldata.rel";
2474 break;
2475 case SECCAT_DATA_REL_LOCAL:
2476 sname = ".ldata.rel.local";
2477 break;
2478 case SECCAT_DATA_REL_RO:
2479 sname = ".ldata.rel.ro";
2480 break;
2481 case SECCAT_DATA_REL_RO_LOCAL:
2482 sname = ".ldata.rel.ro.local";
2483 break;
2484 case SECCAT_BSS:
2485 sname = ".lbss";
2486 flags |= SECTION_BSS;
2487 break;
2488 case SECCAT_RODATA:
2489 case SECCAT_RODATA_MERGE_STR:
2490 case SECCAT_RODATA_MERGE_STR_INIT:
2491 case SECCAT_RODATA_MERGE_CONST:
2492 sname = ".lrodata";
2493 flags = 0;
2494 break;
2495 case SECCAT_SRODATA:
2496 case SECCAT_SDATA:
2497 case SECCAT_SBSS:
2498 gcc_unreachable ();
2499 case SECCAT_TEXT:
2500 case SECCAT_TDATA:
2501 case SECCAT_TBSS:
2502 /* We don't split these for the medium model. Place them into
2503 default sections and hope for the best. */
2504 break;
2505 }
2506 if (sname)
2507 {
2508 /* We might get called with string constants, but get_named_section
2509 doesn't like them as they are not DECLs. Also, we need to set
2510 flags in that case. */
2511 if (!DECL_P (decl))
2512 return get_section (sname, flags, NULL);
2513 return get_named_section (decl, sname, reloc);
2514 }
2515 }
2516 return default_elf_select_section (decl, reloc, align);
2517 }
2518
2519 /* Build up a unique section name, expressed as a
2520 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2521 RELOC indicates whether the initial value of EXP requires
2522 link-time relocations. */
2523
2524 static void ATTRIBUTE_UNUSED
2525 x86_64_elf_unique_section (tree decl, int reloc)
2526 {
2527 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2528 && ix86_in_large_data_p (decl))
2529 {
2530 const char *prefix = NULL;
2531 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2532 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2533
2534 switch (categorize_decl_for_section (decl, reloc))
2535 {
2536 case SECCAT_DATA:
2537 case SECCAT_DATA_REL:
2538 case SECCAT_DATA_REL_LOCAL:
2539 case SECCAT_DATA_REL_RO:
2540 case SECCAT_DATA_REL_RO_LOCAL:
2541 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2542 break;
2543 case SECCAT_BSS:
2544 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2545 break;
2546 case SECCAT_RODATA:
2547 case SECCAT_RODATA_MERGE_STR:
2548 case SECCAT_RODATA_MERGE_STR_INIT:
2549 case SECCAT_RODATA_MERGE_CONST:
2550 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2551 break;
2552 case SECCAT_SRODATA:
2553 case SECCAT_SDATA:
2554 case SECCAT_SBSS:
2555 gcc_unreachable ();
2556 case SECCAT_TEXT:
2557 case SECCAT_TDATA:
2558 case SECCAT_TBSS:
2559 /* We don't split these for the medium model. Place them into
2560 default sections and hope for the best. */
2561 break;
2562 }
2563 if (prefix)
2564 {
2565 const char *name;
2566 size_t nlen, plen;
2567 char *string;
2568 plen = strlen (prefix);
2569
2570 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2571 name = targetm.strip_name_encoding (name);
2572 nlen = strlen (name);
2573
2574 string = (char *) alloca (nlen + plen + 1);
2575 memcpy (string, prefix, plen);
2576 memcpy (string + plen, name, nlen + 1);
2577
2578 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2579 return;
2580 }
2581 }
2582 default_unique_section (decl, reloc);
2583 }
2584
2585 #ifdef COMMON_ASM_OP
2586 /* This says how to output assembler code to declare an
2587 uninitialized external linkage data object.
2588
2589 For medium model x86-64 we need to use .largecomm opcode for
2590 large objects. */
2591 void
2592 x86_elf_aligned_common (FILE *file,
2593 const char *name, unsigned HOST_WIDE_INT size,
2594 int align)
2595 {
2596 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2597 && size > (unsigned int)ix86_section_threshold)
2598 fprintf (file, ".largecomm\t");
2599 else
2600 fprintf (file, "%s", COMMON_ASM_OP);
2601 assemble_name (file, name);
2602 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2603 size, align / BITS_PER_UNIT);
2604 }
2605 #endif
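/* With the routine above and hypothetical names, a 100000-byte common
   object with 256-bit (32-byte) alignment under -mcmodel=medium would be
   emitted roughly as
 
     .largecomm	big_buf,100000,32
 
   whereas smaller objects go through the ordinary COMMON_ASM_OP path.  */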
2606
2607 /* Utility function for targets to use in implementing
2608 ASM_OUTPUT_ALIGNED_BSS. */
2609
2610 void
2611 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2612 const char *name, unsigned HOST_WIDE_INT size,
2613 int align)
2614 {
2615 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2616 && size > (unsigned int)ix86_section_threshold)
2617 switch_to_section (get_named_section (decl, ".lbss", 0));
2618 else
2619 switch_to_section (bss_section);
2620 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2621 #ifdef ASM_DECLARE_OBJECT_NAME
2622 last_assemble_variable_decl = decl;
2623 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2624 #else
2625 /* Standard thing is just output label for the object. */
2626 ASM_OUTPUT_LABEL (file, name);
2627 #endif /* ASM_DECLARE_OBJECT_NAME */
2628 ASM_OUTPUT_SKIP (file, size ? size : 1);
2629 }
2630 \f
2631 void
2632 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2633 {
2634 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2635 make the problem with not enough registers even worse. */
2636 #ifdef INSN_SCHEDULING
2637 if (level > 1)
2638 flag_schedule_insns = 0;
2639 #endif
2640
2641 if (TARGET_MACHO)
2642 /* The Darwin libraries never set errno, so we might as well
2643 avoid calling them when that's the only reason we would. */
2644 flag_errno_math = 0;
2645
2646 /* The default values of these switches depend on TARGET_64BIT,
2647 which is not known at this moment. Mark these values with 2 and
2648 let the user override them. If there is no command line option
2649 specifying them, we will set the defaults in override_options. */
2650 if (optimize >= 1)
2651 flag_omit_frame_pointer = 2;
2652 flag_pcc_struct_return = 2;
2653 flag_asynchronous_unwind_tables = 2;
2654 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2655 SUBTARGET_OPTIMIZATION_OPTIONS;
2656 #endif
2657 }
2658 \f
2659 /* Decide whether we can make a sibling call to a function. DECL is the
2660 declaration of the function being targeted by the call and EXP is the
2661 CALL_EXPR representing the call. */
2662
2663 static bool
2664 ix86_function_ok_for_sibcall (tree decl, tree exp)
2665 {
2666 tree func;
2667 rtx a, b;
2668
2669 /* If we are generating position-independent code, we cannot sibcall
2670 optimize any indirect call, or a direct call to a global function,
2671 as the PLT requires %ebx be live. */
2672 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2673 return false;
2674
2675 if (decl)
2676 func = decl;
2677 else
2678 {
2679 func = TREE_TYPE (CALL_EXPR_FN (exp));
2680 if (POINTER_TYPE_P (func))
2681 func = TREE_TYPE (func);
2682 }
2683
2684 /* Check that the return value locations are the same. For example,
2685 if we are returning floats on the 80387 register stack, we cannot
2686 make a sibcall from a function that doesn't return a float to a
2687 function that does or, conversely, from a function that does return
2688 a float to a function that doesn't; the necessary stack adjustment
2689 would not be executed. This is also the place we notice
2690 differences in the return value ABI. Note that it is ok for one
2691 of the functions to have void return type as long as the return
2692 value of the other is passed in a register. */
2693 a = ix86_function_value (TREE_TYPE (exp), func, false);
2694 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2695 cfun->decl, false);
2696 if (STACK_REG_P (a) || STACK_REG_P (b))
2697 {
2698 if (!rtx_equal_p (a, b))
2699 return false;
2700 }
2701 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2702 ;
2703 else if (!rtx_equal_p (a, b))
2704 return false;
2705
2706 /* If this call is indirect, we'll need to be able to use a call-clobbered
2707 register for the address of the target function. Make sure that all
2708 such registers are not used for passing parameters. */
2709 if (!decl && !TARGET_64BIT)
2710 {
2711 tree type;
2712
2713 /* We're looking at the CALL_EXPR, we need the type of the function. */
2714 type = CALL_EXPR_FN (exp); /* pointer expression */
2715 type = TREE_TYPE (type); /* pointer type */
2716 type = TREE_TYPE (type); /* function type */
2717
2718 if (ix86_function_regparm (type, NULL) >= 3)
2719 {
2720 /* ??? Need to count the actual number of registers to be used,
2721 not the possible number of registers. Fix later. */
2722 return false;
2723 }
2724 }
2725
2726 /* Dllimport'd functions are also called indirectly. */
2727 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2728 && decl && DECL_DLLIMPORT_P (decl)
2729 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2730 return false;
2731
2732 /* If we force-aligned the stack, then sibcalling would unalign the
2733 stack, which may break the called function. */
2734 if (cfun->machine->force_align_arg_pointer)
2735 return false;
2736
2737 /* Otherwise okay. That also includes certain types of indirect calls. */
2738 return true;
2739 }
2740
2741 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2742 calling convention attributes;
2743 arguments as in struct attribute_spec.handler. */
2744
2745 static tree
2746 ix86_handle_cconv_attribute (tree *node, tree name,
2747 tree args,
2748 int flags ATTRIBUTE_UNUSED,
2749 bool *no_add_attrs)
2750 {
2751 if (TREE_CODE (*node) != FUNCTION_TYPE
2752 && TREE_CODE (*node) != METHOD_TYPE
2753 && TREE_CODE (*node) != FIELD_DECL
2754 && TREE_CODE (*node) != TYPE_DECL)
2755 {
2756 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2757 IDENTIFIER_POINTER (name));
2758 *no_add_attrs = true;
2759 return NULL_TREE;
2760 }
2761
2762 /* Can combine regparm with all attributes but fastcall. */
2763 if (is_attribute_p ("regparm", name))
2764 {
2765 tree cst;
2766
2767 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2768 {
2769 error ("fastcall and regparm attributes are not compatible");
2770 }
2771
2772 cst = TREE_VALUE (args);
2773 if (TREE_CODE (cst) != INTEGER_CST)
2774 {
2775 warning (OPT_Wattributes,
2776 "%qs attribute requires an integer constant argument",
2777 IDENTIFIER_POINTER (name));
2778 *no_add_attrs = true;
2779 }
2780 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2781 {
2782 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2783 IDENTIFIER_POINTER (name), REGPARM_MAX);
2784 *no_add_attrs = true;
2785 }
2786
2787 if (!TARGET_64BIT
2788 && lookup_attribute (ix86_force_align_arg_pointer_string,
2789 TYPE_ATTRIBUTES (*node))
2790 && compare_tree_int (cst, REGPARM_MAX-1))
2791 {
2792 error ("%s functions limited to %d register parameters",
2793 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2794 }
2795
2796 return NULL_TREE;
2797 }
2798
2799 if (TARGET_64BIT)
2800 {
2801 /* Do not warn when emulating the MS ABI. */
2802 if (!TARGET_64BIT_MS_ABI)
2803 warning (OPT_Wattributes, "%qs attribute ignored",
2804 IDENTIFIER_POINTER (name));
2805 *no_add_attrs = true;
2806 return NULL_TREE;
2807 }
2808
2809 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2810 if (is_attribute_p ("fastcall", name))
2811 {
2812 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2813 {
2814 error ("fastcall and cdecl attributes are not compatible");
2815 }
2816 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2817 {
2818 error ("fastcall and stdcall attributes are not compatible");
2819 }
2820 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2821 {
2822 error ("fastcall and regparm attributes are not compatible");
2823 }
2824 }
2825
2826 /* Can combine stdcall with fastcall (redundant), regparm and
2827 sseregparm. */
2828 else if (is_attribute_p ("stdcall", name))
2829 {
2830 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2831 {
2832 error ("stdcall and cdecl attributes are not compatible");
2833 }
2834 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2835 {
2836 error ("stdcall and fastcall attributes are not compatible");
2837 }
2838 }
2839
2840 /* Can combine cdecl with regparm and sseregparm. */
2841 else if (is_attribute_p ("cdecl", name))
2842 {
2843 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2844 {
2845 error ("stdcall and cdecl attributes are not compatible");
2846 }
2847 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2848 {
2849 error ("fastcall and cdecl attributes are not compatible");
2850 }
2851 }
2852
2853 /* Can combine sseregparm with all attributes. */
2854
2855 return NULL_TREE;
2856 }
2857
2858 /* Return 0 if the attributes for two types are incompatible, 1 if they
2859 are compatible, and 2 if they are nearly compatible (which causes a
2860 warning to be generated). */
2861
2862 static int
2863 ix86_comp_type_attributes (tree type1, tree type2)
2864 {
2865 /* Check for mismatch of non-default calling convention. */
2866 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2867
2868 if (TREE_CODE (type1) != FUNCTION_TYPE)
2869 return 1;
2870
2871 /* Check for mismatched fastcall/regparm types. */
2872 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2873 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2874 || (ix86_function_regparm (type1, NULL)
2875 != ix86_function_regparm (type2, NULL)))
2876 return 0;
2877
2878 /* Check for mismatched sseregparm types. */
2879 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2880 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2881 return 0;
2882
2883 /* Check for mismatched return types (cdecl vs stdcall). */
2884 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2885 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2886 return 0;
2887
2888 return 1;
2889 }
2890 \f
2891 /* Return the regparm value for a function with the indicated TYPE and DECL.
2892 DECL may be NULL when calling function indirectly
2893 or considering a libcall. */
2894
2895 static int
2896 ix86_function_regparm (tree type, tree decl)
2897 {
2898 tree attr;
2899 int regparm = ix86_regparm;
2900
2901 if (TARGET_64BIT)
2902 return regparm;
2903
2904 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2905 if (attr)
2906 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2907
2908 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2909 return 2;
2910
2911 /* Use register calling convention for local functions when possible. */
2912 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2913 && flag_unit_at_a_time && !profile_flag)
2914 {
2915 struct cgraph_local_info *i = cgraph_local_info (decl);
2916 if (i && i->local)
2917 {
2918 int local_regparm, globals = 0, regno;
2919 struct function *f;
2920
2921 /* Make sure no regparm register is taken by a
2922 global register variable. */
2923 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2924 if (global_regs[local_regparm])
2925 break;
2926
2927 /* We can't use regparm(3) for nested functions as these use
2928 static chain pointer in third argument. */
2929 if (local_regparm == 3
2930 && (decl_function_context (decl)
2931 || ix86_force_align_arg_pointer)
2932 && !DECL_NO_STATIC_CHAIN (decl))
2933 local_regparm = 2;
2934
2935 /* If the function realigns its stack pointer, the prologue will
2936 clobber %ecx. If we've already generated code for the callee,
2937 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2938 scanning the attributes for the self-realigning property. */
2939 f = DECL_STRUCT_FUNCTION (decl);
2940 if (local_regparm == 3
2941 && (f ? !!f->machine->force_align_arg_pointer
2942 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2943 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2944 local_regparm = 2;
2945
2946 /* Each global register variable increases register pressure, so the
2947 more global register variables there are, the less useful the regparm
2948 optimization becomes, unless requested explicitly by the user. */
2949 for (regno = 0; regno < 6; regno++)
2950 if (global_regs[regno])
2951 globals++;
2952 local_regparm
2953 = globals < local_regparm ? local_regparm - globals : 0;
2954
2955 if (local_regparm > regparm)
2956 regparm = local_regparm;
2957 }
2958 }
2959
2960 return regparm;
2961 }
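/* A sketch of the local-function case above: a static function whose
   address does not escape (cgraph marks it local), compiled with
   -funit-at-a-time and no global register variables, is promoted to
   regparm(3) automatically; if it is nested (needs the static chain) or
   realigns its stack (the prologue clobbers %ecx), it is limited to
   regparm(2) instead.  */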
2962
2963 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2964 DFmode (2) arguments in SSE registers for a function with the
2965 indicated TYPE and DECL. DECL may be NULL when calling function
2966 indirectly or considering a libcall. Otherwise return 0. */
2967
2968 static int
2969 ix86_function_sseregparm (tree type, tree decl)
2970 {
2971 gcc_assert (!TARGET_64BIT);
2972
2973 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2974 by the sseregparm attribute. */
2975 if (TARGET_SSEREGPARM
2976 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2977 {
2978 if (!TARGET_SSE)
2979 {
2980 if (decl)
2981 error ("Calling %qD with attribute sseregparm without "
2982 "SSE/SSE2 enabled", decl);
2983 else
2984 error ("Calling %qT with attribute sseregparm without "
2985 "SSE/SSE2 enabled", type);
2986 return 0;
2987 }
2988
2989 return 2;
2990 }
2991
2992 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2993 (and DFmode for SSE2) arguments in SSE registers. */
2994 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2995 {
2996 struct cgraph_local_info *i = cgraph_local_info (decl);
2997 if (i && i->local)
2998 return TARGET_SSE2 ? 2 : 1;
2999 }
3000
3001 return 0;
3002 }
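/* For example, a cgraph-local function compiled with -mfpmath=sse -msse2
   gets 2 here, so both its SFmode and DFmode scalar arguments travel in
   SSE registers; with only -msse the value is 1 and only SFmode arguments
   do.  */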
3003
3004 /* Return true if EAX is live at the start of the function. Used by
3005 ix86_expand_prologue to determine if we need special help before
3006 calling allocate_stack_worker. */
3007
3008 static bool
3009 ix86_eax_live_at_start_p (void)
3010 {
3011 /* Cheat. Don't bother working forward from ix86_function_regparm
3012 to the function type to whether an actual argument is located in
3013 eax. Instead just look at cfg info, which is still close enough
3014 to correct at this point. This gives false positives for broken
3015 functions that might use uninitialized data that happens to be
3016 allocated in eax, but who cares? */
3017 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3018 }
3019
3020 /* Return true if TYPE has a variable argument list. */
3021
3022 static bool
3023 type_has_variadic_args_p (tree type)
3024 {
3025 tree n, t = TYPE_ARG_TYPES (type);
3026
3027 if (t == NULL)
3028 return false;
3029
3030 while ((n = TREE_CHAIN (t)) != NULL)
3031 t = n;
3032
3033 return TREE_VALUE (t) != void_type_node;
3034 }
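/* Illustration: for a declaration like "int f (int, ...)" the
   TYPE_ARG_TYPES chain is not terminated by void_type_node, so this
   returns true; for a prototyped "int g (int)" the chain ends with
   void_type_node and the result is false.  */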
3035
3036 /* Value is the number of bytes of arguments automatically
3037 popped when returning from a subroutine call.
3038 FUNDECL is the declaration node of the function (as a tree),
3039 FUNTYPE is the data type of the function (as a tree),
3040 or for a library call it is an identifier node for the subroutine name.
3041 SIZE is the number of bytes of arguments passed on the stack.
3042
3043 On the 80386, the RTD insn may be used to pop them if the number
3044 of args is fixed, but if the number is variable then the caller
3045 must pop them all. RTD can't be used for library calls now
3046 because the library is compiled with the Unix compiler.
3047 Use of RTD is a selectable option, since it is incompatible with
3048 standard Unix calling sequences. If the option is not selected,
3049 the caller must always pop the args.
3050
3051 The attribute stdcall is equivalent to RTD on a per module basis. */
3052
3053 int
3054 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3055 {
3056 int rtd;
3057
3058 /* None of the 64-bit ABIs pop arguments. */
3059 if (TARGET_64BIT)
3060 return 0;
3061
3062 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3063
3064 /* Cdecl functions override -mrtd, and never pop the stack. */
3065 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3066 {
3067 /* Stdcall and fastcall functions will pop the stack if not
3068 variable args. */
3069 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3070 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3071 rtd = 1;
3072
3073 if (rtd && ! type_has_variadic_args_p (funtype))
3074 return size;
3075 }
3076
3077 /* Lose any fake structure return argument if it is passed on the stack. */
3078 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3079 && !KEEP_AGGREGATE_RETURN_POINTER)
3080 {
3081 int nregs = ix86_function_regparm (funtype, fundecl);
3082 if (nregs == 0)
3083 return GET_MODE_SIZE (Pmode);
3084 }
3085
3086 return 0;
3087 }
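/* Worked example (illustrative only): for a 32-bit call to
       void __attribute__((stdcall)) f (int, int);
   SIZE is 8, the type is neither cdecl nor variadic, so the callee pops
   the 8 argument bytes itself (e.g. with "ret $8").  A cdecl or
   variadic declaration returns 0 here and leaves the cleanup to the
   caller.  */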
3088 \f
3089 /* Argument support functions. */
3090
3091 /* Return true when register may be used to pass function parameters. */
3092 bool
3093 ix86_function_arg_regno_p (int regno)
3094 {
3095 int i;
3096 const int *parm_regs;
3097
3098 if (!TARGET_64BIT)
3099 {
3100 if (TARGET_MACHO)
3101 return (regno < REGPARM_MAX
3102 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3103 else
3104 return (regno < REGPARM_MAX
3105 || (TARGET_MMX && MMX_REGNO_P (regno)
3106 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3107 || (TARGET_SSE && SSE_REGNO_P (regno)
3108 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3109 }
3110
3111 if (TARGET_MACHO)
3112 {
3113 if (SSE_REGNO_P (regno) && TARGET_SSE)
3114 return true;
3115 }
3116 else
3117 {
3118 if (TARGET_SSE && SSE_REGNO_P (regno)
3119 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3120 return true;
3121 }
3122
3123 /* RAX is used as hidden argument to va_arg functions. */
3124 if (!TARGET_64BIT_MS_ABI && regno == 0)
3125 return true;
3126
3127 if (TARGET_64BIT_MS_ABI)
3128 parm_regs = x86_64_ms_abi_int_parameter_registers;
3129 else
3130 parm_regs = x86_64_int_parameter_registers;
3131 for (i = 0; i < REGPARM_MAX; i++)
3132 if (regno == parm_regs[i])
3133 return true;
3134 return false;
3135 }
3136
3137 /* Return true if we do not know how to pass TYPE solely in registers. */
3138
3139 static bool
3140 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3141 {
3142 if (must_pass_in_stack_var_size_or_pad (mode, type))
3143 return true;
3144
3145 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3146 The layout_type routine is crafty and tries to trick us into passing
3147 currently unsupported vector types on the stack by using TImode. */
3148 return (!TARGET_64BIT && mode == TImode
3149 && type && TREE_CODE (type) != VECTOR_TYPE);
3150 }
3151
3152 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3153 for a call to a function whose data type is FNTYPE.
3154 For a library call, FNTYPE is 0. */
3155
3156 void
3157 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3158 tree fntype, /* tree ptr for function decl */
3159 rtx libname, /* SYMBOL_REF of library name or 0 */
3160 tree fndecl)
3161 {
3162 memset (cum, 0, sizeof (*cum));
3163
3164 /* Set up the number of registers to use for passing arguments. */
3165 cum->nregs = ix86_regparm;
3166 if (TARGET_SSE)
3167 cum->sse_nregs = SSE_REGPARM_MAX;
3168 if (TARGET_MMX)
3169 cum->mmx_nregs = MMX_REGPARM_MAX;
3170 cum->warn_sse = true;
3171 cum->warn_mmx = true;
3172 cum->maybe_vaarg = (fntype
3173 ? (!TYPE_ARG_TYPES (fntype)
3174 || type_has_variadic_args_p (fntype))
3175 : !libname);
3176
3177 if (!TARGET_64BIT)
3178 {
3179 /* If there are variable arguments, then we won't pass anything
3180 in registers in 32-bit mode. */
3181 if (cum->maybe_vaarg)
3182 {
3183 cum->nregs = 0;
3184 cum->sse_nregs = 0;
3185 cum->mmx_nregs = 0;
3186 cum->warn_sse = 0;
3187 cum->warn_mmx = 0;
3188 return;
3189 }
3190
3191 /* Use ecx and edx registers if function has fastcall attribute,
3192 else look for regparm information. */
3193 if (fntype)
3194 {
3195 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3196 {
3197 cum->nregs = 2;
3198 cum->fastcall = 1;
3199 }
3200 else
3201 cum->nregs = ix86_function_regparm (fntype, fndecl);
3202 }
3203
3204 /* Set up the number of SSE registers used for passing SFmode
3205 and DFmode arguments. Warn for mismatching ABI. */
3206 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3207 }
3208 }
3209
3210 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3211 But in the case of vector types, it is some vector mode.
3212
3213 When we have only some of our vector isa extensions enabled, then there
3214 are some modes for which vector_mode_supported_p is false. For these
3215 modes, the generic vector support in gcc will choose some non-vector mode
3216 in order to implement the type. By computing the natural mode, we'll
3217 select the proper ABI location for the operand and not depend on whatever
3218 the middle-end decides to do with these vector types. */
3219
3220 static enum machine_mode
3221 type_natural_mode (tree type)
3222 {
3223 enum machine_mode mode = TYPE_MODE (type);
3224
3225 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3226 {
3227 HOST_WIDE_INT size = int_size_in_bytes (type);
3228 if ((size == 8 || size == 16)
3229 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3230 && TYPE_VECTOR_SUBPARTS (type) > 1)
3231 {
3232 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3233
3234 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3235 mode = MIN_MODE_VECTOR_FLOAT;
3236 else
3237 mode = MIN_MODE_VECTOR_INT;
3238
3239 /* Get the mode which has this inner mode and number of units. */
3240 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3241 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3242 && GET_MODE_INNER (mode) == innermode)
3243 return mode;
3244
3245 gcc_unreachable ();
3246 }
3247 }
3248
3249 return mode;
3250 }
3251
3252 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3253 this may not agree with the mode that the type system has chosen for the
3254 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3255 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3256
3257 static rtx
3258 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3259 unsigned int regno)
3260 {
3261 rtx tmp;
3262
3263 if (orig_mode != BLKmode)
3264 tmp = gen_rtx_REG (orig_mode, regno);
3265 else
3266 {
3267 tmp = gen_rtx_REG (mode, regno);
3268 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3269 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3270 }
3271
3272 return tmp;
3273 }
3274
3275 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3276 of this code is to classify each 8bytes of incoming argument by the register
3277 class and assign registers accordingly. */
3278
3279 /* Return the union class of CLASS1 and CLASS2.
3280 See the x86-64 PS ABI for details. */
3281
3282 static enum x86_64_reg_class
3283 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3284 {
3285 /* Rule #1: If both classes are equal, this is the resulting class. */
3286 if (class1 == class2)
3287 return class1;
3288
3289 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3290 the other class. */
3291 if (class1 == X86_64_NO_CLASS)
3292 return class2;
3293 if (class2 == X86_64_NO_CLASS)
3294 return class1;
3295
3296 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3297 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3298 return X86_64_MEMORY_CLASS;
3299
3300 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3301 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3302 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3303 return X86_64_INTEGERSI_CLASS;
3304 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3305 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3306 return X86_64_INTEGER_CLASS;
3307
3308 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3309 MEMORY is used. */
3310 if (class1 == X86_64_X87_CLASS
3311 || class1 == X86_64_X87UP_CLASS
3312 || class1 == X86_64_COMPLEX_X87_CLASS
3313 || class2 == X86_64_X87_CLASS
3314 || class2 == X86_64_X87UP_CLASS
3315 || class2 == X86_64_COMPLEX_X87_CLASS)
3316 return X86_64_MEMORY_CLASS;
3317
3318 /* Rule #6: Otherwise class SSE is used. */
3319 return X86_64_SSE_CLASS;
3320 }
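/* Example of the merge rules (an illustrative sketch): for
       struct s { int i; float f; };
   the single eightbyte sees an INTEGERSI word from the int and an SSE
   word from the float at offset 4; rule #4 merges them into
   X86_64_INTEGER_CLASS, so the whole struct is passed in one integer
   register.  */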
3321
3322 /* Classify the argument of type TYPE and mode MODE.
3323 CLASSES will be filled by the register class used to pass each word
3324 of the operand. The number of words is returned. In case the parameter
3325 should be passed in memory, 0 is returned. As a special case for zero
3326 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3327
3328 BIT_OFFSET is used internally for handling records and specifies the
3329 offset in bits modulo 256, to avoid overflow cases.
3330
3331 See the x86-64 PS ABI for details.
3332 */
3333
3334 static int
3335 classify_argument (enum machine_mode mode, tree type,
3336 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3337 {
3338 HOST_WIDE_INT bytes =
3339 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3340 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3341
3342 /* Variable sized entities are always passed/returned in memory. */
3343 if (bytes < 0)
3344 return 0;
3345
3346 if (mode != VOIDmode
3347 && targetm.calls.must_pass_in_stack (mode, type))
3348 return 0;
3349
3350 if (type && AGGREGATE_TYPE_P (type))
3351 {
3352 int i;
3353 tree field;
3354 enum x86_64_reg_class subclasses[MAX_CLASSES];
3355
3356 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3357 if (bytes > 16)
3358 return 0;
3359
3360 for (i = 0; i < words; i++)
3361 classes[i] = X86_64_NO_CLASS;
3362
3363 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3364 signal the memory class, so handle it as a special case. */
3365 if (!words)
3366 {
3367 classes[0] = X86_64_NO_CLASS;
3368 return 1;
3369 }
3370
3371 /* Classify each field of record and merge classes. */
3372 switch (TREE_CODE (type))
3373 {
3374 case RECORD_TYPE:
3375 /* And now merge the fields of structure. */
3376 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3377 {
3378 if (TREE_CODE (field) == FIELD_DECL)
3379 {
3380 int num;
3381
3382 if (TREE_TYPE (field) == error_mark_node)
3383 continue;
3384
3385 /* Bitfields are always classified as integer. Handle them
3386 early, since later code would consider them to be
3387 misaligned integers. */
3388 if (DECL_BIT_FIELD (field))
3389 {
3390 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3391 i < ((int_bit_position (field) + (bit_offset % 64))
3392 + tree_low_cst (DECL_SIZE (field), 0)
3393 + 63) / 8 / 8; i++)
3394 classes[i] =
3395 merge_classes (X86_64_INTEGER_CLASS,
3396 classes[i]);
3397 }
3398 else
3399 {
3400 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3401 TREE_TYPE (field), subclasses,
3402 (int_bit_position (field)
3403 + bit_offset) % 256);
3404 if (!num)
3405 return 0;
3406 for (i = 0; i < num; i++)
3407 {
3408 int pos =
3409 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3410 classes[i + pos] =
3411 merge_classes (subclasses[i], classes[i + pos]);
3412 }
3413 }
3414 }
3415 }
3416 break;
3417
3418 case ARRAY_TYPE:
3419 /* Arrays are handled as small records. */
3420 {
3421 int num;
3422 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3423 TREE_TYPE (type), subclasses, bit_offset);
3424 if (!num)
3425 return 0;
3426
3427 /* The partial classes are now full classes. */
3428 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3429 subclasses[0] = X86_64_SSE_CLASS;
3430 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3431 subclasses[0] = X86_64_INTEGER_CLASS;
3432
3433 for (i = 0; i < words; i++)
3434 classes[i] = subclasses[i % num];
3435
3436 break;
3437 }
3438 case UNION_TYPE:
3439 case QUAL_UNION_TYPE:
3440 /* Unions are similar to RECORD_TYPE but offset is always 0.
3441 */
3442 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3443 {
3444 if (TREE_CODE (field) == FIELD_DECL)
3445 {
3446 int num;
3447
3448 if (TREE_TYPE (field) == error_mark_node)
3449 continue;
3450
3451 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3452 TREE_TYPE (field), subclasses,
3453 bit_offset);
3454 if (!num)
3455 return 0;
3456 for (i = 0; i < num; i++)
3457 classes[i] = merge_classes (subclasses[i], classes[i]);
3458 }
3459 }
3460 break;
3461
3462 default:
3463 gcc_unreachable ();
3464 }
3465
3466 /* Final merger cleanup. */
3467 for (i = 0; i < words; i++)
3468 {
3469 /* If one class is MEMORY, everything should be passed in
3470 memory. */
3471 if (classes[i] == X86_64_MEMORY_CLASS)
3472 return 0;
3473
3474 /* The X86_64_SSEUP_CLASS should be always preceded by
3475 X86_64_SSE_CLASS. */
3476 if (classes[i] == X86_64_SSEUP_CLASS
3477 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3478 classes[i] = X86_64_SSE_CLASS;
3479
3480 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3481 if (classes[i] == X86_64_X87UP_CLASS
3482 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3483 classes[i] = X86_64_SSE_CLASS;
3484 }
3485 return words;
3486 }
3487
3488 /* Compute alignment needed. We align all types to natural boundaries with
3489 the exception of XFmode, which is aligned to 128 bits. */
3490 if (mode != VOIDmode && mode != BLKmode)
3491 {
3492 int mode_alignment = GET_MODE_BITSIZE (mode);
3493
3494 if (mode == XFmode)
3495 mode_alignment = 128;
3496 else if (mode == XCmode)
3497 mode_alignment = 256;
3498 if (COMPLEX_MODE_P (mode))
3499 mode_alignment /= 2;
3500 /* Misaligned fields are always returned in memory. */
3501 if (bit_offset % mode_alignment)
3502 return 0;
3503 }
3504
3505 /* for V1xx modes, just use the base mode */
3506 if (VECTOR_MODE_P (mode)
3507 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3508 mode = GET_MODE_INNER (mode);
3509
3510 /* Classification of atomic types. */
3511 switch (mode)
3512 {
3513 case SDmode:
3514 case DDmode:
3515 classes[0] = X86_64_SSE_CLASS;
3516 return 1;
3517 case TDmode:
3518 classes[0] = X86_64_SSE_CLASS;
3519 classes[1] = X86_64_SSEUP_CLASS;
3520 return 2;
3521 case DImode:
3522 case SImode:
3523 case HImode:
3524 case QImode:
3525 case CSImode:
3526 case CHImode:
3527 case CQImode:
3528 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3529 classes[0] = X86_64_INTEGERSI_CLASS;
3530 else
3531 classes[0] = X86_64_INTEGER_CLASS;
3532 return 1;
3533 case CDImode:
3534 case TImode:
3535 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3536 return 2;
3537 case CTImode:
3538 return 0;
3539 case SFmode:
3540 if (!(bit_offset % 64))
3541 classes[0] = X86_64_SSESF_CLASS;
3542 else
3543 classes[0] = X86_64_SSE_CLASS;
3544 return 1;
3545 case DFmode:
3546 classes[0] = X86_64_SSEDF_CLASS;
3547 return 1;
3548 case XFmode:
3549 classes[0] = X86_64_X87_CLASS;
3550 classes[1] = X86_64_X87UP_CLASS;
3551 return 2;
3552 case TFmode:
3553 classes[0] = X86_64_SSE_CLASS;
3554 classes[1] = X86_64_SSEUP_CLASS;
3555 return 2;
3556 case SCmode:
3557 classes[0] = X86_64_SSE_CLASS;
3558 return 1;
3559 case DCmode:
3560 classes[0] = X86_64_SSEDF_CLASS;
3561 classes[1] = X86_64_SSEDF_CLASS;
3562 return 2;
3563 case XCmode:
3564 classes[0] = X86_64_COMPLEX_X87_CLASS;
3565 return 1;
3566 case TCmode:
3567 /* This mode is larger than 16 bytes. */
3568 return 0;
3569 case V4SFmode:
3570 case V4SImode:
3571 case V16QImode:
3572 case V8HImode:
3573 case V2DFmode:
3574 case V2DImode:
3575 classes[0] = X86_64_SSE_CLASS;
3576 classes[1] = X86_64_SSEUP_CLASS;
3577 return 2;
3578 case V2SFmode:
3579 case V2SImode:
3580 case V4HImode:
3581 case V8QImode:
3582 classes[0] = X86_64_SSE_CLASS;
3583 return 1;
3584 case BLKmode:
3585 case VOIDmode:
3586 return 0;
3587 default:
3588 gcc_assert (VECTOR_MODE_P (mode));
3589
3590 if (bytes > 16)
3591 return 0;
3592
3593 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3594
3595 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3596 classes[0] = X86_64_INTEGERSI_CLASS;
3597 else
3598 classes[0] = X86_64_INTEGER_CLASS;
3599 classes[1] = X86_64_INTEGER_CLASS;
3600 return 1 + (bytes > 8);
3601 }
3602 }
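/* A worked classification (illustrative): for
       struct s { double d; int i; };
   bytes is 16 and two eightbytes are classified - the double gives
   X86_64_SSEDF_CLASS for word 0 and the int at offset 8 gives
   X86_64_INTEGER_CLASS for word 1 - so the struct is passed in one SSE
   register plus one integer register rather than in memory.  */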
3603
3604 /* Examine the argument and set the number of registers required in each
3605 class. Return 0 iff the parameter should be passed in memory. */
3606 static int
3607 examine_argument (enum machine_mode mode, tree type, int in_return,
3608 int *int_nregs, int *sse_nregs)
3609 {
3610 enum x86_64_reg_class regclass[MAX_CLASSES];
3611 int n = classify_argument (mode, type, regclass, 0);
3612
3613 *int_nregs = 0;
3614 *sse_nregs = 0;
3615 if (!n)
3616 return 0;
3617 for (n--; n >= 0; n--)
3618 switch (regclass[n])
3619 {
3620 case X86_64_INTEGER_CLASS:
3621 case X86_64_INTEGERSI_CLASS:
3622 (*int_nregs)++;
3623 break;
3624 case X86_64_SSE_CLASS:
3625 case X86_64_SSESF_CLASS:
3626 case X86_64_SSEDF_CLASS:
3627 (*sse_nregs)++;
3628 break;
3629 case X86_64_NO_CLASS:
3630 case X86_64_SSEUP_CLASS:
3631 break;
3632 case X86_64_X87_CLASS:
3633 case X86_64_X87UP_CLASS:
3634 if (!in_return)
3635 return 0;
3636 break;
3637 case X86_64_COMPLEX_X87_CLASS:
3638 return in_return ? 2 : 0;
3639 case X86_64_MEMORY_CLASS:
3640 gcc_unreachable ();
3641 }
3642 return 1;
3643 }
3644
3645 /* Construct container for the argument used by GCC interface. See
3646 FUNCTION_ARG for the detailed description. */
3647
3648 static rtx
3649 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3650 tree type, int in_return, int nintregs, int nsseregs,
3651 const int *intreg, int sse_regno)
3652 {
3653 /* The following variables hold the static issued_error state. */
3654 static bool issued_sse_arg_error;
3655 static bool issued_sse_ret_error;
3656 static bool issued_x87_ret_error;
3657
3658 enum machine_mode tmpmode;
3659 int bytes =
3660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3661 enum x86_64_reg_class regclass[MAX_CLASSES];
3662 int n;
3663 int i;
3664 int nexps = 0;
3665 int needed_sseregs, needed_intregs;
3666 rtx exp[MAX_CLASSES];
3667 rtx ret;
3668
3669 n = classify_argument (mode, type, regclass, 0);
3670 if (!n)
3671 return NULL;
3672 if (!examine_argument (mode, type, in_return, &needed_intregs,
3673 &needed_sseregs))
3674 return NULL;
3675 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3676 return NULL;
3677
3678 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3679 some less clueful developer tries to use floating-point anyway. */
3680 if (needed_sseregs && !TARGET_SSE)
3681 {
3682 if (in_return)
3683 {
3684 if (!issued_sse_ret_error)
3685 {
3686 error ("SSE register return with SSE disabled");
3687 issued_sse_ret_error = true;
3688 }
3689 }
3690 else if (!issued_sse_arg_error)
3691 {
3692 error ("SSE register argument with SSE disabled");
3693 issued_sse_arg_error = true;
3694 }
3695 return NULL;
3696 }
3697
3698 /* Likewise, error if the ABI requires us to return values in the
3699 x87 registers and the user specified -mno-80387. */
3700 if (!TARGET_80387 && in_return)
3701 for (i = 0; i < n; i++)
3702 if (regclass[i] == X86_64_X87_CLASS
3703 || regclass[i] == X86_64_X87UP_CLASS
3704 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3705 {
3706 if (!issued_x87_ret_error)
3707 {
3708 error ("x87 register return with x87 disabled");
3709 issued_x87_ret_error = true;
3710 }
3711 return NULL;
3712 }
3713
3714 /* First construct simple cases. Avoid SCmode, since we want to use
3715 single register to pass this type. */
3716 if (n == 1 && mode != SCmode)
3717 switch (regclass[0])
3718 {
3719 case X86_64_INTEGER_CLASS:
3720 case X86_64_INTEGERSI_CLASS:
3721 return gen_rtx_REG (mode, intreg[0]);
3722 case X86_64_SSE_CLASS:
3723 case X86_64_SSESF_CLASS:
3724 case X86_64_SSEDF_CLASS:
3725 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3726 case X86_64_X87_CLASS:
3727 case X86_64_COMPLEX_X87_CLASS:
3728 return gen_rtx_REG (mode, FIRST_STACK_REG);
3729 case X86_64_NO_CLASS:
3730 /* Zero sized array, struct or class. */
3731 return NULL;
3732 default:
3733 gcc_unreachable ();
3734 }
3735 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3736 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3737 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3738
3739 if (n == 2
3740 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3741 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3742 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3743 && regclass[1] == X86_64_INTEGER_CLASS
3744 && (mode == CDImode || mode == TImode || mode == TFmode)
3745 && intreg[0] + 1 == intreg[1])
3746 return gen_rtx_REG (mode, intreg[0]);
3747
3748 /* Otherwise figure out the entries of the PARALLEL. */
3749 for (i = 0; i < n; i++)
3750 {
3751 switch (regclass[i])
3752 {
3753 case X86_64_NO_CLASS:
3754 break;
3755 case X86_64_INTEGER_CLASS:
3756 case X86_64_INTEGERSI_CLASS:
3757 /* Merge TImodes on aligned occasions here too. */
3758 if (i * 8 + 8 > bytes)
3759 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3760 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3761 tmpmode = SImode;
3762 else
3763 tmpmode = DImode;
3764 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3765 if (tmpmode == BLKmode)
3766 tmpmode = DImode;
3767 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3768 gen_rtx_REG (tmpmode, *intreg),
3769 GEN_INT (i*8));
3770 intreg++;
3771 break;
3772 case X86_64_SSESF_CLASS:
3773 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3774 gen_rtx_REG (SFmode,
3775 SSE_REGNO (sse_regno)),
3776 GEN_INT (i*8));
3777 sse_regno++;
3778 break;
3779 case X86_64_SSEDF_CLASS:
3780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (DFmode,
3782 SSE_REGNO (sse_regno)),
3783 GEN_INT (i*8));
3784 sse_regno++;
3785 break;
3786 case X86_64_SSE_CLASS:
3787 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3788 tmpmode = TImode;
3789 else
3790 tmpmode = DImode;
3791 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3792 gen_rtx_REG (tmpmode,
3793 SSE_REGNO (sse_regno)),
3794 GEN_INT (i*8));
3795 if (tmpmode == TImode)
3796 i++;
3797 sse_regno++;
3798 break;
3799 default:
3800 gcc_unreachable ();
3801 }
3802 }
3803
3804 /* Empty aligned struct, union or class. */
3805 if (nexps == 0)
3806 return NULL;
3807
3808 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3809 for (i = 0; i < nexps; i++)
3810 XVECEXP (ret, 0, i) = exp [i];
3811 return ret;
3812 }
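/* Continuing the struct { double d; int i; } example (a sketch of the
   resulting rtl; the register names assume it is the first argument):
   the PARALLEL built here looks roughly like
       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])
   i.e. the double travels in the first SSE register and the trailing
   int in the first integer register.  */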
3813
3814 /* Update the data in CUM to advance over an argument of mode MODE
3815 and data type TYPE. (TYPE is null for libcalls where that information
3816 may not be available.) */
3817
3818 static void
3819 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3820 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3821 {
3822 switch (mode)
3823 {
3824 default:
3825 break;
3826
3827 case BLKmode:
3828 if (bytes < 0)
3829 break;
3830 /* FALLTHRU */
3831
3832 case DImode:
3833 case SImode:
3834 case HImode:
3835 case QImode:
3836 cum->words += words;
3837 cum->nregs -= words;
3838 cum->regno += words;
3839
3840 if (cum->nregs <= 0)
3841 {
3842 cum->nregs = 0;
3843 cum->regno = 0;
3844 }
3845 break;
3846
3847 case DFmode:
3848 if (cum->float_in_sse < 2)
3849 break;
3850 case SFmode:
3851 if (cum->float_in_sse < 1)
3852 break;
3853 /* FALLTHRU */
3854
3855 case TImode:
3856 case V16QImode:
3857 case V8HImode:
3858 case V4SImode:
3859 case V2DImode:
3860 case V4SFmode:
3861 case V2DFmode:
3862 if (!type || !AGGREGATE_TYPE_P (type))
3863 {
3864 cum->sse_words += words;
3865 cum->sse_nregs -= 1;
3866 cum->sse_regno += 1;
3867 if (cum->sse_nregs <= 0)
3868 {
3869 cum->sse_nregs = 0;
3870 cum->sse_regno = 0;
3871 }
3872 }
3873 break;
3874
3875 case V8QImode:
3876 case V4HImode:
3877 case V2SImode:
3878 case V2SFmode:
3879 if (!type || !AGGREGATE_TYPE_P (type))
3880 {
3881 cum->mmx_words += words;
3882 cum->mmx_nregs -= 1;
3883 cum->mmx_regno += 1;
3884 if (cum->mmx_nregs <= 0)
3885 {
3886 cum->mmx_nregs = 0;
3887 cum->mmx_regno = 0;
3888 }
3889 }
3890 break;
3891 }
3892 }
3893
3894 static void
3895 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3896 tree type, HOST_WIDE_INT words)
3897 {
3898 int int_nregs, sse_nregs;
3899
3900 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3901 cum->words += words;
3902 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3903 {
3904 cum->nregs -= int_nregs;
3905 cum->sse_nregs -= sse_nregs;
3906 cum->regno += int_nregs;
3907 cum->sse_regno += sse_nregs;
3908 }
3909 else
3910 cum->words += words;
3911 }
3912
3913 static void
3914 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3915 HOST_WIDE_INT words)
3916 {
3917 /* Otherwise, this should be passed indirect. */
3918 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3919
3920 cum->words += words;
3921 if (cum->nregs > 0)
3922 {
3923 cum->nregs -= 1;
3924 cum->regno += 1;
3925 }
3926 }
3927
3928 void
3929 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3930 tree type, int named ATTRIBUTE_UNUSED)
3931 {
3932 HOST_WIDE_INT bytes, words;
3933
3934 if (mode == BLKmode)
3935 bytes = int_size_in_bytes (type);
3936 else
3937 bytes = GET_MODE_SIZE (mode);
3938 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3939
3940 if (type)
3941 mode = type_natural_mode (type);
3942
3943 if (TARGET_64BIT_MS_ABI)
3944 function_arg_advance_ms_64 (cum, bytes, words);
3945 else if (TARGET_64BIT)
3946 function_arg_advance_64 (cum, mode, type, words);
3947 else
3948 function_arg_advance_32 (cum, mode, type, bytes, words);
3949 }
3950
3951 /* Define where to put the arguments to a function.
3952 Value is zero to push the argument on the stack,
3953 or a hard register in which to store the argument.
3954
3955 MODE is the argument's machine mode.
3956 TYPE is the data type of the argument (as a tree).
3957 This is null for libcalls where that information may
3958 not be available.
3959 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3960 the preceding args and about the function being called.
3961 NAMED is nonzero if this argument is a named parameter
3962 (otherwise it is an extra parameter matching an ellipsis). */
3963
3964 static rtx
3965 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3966 enum machine_mode orig_mode, tree type,
3967 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3968 {
3969 static bool warnedsse, warnedmmx;
3970
3971 /* Avoid the AL settings for the Unix64 ABI. */
3972 if (mode == VOIDmode)
3973 return constm1_rtx;
3974
3975 switch (mode)
3976 {
3977 default:
3978 break;
3979
3980 case BLKmode:
3981 if (bytes < 0)
3982 break;
3983 /* FALLTHRU */
3984 case DImode:
3985 case SImode:
3986 case HImode:
3987 case QImode:
3988 if (words <= cum->nregs)
3989 {
3990 int regno = cum->regno;
3991
3992 /* Fastcall allocates the first two DWORD (SImode) or
3993 smaller arguments to ECX and EDX. */
3994 if (cum->fastcall)
3995 {
3996 if (mode == BLKmode || mode == DImode)
3997 break;
3998
3999 /* ECX not EAX is the first allocated register. */
4000 if (regno == 0)
4001 regno = 2;
4002 }
4003 return gen_rtx_REG (mode, regno);
4004 }
4005 break;
4006
4007 case DFmode:
4008 if (cum->float_in_sse < 2)
4009 break;
4010 case SFmode:
4011 if (cum->float_in_sse < 1)
4012 break;
4013 /* FALLTHRU */
4014 case TImode:
4015 case V16QImode:
4016 case V8HImode:
4017 case V4SImode:
4018 case V2DImode:
4019 case V4SFmode:
4020 case V2DFmode:
4021 if (!type || !AGGREGATE_TYPE_P (type))
4022 {
4023 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4024 {
4025 warnedsse = true;
4026 warning (0, "SSE vector argument without SSE enabled "
4027 "changes the ABI");
4028 }
4029 if (cum->sse_nregs)
4030 return gen_reg_or_parallel (mode, orig_mode,
4031 cum->sse_regno + FIRST_SSE_REG);
4032 }
4033 break;
4034
4035 case V8QImode:
4036 case V4HImode:
4037 case V2SImode:
4038 case V2SFmode:
4039 if (!type || !AGGREGATE_TYPE_P (type))
4040 {
4041 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4042 {
4043 warnedmmx = true;
4044 warning (0, "MMX vector argument without MMX enabled "
4045 "changes the ABI");
4046 }
4047 if (cum->mmx_nregs)
4048 return gen_reg_or_parallel (mode, orig_mode,
4049 cum->mmx_regno + FIRST_MMX_REG);
4050 }
4051 break;
4052 }
4053
4054 return NULL_RTX;
4055 }
4056
4057 static rtx
4058 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4059 enum machine_mode orig_mode, tree type)
4060 {
4061 /* Handle a hidden AL argument containing number of registers
4062 for varargs x86-64 functions. */
4063 if (mode == VOIDmode)
4064 return GEN_INT (cum->maybe_vaarg
4065 ? (cum->sse_nregs < 0
4066 ? SSE_REGPARM_MAX
4067 : cum->sse_regno)
4068 : -1);
4069
4070 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4071 cum->sse_nregs,
4072 &x86_64_int_parameter_registers [cum->regno],
4073 cum->sse_regno);
4074 }
4075
4076 static rtx
4077 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4078 enum machine_mode orig_mode, int named)
4079 {
4080 unsigned int regno;
4081
4082 /* Avoid the AL settings for the Unix64 ABI. */
4083 if (mode == VOIDmode)
4084 return constm1_rtx;
4085
4086 /* If we've run out of registers, it goes on the stack. */
4087 if (cum->nregs == 0)
4088 return NULL_RTX;
4089
4090 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4091
4092 /* Only floating point modes are passed in anything but integer regs. */
4093 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4094 {
4095 if (named)
4096 regno = cum->regno + FIRST_SSE_REG;
4097 else
4098 {
4099 rtx t1, t2;
4100
4101 /* Unnamed floating parameters are passed in both the
4102 SSE and integer registers. */
4103 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4104 t2 = gen_rtx_REG (mode, regno);
4105 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4106 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4107 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4108 }
4109 }
4110
4111 return gen_reg_or_parallel (mode, orig_mode, regno);
4112 }
4113
4114 rtx
4115 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4116 tree type, int named)
4117 {
4118 enum machine_mode mode = omode;
4119 HOST_WIDE_INT bytes, words;
4120
4121 if (mode == BLKmode)
4122 bytes = int_size_in_bytes (type);
4123 else
4124 bytes = GET_MODE_SIZE (mode);
4125 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4126
4127 /* To simplify the code below, represent vector types with a vector mode
4128 even if MMX/SSE are not active. */
4129 if (type && TREE_CODE (type) == VECTOR_TYPE)
4130 mode = type_natural_mode (type);
4131
4132 if (TARGET_64BIT_MS_ABI)
4133 return function_arg_ms_64 (cum, mode, omode, named);
4134 else if (TARGET_64BIT)
4135 return function_arg_64 (cum, mode, omode, type);
4136 else
4137 return function_arg_32 (cum, mode, omode, type, bytes, words);
4138 }
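/* Tying the three paths together (illustrative): with -m64 the two
   arguments of "void f (long, double)" come back as (reg:DI di) and
   (reg:DF xmm0); with plain -m32 and no regparm/fastcall/sseregparm
   attributes both calls return NULL_RTX and the values are pushed on
   the stack.  */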
4139
4140 /* A C expression that indicates when an argument must be passed by
4141 reference. If nonzero for an argument, a copy of that argument is
4142 made in memory and a pointer to the argument is passed instead of
4143 the argument itself. The pointer is passed in whatever way is
4144 appropriate for passing a pointer to that type. */
4145
4146 static bool
4147 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4148 enum machine_mode mode ATTRIBUTE_UNUSED,
4149 tree type, bool named ATTRIBUTE_UNUSED)
4150 {
4151 if (TARGET_64BIT_MS_ABI)
4152 {
4153 if (type)
4154 {
4155 /* Arrays are passed by reference. */
4156 if (TREE_CODE (type) == ARRAY_TYPE)
4157 return true;
4158
4159 if (AGGREGATE_TYPE_P (type))
4160 {
4161 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4162 are passed by reference. */
4163 int el2 = exact_log2 (int_size_in_bytes (type));
4164 return !(el2 >= 0 && el2 <= 3);
4165 }
4166 }
4167
4168 /* __m128 is passed by reference. */
4169 /* ??? How to handle complex? For now treat them as structs,
4170 and pass them by reference if they're too large. */
4171 if (GET_MODE_SIZE (mode) > 8)
4172 return true;
4173 }
4174 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4175 return 1;
4176
4177 return 0;
4178 }
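/* Examples of the rules above (illustrative): under the 64-bit MS ABI a
   12-byte struct is passed by reference, because its size is not 1, 2,
   4 or 8 bytes, while an 8-byte struct is passed by value; under the
   Unix 64-bit ABI only variable-sized types take the reference path
   here.  */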
4179
4180 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4181 ABI. Only called if TARGET_SSE. */
4182 static bool
4183 contains_128bit_aligned_vector_p (tree type)
4184 {
4185 enum machine_mode mode = TYPE_MODE (type);
4186 if (SSE_REG_MODE_P (mode)
4187 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4188 return true;
4189 if (TYPE_ALIGN (type) < 128)
4190 return false;
4191
4192 if (AGGREGATE_TYPE_P (type))
4193 {
4194 /* Walk the aggregates recursively. */
4195 switch (TREE_CODE (type))
4196 {
4197 case RECORD_TYPE:
4198 case UNION_TYPE:
4199 case QUAL_UNION_TYPE:
4200 {
4201 tree field;
4202
4203 /* Walk all the structure fields. */
4204 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4205 {
4206 if (TREE_CODE (field) == FIELD_DECL
4207 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4208 return true;
4209 }
4210 break;
4211 }
4212
4213 case ARRAY_TYPE:
4214 /* Just for use if some languages pass arrays by value. */
4215 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4216 return true;
4217 break;
4218
4219 default:
4220 gcc_unreachable ();
4221 }
4222 }
4223 return false;
4224 }
4225
4226 /* Gives the alignment boundary, in bits, of an argument with the
4227 specified mode and type. */
4228
4229 int
4230 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4231 {
4232 int align;
4233 if (type)
4234 align = TYPE_ALIGN (type);
4235 else
4236 align = GET_MODE_ALIGNMENT (mode);
4237 if (align < PARM_BOUNDARY)
4238 align = PARM_BOUNDARY;
4239 if (!TARGET_64BIT)
4240 {
4241 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4242 make an exception for SSE modes since these require 128bit
4243 alignment.
4244
4245 The handling here differs from field_alignment. ICC aligns MMX
4246 arguments to 4 byte boundaries, while structure fields are aligned
4247 to 8 byte boundaries. */
4248 if (!TARGET_SSE)
4249 align = PARM_BOUNDARY;
4250 else if (!type)
4251 {
4252 if (!SSE_REG_MODE_P (mode))
4253 align = PARM_BOUNDARY;
4254 }
4255 else
4256 {
4257 if (!contains_128bit_aligned_vector_p (type))
4258 align = PARM_BOUNDARY;
4259 }
4260 }
4261 if (align > 128)
4262 align = 128;
4263 return align;
4264 }
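/* For instance (illustrative): with -m32 and SSE enabled an __m128
   argument is kept at 128-bit alignment, while int and double
   arguments drop back to PARM_BOUNDARY; with -mno-sse every argument
   ends up at PARM_BOUNDARY.  */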
4265
4266 /* Return true if N is a possible register number of function value. */
4267
4268 bool
4269 ix86_function_value_regno_p (int regno)
4270 {
4271 switch (regno)
4272 {
4273 case 0:
4274 return true;
4275
4276 case FIRST_FLOAT_REG:
4277 if (TARGET_64BIT_MS_ABI)
4278 return false;
4279 return TARGET_FLOAT_RETURNS_IN_80387;
4280
4281 case FIRST_SSE_REG:
4282 return TARGET_SSE;
4283
4284 case FIRST_MMX_REG:
4285 if (TARGET_MACHO || TARGET_64BIT)
4286 return false;
4287 return TARGET_MMX;
4288 }
4289
4290 return false;
4291 }
4292
4293 /* Define how to find the value returned by a function.
4294 VALTYPE is the data type of the value (as a tree).
4295 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4296 otherwise, FUNC is 0. */
4297
4298 static rtx
4299 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4300 tree fntype, tree fn)
4301 {
4302 unsigned int regno;
4303
4304 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4305 we normally prevent this case when mmx is not available. However
4306 some ABIs may require the result to be returned like DImode. */
4307 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4308 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4309
4310 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4311 we prevent this case when sse is not available. However some ABIs
4312 may require the result to be returned like integer TImode. */
4313 else if (mode == TImode
4314 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4315 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4316
4317 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4318 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4319 regno = FIRST_FLOAT_REG;
4320 else
4321 /* Most things go in %eax. */
4322 regno = 0;
4323
4324 /* Override FP return register with %xmm0 for local functions when
4325 SSE math is enabled or for functions with sseregparm attribute. */
4326 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4327 {
4328 int sse_level = ix86_function_sseregparm (fntype, fn);
4329 if ((sse_level >= 1 && mode == SFmode)
4330 || (sse_level == 2 && mode == DFmode))
4331 regno = FIRST_SSE_REG;
4332 }
4333
4334 return gen_rtx_REG (orig_mode, regno);
4335 }
4336
4337 static rtx
4338 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4339 tree valtype)
4340 {
4341 rtx ret;
4342
4343 /* Handle libcalls, which don't provide a type node. */
4344 if (valtype == NULL)
4345 {
4346 switch (mode)
4347 {
4348 case SFmode:
4349 case SCmode:
4350 case DFmode:
4351 case DCmode:
4352 case TFmode:
4353 case SDmode:
4354 case DDmode:
4355 case TDmode:
4356 return gen_rtx_REG (mode, FIRST_SSE_REG);
4357 case XFmode:
4358 case XCmode:
4359 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4360 case TCmode:
4361 return NULL;
4362 default:
4363 return gen_rtx_REG (mode, 0);
4364 }
4365 }
4366
4367 ret = construct_container (mode, orig_mode, valtype, 1,
4368 REGPARM_MAX, SSE_REGPARM_MAX,
4369 x86_64_int_return_registers, 0);
4370
4371 /* For zero sized structures, construct_container returns NULL, but we
4372 need to keep the rest of the compiler happy by returning a meaningful value. */
4373 if (!ret)
4374 ret = gen_rtx_REG (orig_mode, 0);
4375
4376 return ret;
4377 }
4378
4379 static rtx
4380 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4381 {
4382 unsigned int regno = 0;
4383
4384 if (TARGET_SSE)
4385 {
4386 if (mode == SFmode || mode == DFmode)
4387 regno = FIRST_SSE_REG;
4388 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4389 regno = FIRST_SSE_REG;
4390 }
4391
4392 return gen_rtx_REG (orig_mode, regno);
4393 }
4394
4395 static rtx
4396 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4397 enum machine_mode orig_mode, enum machine_mode mode)
4398 {
4399 tree fn, fntype;
4400
4401 fn = NULL_TREE;
4402 if (fntype_or_decl && DECL_P (fntype_or_decl))
4403 fn = fntype_or_decl;
4404 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4405
4406 if (TARGET_64BIT_MS_ABI)
4407 return function_value_ms_64 (orig_mode, mode);
4408 else if (TARGET_64BIT)
4409 return function_value_64 (orig_mode, mode, valtype);
4410 else
4411 return function_value_32 (orig_mode, mode, fntype, fn);
4412 }
4413
4414 static rtx
4415 ix86_function_value (tree valtype, tree fntype_or_decl,
4416 bool outgoing ATTRIBUTE_UNUSED)
4417 {
4418 enum machine_mode mode, orig_mode;
4419
4420 orig_mode = TYPE_MODE (valtype);
4421 mode = type_natural_mode (valtype);
4422 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4423 }
4424
4425 rtx
4426 ix86_libcall_value (enum machine_mode mode)
4427 {
4428 return ix86_function_value_1 (NULL, NULL, mode, mode);
4429 }
4430
4431 /* Return true iff type is returned in memory. */
4432
4433 static int
4434 return_in_memory_32 (tree type, enum machine_mode mode)
4435 {
4436 HOST_WIDE_INT size;
4437
4438 if (mode == BLKmode)
4439 return 1;
4440
4441 size = int_size_in_bytes (type);
4442
4443 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4444 return 0;
4445
4446 if (VECTOR_MODE_P (mode) || mode == TImode)
4447 {
4448 /* User-created vectors small enough to fit in EAX. */
4449 if (size < 8)
4450 return 0;
4451
4452 /* MMX/3dNow values are returned in MM0,
4453 except when it doesn't exist. */
4454 if (size == 8)
4455 return (TARGET_MMX ? 0 : 1);
4456
4457 /* SSE values are returned in XMM0, except when it doesn't exist. */
4458 if (size == 16)
4459 return (TARGET_SSE ? 0 : 1);
4460 }
4461
4462 if (mode == XFmode)
4463 return 0;
4464
4465 if (mode == TDmode)
4466 return 1;
4467
4468 if (size > 12)
4469 return 1;
4470 return 0;
4471 }
4472
4473 static int
4474 return_in_memory_64 (tree type, enum machine_mode mode)
4475 {
4476 int needed_intregs, needed_sseregs;
4477 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4478 }
4479
4480 static int
4481 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4482 {
4483 HOST_WIDE_INT size = int_size_in_bytes (type);
4484
4485 /* __m128 and friends are returned in xmm0. */
4486 if (size == 16 && VECTOR_MODE_P (mode))
4487 return 0;
4488
4489 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4490 return (size != 1 && size != 2 && size != 4 && size != 8);
4491 }
4492
4493 int
4494 ix86_return_in_memory (tree type)
4495 {
4496 enum machine_mode mode = type_natural_mode (type);
4497
4498 if (TARGET_64BIT_MS_ABI)
4499 return return_in_memory_ms_64 (type, mode);
4500 else if (TARGET_64BIT)
4501 return return_in_memory_64 (type, mode);
4502 else
4503 return return_in_memory_32 (type, mode);
4504 }
4505
4506 /* Return true iff TYPE is returned in memory. This version is used
4507 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4508 but differs notably in that when MMX is available, 8-byte vectors
4509 are returned in memory, rather than in MMX registers. */
4510
4511 int
4512 ix86_sol10_return_in_memory (tree type)
4513 {
4514 int size;
4515 enum machine_mode mode = type_natural_mode (type);
4516
4517 if (TARGET_64BIT)
4518 return return_in_memory_64 (type, mode);
4519
4520 if (mode == BLKmode)
4521 return 1;
4522
4523 size = int_size_in_bytes (type);
4524
4525 if (VECTOR_MODE_P (mode))
4526 {
4527 /* Return in memory only if MMX registers *are* available. This
4528 seems backwards, but it is consistent with the existing
4529 Solaris x86 ABI. */
4530 if (size == 8)
4531 return TARGET_MMX;
4532 if (size == 16)
4533 return !TARGET_SSE;
4534 }
4535 else if (mode == TImode)
4536 return !TARGET_SSE;
4537 else if (mode == XFmode)
4538 return 0;
4539
4540 return size > 12;
4541 }
4542
4543 /* When returning SSE vector types, we have a choice of either
4544 (1) being abi incompatible with a -march switch, or
4545 (2) generating an error.
4546 Given no good solution, I think the safest thing is one warning.
4547 The user won't be able to use -Werror, but....
4548
4549 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4550 called in response to actually generating a caller or callee that
4551 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4552 via aggregate_value_p for general type probing from tree-ssa. */
4553
4554 static rtx
4555 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4556 {
4557 static bool warnedsse, warnedmmx;
4558
4559 if (!TARGET_64BIT && type)
4560 {
4561 /* Look at the return type of the function, not the function type. */
4562 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4563
4564 if (!TARGET_SSE && !warnedsse)
4565 {
4566 if (mode == TImode
4567 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4568 {
4569 warnedsse = true;
4570 warning (0, "SSE vector return without SSE enabled "
4571 "changes the ABI");
4572 }
4573 }
4574
4575 if (!TARGET_MMX && !warnedmmx)
4576 {
4577 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4578 {
4579 warnedmmx = true;
4580 warning (0, "MMX vector return without MMX enabled "
4581 "changes the ABI");
4582 }
4583 }
4584 }
4585
4586 return NULL;
4587 }
4588
4589 \f
4590 /* Create the va_list data type. */
4591
4592 static tree
4593 ix86_build_builtin_va_list (void)
4594 {
4595 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4596
4597 /* For i386 we use plain pointer to argument area. */
4598 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4599 return build_pointer_type (char_type_node);
4600
4601 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4602 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4603
4604 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4605 unsigned_type_node);
4606 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4607 unsigned_type_node);
4608 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4609 ptr_type_node);
4610 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4611 ptr_type_node);
4612
4613 va_list_gpr_counter_field = f_gpr;
4614 va_list_fpr_counter_field = f_fpr;
4615
4616 DECL_FIELD_CONTEXT (f_gpr) = record;
4617 DECL_FIELD_CONTEXT (f_fpr) = record;
4618 DECL_FIELD_CONTEXT (f_ovf) = record;
4619 DECL_FIELD_CONTEXT (f_sav) = record;
4620
4621 TREE_CHAIN (record) = type_decl;
4622 TYPE_NAME (record) = type_decl;
4623 TYPE_FIELDS (record) = f_gpr;
4624 TREE_CHAIN (f_gpr) = f_fpr;
4625 TREE_CHAIN (f_fpr) = f_ovf;
4626 TREE_CHAIN (f_ovf) = f_sav;
4627
4628 layout_type (record);
4629
4630 /* The correct type is an array type of one element. */
4631 return build_array_type (record, build_index_type (size_zero_node));
4632 }
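/* As a plain C sketch (for illustration; the real type is the record
   laid out above), the 64-bit va_list corresponds to

       typedef struct __va_list_tag {
         unsigned int gp_offset;        offset into reg_save_area for GPRs
         unsigned int fp_offset;        offset into reg_save_area for SSE regs
         void *overflow_arg_area;       next argument passed on the stack
         void *reg_save_area;           register block saved by the prologue
       } va_list[1];  */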
4633
4634 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4635
4636 static void
4637 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4638 {
4639 rtx save_area, mem;
4640 rtx label;
4641 rtx label_ref;
4642 rtx tmp_reg;
4643 rtx nsse_reg;
4644 int set;
4645 int i;
4646
4647 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4648 return;
4649
4650 /* Indicate that space for the varargs save area must be allocated on the stack. */
4651 ix86_save_varrargs_registers = 1;
4652 cfun->stack_alignment_needed = 128;
4653
4654 save_area = frame_pointer_rtx;
4655 set = get_varargs_alias_set ();
4656
4657 for (i = cum->regno;
4658 i < ix86_regparm
4659 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4660 i++)
4661 {
4662 mem = gen_rtx_MEM (Pmode,
4663 plus_constant (save_area, i * UNITS_PER_WORD));
4664 MEM_NOTRAP_P (mem) = 1;
4665 set_mem_alias_set (mem, set);
4666 emit_move_insn (mem, gen_rtx_REG (Pmode,
4667 x86_64_int_parameter_registers[i]));
4668 }
4669
4670 if (cum->sse_nregs && cfun->va_list_fpr_size)
4671 {
4672 /* Now emit code to save SSE registers. The AX parameter contains the
4673 number of SSE parameter registers used to call this function. We use
4674 the sse_prologue_save insn template that produces a computed jump across
4675 the SSE saves. We need some preparation work to get this working. */
4676
4677 label = gen_label_rtx ();
4678 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4679
4680 /* Compute address to jump to :
4681 label - 5*eax + nnamed_sse_arguments*5 */
4682 tmp_reg = gen_reg_rtx (Pmode);
4683 nsse_reg = gen_reg_rtx (Pmode);
4684 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4685 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4686 gen_rtx_MULT (Pmode, nsse_reg,
4687 GEN_INT (4))));
4688 if (cum->sse_regno)
4689 emit_move_insn
4690 (nsse_reg,
4691 gen_rtx_CONST (DImode,
4692 gen_rtx_PLUS (DImode,
4693 label_ref,
4694 GEN_INT (cum->sse_regno * 4))));
4695 else
4696 emit_move_insn (nsse_reg, label_ref);
4697 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4698
4699 /* Compute the address of the memory block we save into. We always use a
4700 pointer pointing 127 bytes after the first byte to store - this is needed
4701 to keep the instruction size limited to 4 bytes. */
4702 tmp_reg = gen_reg_rtx (Pmode);
4703 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4704 plus_constant (save_area,
4705 8 * REGPARM_MAX + 127)));
4706 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4707 MEM_NOTRAP_P (mem) = 1;
4708 set_mem_alias_set (mem, set);
4709 set_mem_align (mem, BITS_PER_WORD);
4710
4711 /* And finally do the dirty job! */
4712 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4713 GEN_INT (cum->sse_regno), label));
4714 }
4715 }
4716
4717 static void
4718 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4719 {
4720 int set = get_varargs_alias_set ();
4721 int i;
4722
4723 for (i = cum->regno; i < REGPARM_MAX; i++)
4724 {
4725 rtx reg, mem;
4726
4727 mem = gen_rtx_MEM (Pmode,
4728 plus_constant (virtual_incoming_args_rtx,
4729 i * UNITS_PER_WORD));
4730 MEM_NOTRAP_P (mem) = 1;
4731 set_mem_alias_set (mem, set);
4732
4733 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4734 emit_move_insn (mem, reg);
4735 }
4736 }
4737
4738 static void
4739 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4740 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4741 int no_rtl)
4742 {
4743 CUMULATIVE_ARGS next_cum;
4744 tree fntype;
4745 int stdarg_p;
4746
4747 /* This argument doesn't appear to be used anymore. Which is good,
4748 because the old code here didn't suppress rtl generation. */
4749 gcc_assert (!no_rtl);
4750
4751 if (!TARGET_64BIT)
4752 return;
4753
4754 fntype = TREE_TYPE (current_function_decl);
4755 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4756 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4757 != void_type_node));
4758
4759 /* For varargs, we do not want to skip the dummy va_dcl argument.
4760 For stdargs, we do want to skip the last named argument. */
4761 next_cum = *cum;
4762 if (stdarg_p)
4763 function_arg_advance (&next_cum, mode, type, 1);
4764
4765 if (TARGET_64BIT_MS_ABI)
4766 setup_incoming_varargs_ms_64 (&next_cum);
4767 else
4768 setup_incoming_varargs_64 (&next_cum);
4769 }
4770
4771 /* Implement va_start. */
4772
4773 void
4774 ix86_va_start (tree valist, rtx nextarg)
4775 {
4776 HOST_WIDE_INT words, n_gpr, n_fpr;
4777 tree f_gpr, f_fpr, f_ovf, f_sav;
4778 tree gpr, fpr, ovf, sav, t;
4779 tree type;
4780
4781 /* Only 64bit target needs something special. */
4782 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4783 {
4784 std_expand_builtin_va_start (valist, nextarg);
4785 return;
4786 }
4787
4788 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4789 f_fpr = TREE_CHAIN (f_gpr);
4790 f_ovf = TREE_CHAIN (f_fpr);
4791 f_sav = TREE_CHAIN (f_ovf);
4792
4793 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4794 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4795 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4796 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4797 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4798
4799 /* Count number of gp and fp argument registers used. */
4800 words = current_function_args_info.words;
4801 n_gpr = current_function_args_info.regno;
4802 n_fpr = current_function_args_info.sse_regno;
4803
4804 if (cfun->va_list_gpr_size)
4805 {
4806 type = TREE_TYPE (gpr);
4807 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4808 build_int_cst (type, n_gpr * 8));
4809 TREE_SIDE_EFFECTS (t) = 1;
4810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4811 }
4812
4813 if (cfun->va_list_fpr_size)
4814 {
4815 type = TREE_TYPE (fpr);
4816 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4817 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4818 TREE_SIDE_EFFECTS (t) = 1;
4819 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4820 }
4821
4822 /* Find the overflow area. */
4823 type = TREE_TYPE (ovf);
4824 t = make_tree (type, virtual_incoming_args_rtx);
4825 if (words != 0)
4826 t = build2 (PLUS_EXPR, type, t,
4827 build_int_cst (type, words * UNITS_PER_WORD));
4828 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4829 TREE_SIDE_EFFECTS (t) = 1;
4830 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4831
4832 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4833 {
4834 /* Find the register save area.
4835 The function prologue saves it right above the stack frame. */
4836 type = TREE_TYPE (sav);
4837 t = make_tree (type, frame_pointer_rtx);
4838 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4839 TREE_SIDE_EFFECTS (t) = 1;
4840 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4841 }
4842 }
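/* Concretely (an added example; REGPARM_MAX is 6 here, so the GPR save
   block is 48 bytes): a variadic function that consumed two named
   integer registers and one named SSE register starts out with
   gp_offset == 16 and fp_offset == 48 + 16 == 64, so the first va_arg
   fetches begin at the third GPR slot and the second SSE slot of the
   register save area.  */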
4843
4844 /* Implement va_arg. */
4845
4846 static tree
4847 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4848 {
4849 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4850 tree f_gpr, f_fpr, f_ovf, f_sav;
4851 tree gpr, fpr, ovf, sav, t;
4852 int size, rsize;
4853 tree lab_false, lab_over = NULL_TREE;
4854 tree addr, t2;
4855 rtx container;
4856 int indirect_p = 0;
4857 tree ptrtype;
4858 enum machine_mode nat_mode;
4859
4860 /* Only 64bit target needs something special. */
4861 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4862 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4863
4864 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4865 f_fpr = TREE_CHAIN (f_gpr);
4866 f_ovf = TREE_CHAIN (f_fpr);
4867 f_sav = TREE_CHAIN (f_ovf);
4868
4869 valist = build_va_arg_indirect_ref (valist);
4870 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4871 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4872 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4873 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4874
4875 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4876 if (indirect_p)
4877 type = build_pointer_type (type);
4878 size = int_size_in_bytes (type);
4879 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4880
4881 nat_mode = type_natural_mode (type);
4882 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4883 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4884
4885 /* Pull the value out of the saved registers. */
4886
4887 addr = create_tmp_var (ptr_type_node, "addr");
4888 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4889
4890 if (container)
4891 {
4892 int needed_intregs, needed_sseregs;
4893 bool need_temp;
4894 tree int_addr, sse_addr;
4895
4896 lab_false = create_artificial_label ();
4897 lab_over = create_artificial_label ();
4898
4899 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4900
4901 need_temp = (!REG_P (container)
4902 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4903 || TYPE_ALIGN (type) > 128));
4904
4905 /* In case we are passing a structure, verify that it is a consecutive block
4906 in the register save area. If not, we need to do moves. */
4907 if (!need_temp && !REG_P (container))
4908 {
4909 /* Verify that all registers are strictly consecutive */
4910 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4911 {
4912 int i;
4913
4914 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4915 {
4916 rtx slot = XVECEXP (container, 0, i);
4917 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4918 || INTVAL (XEXP (slot, 1)) != i * 16)
4919 need_temp = 1;
4920 }
4921 }
4922 else
4923 {
4924 int i;
4925
4926 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4927 {
4928 rtx slot = XVECEXP (container, 0, i);
4929 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4930 || INTVAL (XEXP (slot, 1)) != i * 8)
4931 need_temp = 1;
4932 }
4933 }
4934 }
4935 if (!need_temp)
4936 {
4937 int_addr = addr;
4938 sse_addr = addr;
4939 }
4940 else
4941 {
4942 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4943 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4944 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4945 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4946 }
4947
4948 /* First ensure that we fit completely in registers. */
4949 if (needed_intregs)
4950 {
4951 t = build_int_cst (TREE_TYPE (gpr),
4952 (REGPARM_MAX - needed_intregs + 1) * 8);
4953 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4954 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4955 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4956 gimplify_and_add (t, pre_p);
4957 }
4958 if (needed_sseregs)
4959 {
4960 t = build_int_cst (TREE_TYPE (fpr),
4961 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4962 + REGPARM_MAX * 8);
4963 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4964 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4965 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4966 gimplify_and_add (t, pre_p);
4967 }
4968
4969 /* Compute index to start of area used for integer regs. */
4970 if (needed_intregs)
4971 {
4972 /* int_addr = gpr + sav; */
4973 t = fold_convert (ptr_type_node, fold_convert (size_type_node, gpr));
4974 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4975 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4976 gimplify_and_add (t, pre_p);
4977 }
4978 if (needed_sseregs)
4979 {
4980 /* sse_addr = fpr + sav; */
4981 t = fold_convert (ptr_type_node, fold_convert (size_type_node, fpr));
4982 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4983 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4984 gimplify_and_add (t, pre_p);
4985 }
4986 if (need_temp)
4987 {
4988 int i;
4989 tree temp = create_tmp_var (type, "va_arg_tmp");
4990
4991 /* addr = &temp; */
4992 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4993 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4994 gimplify_and_add (t, pre_p);
4995
4996 for (i = 0; i < XVECLEN (container, 0); i++)
4997 {
4998 rtx slot = XVECEXP (container, 0, i);
4999 rtx reg = XEXP (slot, 0);
5000 enum machine_mode mode = GET_MODE (reg);
5001 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5002 tree addr_type = build_pointer_type (piece_type);
5003 tree src_addr, src;
5004 int src_offset;
5005 tree dest_addr, dest;
5006
5007 if (SSE_REGNO_P (REGNO (reg)))
5008 {
5009 src_addr = sse_addr;
5010 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5011 }
5012 else
5013 {
5014 src_addr = int_addr;
5015 src_offset = REGNO (reg) * 8;
5016 }
5017 src_addr = fold_convert (addr_type, src_addr);
5018 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
5019 build_int_cst (addr_type, src_offset));
5020 src = build_va_arg_indirect_ref (src_addr);
5021
5022 dest_addr = fold_convert (addr_type, addr);
5023 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
5024 build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
5025 dest = build_va_arg_indirect_ref (dest_addr);
5026
5027 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5028 gimplify_and_add (t, pre_p);
5029 }
5030 }
5031
5032 if (needed_intregs)
5033 {
5034 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5035 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5036 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5037 gimplify_and_add (t, pre_p);
5038 }
5039 if (needed_sseregs)
5040 {
5041 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5042 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5043 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5044 gimplify_and_add (t, pre_p);
5045 }
5046
5047 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5048 gimplify_and_add (t, pre_p);
5049
5050 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5051 append_to_statement_list (t, pre_p);
5052 }
5053
5054 /* ... otherwise out of the overflow area. */
5055
5056 /* Care for on-stack alignment if needed. */
5057 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5058 || integer_zerop (TYPE_SIZE (type)))
5059 t = ovf;
5060 else
5061 {
5062 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5063 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
5064 build_int_cst (TREE_TYPE (ovf), align - 1));
5065 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5066 build_int_cst (TREE_TYPE (t), -align));
5067 }
5068 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5069
5070 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5071 gimplify_and_add (t2, pre_p);
5072
5073 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5074 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
5075 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5076 gimplify_and_add (t, pre_p);
5077
5078 if (container)
5079 {
5080 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5081 append_to_statement_list (t, pre_p);
5082 }
5083
5084 ptrtype = build_pointer_type (type);
5085 addr = fold_convert (ptrtype, addr);
5086
5087 if (indirect_p)
5088 addr = build_va_arg_indirect_ref (addr);
5089 return build_va_arg_indirect_ref (addr);
5090 }
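/* Illustrative sketch (not part of the original source): for a non-indirect
   argument that may be passed in registers, the gimple built above is roughly

	if (gpr >= (REGPARM_MAX - needed_intregs + 1) * 8) goto lab_false;
	if (fpr >= sse_threshold) goto lab_false;
	int_addr = sav + gpr;  sse_addr = sav + fpr;
	...copy the register pieces into a temporary if they are not a
	   consecutive block in the save area...
	gpr += needed_intregs * 8;  fpr += needed_sseregs * 16;
	goto lab_over;
      lab_false:
	addr = (ovf + align - 1) & -align;
	ovf = addr + rsize * UNITS_PER_WORD;
      lab_over:
	return *(type *) addr;

   The exact thresholds and the temporary-copy loop follow the code above; this
   sketch is only meant as a reading aid.  */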
5091 \f
5092 /* Return nonzero if OPNUM's MEM should be matched
5093 in movabs* patterns. */
5094
5095 int
5096 ix86_check_movabs (rtx insn, int opnum)
5097 {
5098 rtx set, mem;
5099
5100 set = PATTERN (insn);
5101 if (GET_CODE (set) == PARALLEL)
5102 set = XVECEXP (set, 0, 0);
5103 gcc_assert (GET_CODE (set) == SET);
5104 mem = XEXP (set, opnum);
5105 while (GET_CODE (mem) == SUBREG)
5106 mem = SUBREG_REG (mem);
5107 gcc_assert (MEM_P (mem));
5108 return (volatile_ok || !MEM_VOLATILE_P (mem));
5109 }
5110 \f
5111 /* Initialize the table of extra 80387 mathematical constants. */
5112
5113 static void
5114 init_ext_80387_constants (void)
5115 {
5116 static const char * cst[5] =
5117 {
5118 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5119 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5120 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5121 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5122 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5123 };
5124 int i;
5125
5126 for (i = 0; i < 5; i++)
5127 {
5128 real_from_string (&ext_80387_constants_table[i], cst[i]);
5129 /* Ensure each constant is rounded to XFmode precision. */
5130 real_convert (&ext_80387_constants_table[i],
5131 XFmode, &ext_80387_constants_table[i]);
5132 }
5133
5134 ext_80387_constants_init = 1;
5135 }
5136
5137 /* Return true if the constant is something that can be loaded with
5138 a special instruction. */
5139
5140 int
5141 standard_80387_constant_p (rtx x)
5142 {
5143 enum machine_mode mode = GET_MODE (x);
5144
5145 REAL_VALUE_TYPE r;
5146
5147 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5148 return -1;
5149
5150 if (x == CONST0_RTX (mode))
5151 return 1;
5152 if (x == CONST1_RTX (mode))
5153 return 2;
5154
5155 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5156
5157 /* For XFmode constants, try to find a special 80387 instruction when
5158 optimizing for size or on those CPUs that benefit from them. */
5159 if (mode == XFmode
5160 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5161 {
5162 int i;
5163
5164 if (! ext_80387_constants_init)
5165 init_ext_80387_constants ();
5166
5167 for (i = 0; i < 5; i++)
5168 if (real_identical (&r, &ext_80387_constants_table[i]))
5169 return i + 3;
5170 }
5171
5172 /* A load of the constant -0.0 or -1.0 will be split into an
5173 fldz;fchs or fld1;fchs sequence. */
5174 if (real_isnegzero (&r))
5175 return 8;
5176 if (real_identical (&r, &dconstm1))
5177 return 9;
5178
5179 return 0;
5180 }
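/* For reference (a summary of the code above, not in the original source),
   the return values of standard_80387_constant_p are:

     -1  not an X87 FP CONST_DOUBLE
      0  no special instruction available
      1  0.0       (fldz)          2  1.0       (fld1)
      3  log10(2)  (fldlg2)        4  ln(2)     (fldln2)
      5  log2(e)   (fldl2e)        6  log2(10)  (fldl2t)
      7  pi        (fldpi)
      8  -0.0 and  9  -1.0, both split into fldz;fchs / fld1;fchs.  */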
5181
5182 /* Return the opcode of the special instruction to be used to load
5183 the constant X. */
5184
5185 const char *
5186 standard_80387_constant_opcode (rtx x)
5187 {
5188 switch (standard_80387_constant_p (x))
5189 {
5190 case 1:
5191 return "fldz";
5192 case 2:
5193 return "fld1";
5194 case 3:
5195 return "fldlg2";
5196 case 4:
5197 return "fldln2";
5198 case 5:
5199 return "fldl2e";
5200 case 6:
5201 return "fldl2t";
5202 case 7:
5203 return "fldpi";
5204 case 8:
5205 case 9:
5206 return "#";
5207 default:
5208 gcc_unreachable ();
5209 }
5210 }
5211
5212 /* Return the CONST_DOUBLE representing the 80387 constant that is
5213 loaded by the specified special instruction. The argument IDX
5214 matches the return value from standard_80387_constant_p. */
5215
5216 rtx
5217 standard_80387_constant_rtx (int idx)
5218 {
5219 int i;
5220
5221 if (! ext_80387_constants_init)
5222 init_ext_80387_constants ();
5223
5224 switch (idx)
5225 {
5226 case 3:
5227 case 4:
5228 case 5:
5229 case 6:
5230 case 7:
5231 i = idx - 3;
5232 break;
5233
5234 default:
5235 gcc_unreachable ();
5236 }
5237
5238 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5239 XFmode);
5240 }
5241
5242 /* Return 1 if MODE is a valid vector mode for SSE. */
5243 static int
5244 standard_sse_mode_p (enum machine_mode mode)
5245 {
5246 switch (mode)
5247 {
5248 case V16QImode:
5249 case V8HImode:
5250 case V4SImode:
5251 case V2DImode:
5252 case V4SFmode:
5253 case V2DFmode:
5254 return 1;
5255
5256 default:
5257 return 0;
5258 }
5259 }
5260
5261 /* Return 1 if X is an FP constant that we can load into an SSE register
5262 without using memory. */
5263 int
5264 standard_sse_constant_p (rtx x)
5265 {
5266 enum machine_mode mode = GET_MODE (x);
5267
5268 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5269 return 1;
5270 if (vector_all_ones_operand (x, mode)
5271 && standard_sse_mode_p (mode))
5272 return TARGET_SSE2 ? 2 : -1;
5273
5274 return 0;
5275 }
5276
5277 /* Return the opcode of the special instruction to be used to load
5278 the constant X. */
5279
5280 const char *
5281 standard_sse_constant_opcode (rtx insn, rtx x)
5282 {
5283 switch (standard_sse_constant_p (x))
5284 {
5285 case 1:
5286 if (get_attr_mode (insn) == MODE_V4SF)
5287 return "xorps\t%0, %0";
5288 else if (get_attr_mode (insn) == MODE_V2DF)
5289 return "xorpd\t%0, %0";
5290 else
5291 return "pxor\t%0, %0";
5292 case 2:
5293 return "pcmpeqd\t%0, %0";
5294 }
5295 gcc_unreachable ();
5296 }
5297
5298 /* Returns 1 if OP contains a symbol reference */
5299
5300 int
5301 symbolic_reference_mentioned_p (rtx op)
5302 {
5303 const char *fmt;
5304 int i;
5305
5306 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5307 return 1;
5308
5309 fmt = GET_RTX_FORMAT (GET_CODE (op));
5310 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5311 {
5312 if (fmt[i] == 'E')
5313 {
5314 int j;
5315
5316 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5317 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5318 return 1;
5319 }
5320
5321 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5322 return 1;
5323 }
5324
5325 return 0;
5326 }
5327
5328 /* Return 1 if it is appropriate to emit `ret' instructions in the
5329 body of a function. Do this only if the epilogue is simple, needing a
5330 couple of insns. Prior to reloading, we can't tell how many registers
5331 must be saved, so return 0 then. Return 0 if there is no frame
5332 marker to de-allocate. */
5333
5334 int
5335 ix86_can_use_return_insn_p (void)
5336 {
5337 struct ix86_frame frame;
5338
5339 if (! reload_completed || frame_pointer_needed)
5340 return 0;
5341
5342 /* Don't allow more than 32768 bytes of popped arguments, since that's all
5343 we can handle with one instruction. */
5344 if (current_function_pops_args
5345 && current_function_args_size >= 32768)
5346 return 0;
5347
5348 ix86_compute_frame_layout (&frame);
5349 return frame.to_allocate == 0 && frame.nregs == 0;
5350 }
5351 \f
5352 /* Value should be nonzero if functions must have frame pointers.
5353 Zero means the frame pointer need not be set up (and parms may
5354 be accessed via the stack pointer) in functions that seem suitable. */
5355
5356 int
5357 ix86_frame_pointer_required (void)
5358 {
5359 /* If we accessed previous frames, then the generated code expects
5360 to be able to access the saved ebp value in our frame. */
5361 if (cfun->machine->accesses_prev_frame)
5362 return 1;
5363
5364 /* Several x86 OSes need a frame pointer for other reasons,
5365 usually pertaining to setjmp. */
5366 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5367 return 1;
5368
5369 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5370 the frame pointer by default. Turn it back on now if we've not
5371 got a leaf function. */
5372 if (TARGET_OMIT_LEAF_FRAME_POINTER
5373 && (!current_function_is_leaf
5374 || ix86_current_function_calls_tls_descriptor))
5375 return 1;
5376
5377 if (current_function_profile)
5378 return 1;
5379
5380 return 0;
5381 }
5382
5383 /* Record that the current function accesses previous call frames. */
5384
5385 void
5386 ix86_setup_frame_addresses (void)
5387 {
5388 cfun->machine->accesses_prev_frame = 1;
5389 }
5390 \f
5391 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5392 # define USE_HIDDEN_LINKONCE 1
5393 #else
5394 # define USE_HIDDEN_LINKONCE 0
5395 #endif
5396
5397 static int pic_labels_used;
5398
5399 /* Fills in the label name that should be used for a pc thunk for
5400 the given register. */
5401
5402 static void
5403 get_pc_thunk_name (char name[32], unsigned int regno)
5404 {
5405 gcc_assert (!TARGET_64BIT);
5406
5407 if (USE_HIDDEN_LINKONCE)
5408 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5409 else
5410 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5411 }
5412
5413
5414 /* At the end of the file, output the bodies of the pc thunks that were used;
5415 each thunk loads its register with the return address of the caller and then returns. */
5416
5417 void
5418 ix86_file_end (void)
5419 {
5420 rtx xops[2];
5421 int regno;
5422
5423 for (regno = 0; regno < 8; ++regno)
5424 {
5425 char name[32];
5426
5427 if (! ((pic_labels_used >> regno) & 1))
5428 continue;
5429
5430 get_pc_thunk_name (name, regno);
5431
5432 #if TARGET_MACHO
5433 if (TARGET_MACHO)
5434 {
5435 switch_to_section (darwin_sections[text_coal_section]);
5436 fputs ("\t.weak_definition\t", asm_out_file);
5437 assemble_name (asm_out_file, name);
5438 fputs ("\n\t.private_extern\t", asm_out_file);
5439 assemble_name (asm_out_file, name);
5440 fputs ("\n", asm_out_file);
5441 ASM_OUTPUT_LABEL (asm_out_file, name);
5442 }
5443 else
5444 #endif
5445 if (USE_HIDDEN_LINKONCE)
5446 {
5447 tree decl;
5448
5449 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5450 error_mark_node);
5451 TREE_PUBLIC (decl) = 1;
5452 TREE_STATIC (decl) = 1;
5453 DECL_ONE_ONLY (decl) = 1;
5454
5455 (*targetm.asm_out.unique_section) (decl, 0);
5456 switch_to_section (get_named_section (decl, NULL, 0));
5457
5458 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5459 fputs ("\t.hidden\t", asm_out_file);
5460 assemble_name (asm_out_file, name);
5461 fputc ('\n', asm_out_file);
5462 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5463 }
5464 else
5465 {
5466 switch_to_section (text_section);
5467 ASM_OUTPUT_LABEL (asm_out_file, name);
5468 }
5469
5470 xops[0] = gen_rtx_REG (SImode, regno);
5471 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5472 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5473 output_asm_insn ("ret", xops);
5474 }
5475
5476 if (NEED_INDICATE_EXEC_STACK)
5477 file_end_indicate_exec_stack ();
5478 }
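/* As an illustration (an assumed rendering, not part of the original source),
   the thunk emitted above for %ebx with USE_HIDDEN_LINKONCE looks roughly like

	.hidden	__i686.get_pc_thunk.bx
   __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. it loads the caller's return address (the address of the instruction
   following the call) into the requested register.  */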
5479
5480 /* Emit code for the SET_GOT patterns. */
5481
5482 const char *
5483 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5484 {
5485 rtx xops[3];
5486
5487 xops[0] = dest;
5488
5489 if (TARGET_VXWORKS_RTP && flag_pic)
5490 {
5491 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5492 xops[2] = gen_rtx_MEM (Pmode,
5493 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5494 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5495
5496 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5497 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5498 an unadorned address. */
5499 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5500 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5501 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5502 return "";
5503 }
5504
5505 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5506
5507 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5508 {
5509 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5510
5511 if (!flag_pic)
5512 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5513 else
5514 output_asm_insn ("call\t%a2", xops);
5515
5516 #if TARGET_MACHO
5517 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5518 is what will be referenced by the Mach-O PIC subsystem. */
5519 if (!label)
5520 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5521 #endif
5522
5523 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5524 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5525
5526 if (flag_pic)
5527 output_asm_insn ("pop{l}\t%0", xops);
5528 }
5529 else
5530 {
5531 char name[32];
5532 get_pc_thunk_name (name, REGNO (dest));
5533 pic_labels_used |= 1 << REGNO (dest);
5534
5535 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5536 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5537 output_asm_insn ("call\t%X2", xops);
5538 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5539 is what will be referenced by the Mach-O PIC subsystem. */
5540 #if TARGET_MACHO
5541 if (!label)
5542 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5543 else
5544 targetm.asm_out.internal_label (asm_out_file, "L",
5545 CODE_LABEL_NUMBER (label));
5546 #endif
5547 }
5548
5549 if (TARGET_MACHO)
5550 return "";
5551
5552 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5553 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5554 else
5555 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5556
5557 return "";
5558 }
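/* Rough examples of the sequences printed above (AT&T syntax; the exact label
   spelling is an assumption, for illustration only):

   without deep branch prediction:
	call	1f
   1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   with deep branch prediction:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */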
5559
5560 /* Generate a "push" pattern for input ARG. */
5561
5562 static rtx
5563 gen_push (rtx arg)
5564 {
5565 return gen_rtx_SET (VOIDmode,
5566 gen_rtx_MEM (Pmode,
5567 gen_rtx_PRE_DEC (Pmode,
5568 stack_pointer_rtx)),
5569 arg);
5570 }
5571
5572 /* Return >= 0 if there is an unused call-clobbered register available
5573 for the entire function. */
5574
5575 static unsigned int
5576 ix86_select_alt_pic_regnum (void)
5577 {
5578 if (current_function_is_leaf && !current_function_profile
5579 && !ix86_current_function_calls_tls_descriptor)
5580 {
5581 int i;
5582 for (i = 2; i >= 0; --i)
5583 if (!regs_ever_live[i])
5584 return i;
5585 }
5586
5587 return INVALID_REGNUM;
5588 }
5589
5590 /* Return 1 if we need to save REGNO. */
5591 static int
5592 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5593 {
5594 if (pic_offset_table_rtx
5595 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5596 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5597 || current_function_profile
5598 || current_function_calls_eh_return
5599 || current_function_uses_const_pool))
5600 {
5601 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5602 return 0;
5603 return 1;
5604 }
5605
5606 if (current_function_calls_eh_return && maybe_eh_return)
5607 {
5608 unsigned i;
5609 for (i = 0; ; i++)
5610 {
5611 unsigned test = EH_RETURN_DATA_REGNO (i);
5612 if (test == INVALID_REGNUM)
5613 break;
5614 if (test == regno)
5615 return 1;
5616 }
5617 }
5618
5619 if (cfun->machine->force_align_arg_pointer
5620 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5621 return 1;
5622
5623 return (regs_ever_live[regno]
5624 && !call_used_regs[regno]
5625 && !fixed_regs[regno]
5626 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5627 }
5628
5629 /* Return number of registers to be saved on the stack. */
5630
5631 static int
5632 ix86_nsaved_regs (void)
5633 {
5634 int nregs = 0;
5635 int regno;
5636
5637 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5638 if (ix86_save_reg (regno, true))
5639 nregs++;
5640 return nregs;
5641 }
5642
5643 /* Return the offset between two registers, one to be eliminated, and the other
5644 its replacement, at the start of a routine. */
5645
5646 HOST_WIDE_INT
5647 ix86_initial_elimination_offset (int from, int to)
5648 {
5649 struct ix86_frame frame;
5650 ix86_compute_frame_layout (&frame);
5651
5652 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5653 return frame.hard_frame_pointer_offset;
5654 else if (from == FRAME_POINTER_REGNUM
5655 && to == HARD_FRAME_POINTER_REGNUM)
5656 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5657 else
5658 {
5659 gcc_assert (to == STACK_POINTER_REGNUM);
5660
5661 if (from == ARG_POINTER_REGNUM)
5662 return frame.stack_pointer_offset;
5663
5664 gcc_assert (from == FRAME_POINTER_REGNUM);
5665 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5666 }
5667 }
5668
5669 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
5670
5671 static void
5672 ix86_compute_frame_layout (struct ix86_frame *frame)
5673 {
5674 HOST_WIDE_INT total_size;
5675 unsigned int stack_alignment_needed;
5676 HOST_WIDE_INT offset;
5677 unsigned int preferred_alignment;
5678 HOST_WIDE_INT size = get_frame_size ();
5679
5680 frame->nregs = ix86_nsaved_regs ();
5681 total_size = size;
5682
5683 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5684 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5685
5686 /* During reload iterations the number of registers saved can change.
5687 Recompute the value as needed. Do not recompute when the number of registers
5688 didn't change, as reload makes multiple calls to this function and does not
5689 expect the decision to change within a single iteration. */
5690 if (!optimize_size
5691 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5692 {
5693 int count = frame->nregs;
5694
5695 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5696 /* The fast prologue uses move instead of push to save registers. This
5697 is significantly longer, but also executes faster as modern hardware
5698 can execute the moves in parallel, but can't do that for push/pop.
5699 
5700 Be careful about choosing which prologue to emit: when the function takes
5701 many instructions to execute, we may use the slow version, as well as when
5702 the function is known to be outside a hot spot (this is known only with
5703 feedback). Weight the size of the function by the number of registers to
5704 save, as it is cheap to use one or two push instructions but very slow to
5705 use many of them. */
5706 if (count)
5707 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5708 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5709 || (flag_branch_probabilities
5710 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5711 cfun->machine->use_fast_prologue_epilogue = false;
5712 else
5713 cfun->machine->use_fast_prologue_epilogue
5714 = !expensive_function_p (count);
5715 }
5716 if (TARGET_PROLOGUE_USING_MOVE
5717 && cfun->machine->use_fast_prologue_epilogue)
5718 frame->save_regs_using_mov = true;
5719 else
5720 frame->save_regs_using_mov = false;
5721
5722
5723 /* Skip return address and saved base pointer. */
5724 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5725
5726 frame->hard_frame_pointer_offset = offset;
5727
5728 /* Do some sanity checking of stack_alignment_needed and
5729 preferred_alignment, since the i386 port is the only one using these
5730 features, and they may break easily. */
5731
5732 gcc_assert (!size || stack_alignment_needed);
5733 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5734 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5735 gcc_assert (stack_alignment_needed
5736 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5737
5738 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5739 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5740
5741 /* Register save area */
5742 offset += frame->nregs * UNITS_PER_WORD;
5743
5744 /* Va-arg area */
5745 if (ix86_save_varrargs_registers)
5746 {
5747 offset += X86_64_VARARGS_SIZE;
5748 frame->va_arg_size = X86_64_VARARGS_SIZE;
5749 }
5750 else
5751 frame->va_arg_size = 0;
5752
5753 /* Align start of frame for local function. */
5754 frame->padding1 = ((offset + stack_alignment_needed - 1)
5755 & -stack_alignment_needed) - offset;
5756
5757 offset += frame->padding1;
5758
5759 /* Frame pointer points here. */
5760 frame->frame_pointer_offset = offset;
5761
5762 offset += size;
5763
5764 /* Add the outgoing arguments area. It can be skipped if we eliminated
5765 all the function calls as dead code.
5766 Skipping is however impossible when the function calls alloca, since the
5767 alloca expander assumes that the last current_function_outgoing_args_size
5768 bytes of the stack frame are unused. */
5769 if (ACCUMULATE_OUTGOING_ARGS
5770 && (!current_function_is_leaf || current_function_calls_alloca
5771 || ix86_current_function_calls_tls_descriptor))
5772 {
5773 offset += current_function_outgoing_args_size;
5774 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5775 }
5776 else
5777 frame->outgoing_arguments_size = 0;
5778
5779 /* Align stack boundary. Only needed if we're calling another function
5780 or using alloca. */
5781 if (!current_function_is_leaf || current_function_calls_alloca
5782 || ix86_current_function_calls_tls_descriptor)
5783 frame->padding2 = ((offset + preferred_alignment - 1)
5784 & -preferred_alignment) - offset;
5785 else
5786 frame->padding2 = 0;
5787
5788 offset += frame->padding2;
5789
5790 /* We've reached end of stack frame. */
5791 frame->stack_pointer_offset = offset;
5792
5793 /* Size prologue needs to allocate. */
5794 frame->to_allocate =
5795 (size + frame->padding1 + frame->padding2
5796 + frame->outgoing_arguments_size + frame->va_arg_size);
5797
5798 if ((!frame->to_allocate && frame->nregs <= 1)
5799 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5800 frame->save_regs_using_mov = false;
5801
5802 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5803 && current_function_is_leaf
5804 && !ix86_current_function_calls_tls_descriptor)
5805 {
5806 frame->red_zone_size = frame->to_allocate;
5807 if (frame->save_regs_using_mov)
5808 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5809 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5810 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5811 }
5812 else
5813 frame->red_zone_size = 0;
5814 frame->to_allocate -= frame->red_zone_size;
5815 frame->stack_pointer_offset -= frame->red_zone_size;
5816 #if 0
5817 fprintf (stderr, "\n");
5818 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5819 fprintf (stderr, "size: %ld\n", (long)size);
5820 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5821 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5822 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5823 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5824 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5825 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5826 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5827 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5828 (long)frame->hard_frame_pointer_offset);
5829 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5830 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5831 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5832 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5833 #endif
5834 }
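/* For orientation (a sketch reconstructed from the computation above, not part
   of the original source), the resulting frame looks like this, from higher to
   lower addresses:

	return address
	saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
	saved registers (nregs words)
	va-arg register save area (if any)
	padding1
						<- frame_pointer_offset
	local variables (size)
	outgoing argument area
	padding2
						<- stack_pointer_offset

   On 64-bit leaf functions with TARGET_RED_ZONE, whatever fits into the red
   zone is then subtracted from to_allocate and stack_pointer_offset.  */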
5835
5836 /* Emit code to save registers in the prologue. */
5837
5838 static void
5839 ix86_emit_save_regs (void)
5840 {
5841 unsigned int regno;
5842 rtx insn;
5843
5844 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5845 if (ix86_save_reg (regno, true))
5846 {
5847 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5848 RTX_FRAME_RELATED_P (insn) = 1;
5849 }
5850 }
5851
5852 /* Emit code to save registers using MOV insns. The first register
5853 is saved at POINTER + OFFSET. */
5854 static void
5855 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5856 {
5857 unsigned int regno;
5858 rtx insn;
5859
5860 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5861 if (ix86_save_reg (regno, true))
5862 {
5863 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5864 Pmode, offset),
5865 gen_rtx_REG (Pmode, regno));
5866 RTX_FRAME_RELATED_P (insn) = 1;
5867 offset += UNITS_PER_WORD;
5868 }
5869 }
5870
5871 /* Expand prologue or epilogue stack adjustment.
5872 The pattern exists to put a dependency on all ebp-based memory accesses.
5873 STYLE should be negative if instructions should be marked as frame related,
5874 zero if the %r11 register is live and cannot be freely used, and positive
5875 otherwise. */
5876
5877 static void
5878 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5879 {
5880 rtx insn;
5881
5882 if (! TARGET_64BIT)
5883 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5884 else if (x86_64_immediate_operand (offset, DImode))
5885 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5886 else
5887 {
5888 rtx r11;
5889 /* r11 is used by indirect sibcall return as well, set before the
5890 epilogue and used after the epilogue. ATM indirect sibcall
5891 shouldn't be used together with huge frame sizes in one
5892 function because of the frame_size check in sibcall.c. */
5893 gcc_assert (style);
5894 r11 = gen_rtx_REG (DImode, R11_REG);
5895 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5896 if (style < 0)
5897 RTX_FRAME_RELATED_P (insn) = 1;
5898 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5899 offset));
5900 }
5901 if (style < 0)
5902 RTX_FRAME_RELATED_P (insn) = 1;
5903 }
5904
5905 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5906
5907 static rtx
5908 ix86_internal_arg_pointer (void)
5909 {
5910 bool has_force_align_arg_pointer =
5911 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5912 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5913 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5914 && DECL_NAME (current_function_decl)
5915 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5916 && DECL_FILE_SCOPE_P (current_function_decl))
5917 || ix86_force_align_arg_pointer
5918 || has_force_align_arg_pointer)
5919 {
5920 /* Nested functions can't realign the stack due to a register
5921 conflict. */
5922 if (DECL_CONTEXT (current_function_decl)
5923 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5924 {
5925 if (ix86_force_align_arg_pointer)
5926 warning (0, "-mstackrealign ignored for nested functions");
5927 if (has_force_align_arg_pointer)
5928 error ("%s not supported for nested functions",
5929 ix86_force_align_arg_pointer_string);
5930 return virtual_incoming_args_rtx;
5931 }
5932 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5933 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5934 }
5935 else
5936 return virtual_incoming_args_rtx;
5937 }
5938
5939 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5940 This is called from dwarf2out.c to emit call frame instructions
5941 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5942 static void
5943 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5944 {
5945 rtx unspec = SET_SRC (pattern);
5946 gcc_assert (GET_CODE (unspec) == UNSPEC);
5947
5948 switch (index)
5949 {
5950 case UNSPEC_REG_SAVE:
5951 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5952 SET_DEST (pattern));
5953 break;
5954 case UNSPEC_DEF_CFA:
5955 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5956 INTVAL (XVECEXP (unspec, 0, 0)));
5957 break;
5958 default:
5959 gcc_unreachable ();
5960 }
5961 }
5962
5963 /* Expand the prologue into a bunch of separate insns. */
5964
5965 void
5966 ix86_expand_prologue (void)
5967 {
5968 rtx insn;
5969 bool pic_reg_used;
5970 struct ix86_frame frame;
5971 HOST_WIDE_INT allocate;
5972
5973 ix86_compute_frame_layout (&frame);
5974
5975 if (cfun->machine->force_align_arg_pointer)
5976 {
5977 rtx x, y;
5978
5979 /* Grab the argument pointer. */
5980 x = plus_constant (stack_pointer_rtx, 4);
5981 y = cfun->machine->force_align_arg_pointer;
5982 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5983 RTX_FRAME_RELATED_P (insn) = 1;
5984
5985 /* The unwind info consists of two parts: install the fafp as the cfa,
5986 and record the fafp as the "save register" of the stack pointer.
5987 The latter is there so that the unwinder can see where it
5988 should restore the stack pointer across the "and" insn below. */
5989 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5990 x = gen_rtx_SET (VOIDmode, y, x);
5991 RTX_FRAME_RELATED_P (x) = 1;
5992 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5993 UNSPEC_REG_SAVE);
5994 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5995 RTX_FRAME_RELATED_P (y) = 1;
5996 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5997 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5998 REG_NOTES (insn) = x;
5999
6000 /* Align the stack. */
6001 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6002 GEN_INT (-16)));
6003
6004 /* And here we cheat like madmen with the unwind info. We force the
6005 cfa register back to sp+4, which is exactly what it was at the
6006 start of the function. Re-pushing the return address puts it at
6007 the same spot relative to the cfa, and thus the result is
6008 correct wrt the unwind info. */
6009 x = cfun->machine->force_align_arg_pointer;
6010 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6011 insn = emit_insn (gen_push (x));
6012 RTX_FRAME_RELATED_P (insn) = 1;
6013
6014 x = GEN_INT (4);
6015 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6016 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6017 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6018 REG_NOTES (insn) = x;
6019 }
6020
6021 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6022 slower on all targets. Also sdb doesn't like it. */
6023
6024 if (frame_pointer_needed)
6025 {
6026 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6027 RTX_FRAME_RELATED_P (insn) = 1;
6028
6029 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6030 RTX_FRAME_RELATED_P (insn) = 1;
6031 }
6032
6033 allocate = frame.to_allocate;
6034
6035 if (!frame.save_regs_using_mov)
6036 ix86_emit_save_regs ();
6037 else
6038 allocate += frame.nregs * UNITS_PER_WORD;
6039
6040 /* When using the red zone we may start saving registers before allocating
6041 the stack frame, saving one cycle of the prologue. */
6042 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6043 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6044 : stack_pointer_rtx,
6045 -frame.nregs * UNITS_PER_WORD);
6046
6047 if (allocate == 0)
6048 ;
6049 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6050 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6051 GEN_INT (-allocate), -1);
6052 else
6053 {
6054 /* Only valid for Windows targets (Win32, or the 64-bit MS ABI). */
6055 rtx eax = gen_rtx_REG (Pmode, 0);
6056 bool eax_live;
6057 rtx t;
6058
6059 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6060
6061 if (TARGET_64BIT_MS_ABI)
6062 eax_live = false;
6063 else
6064 eax_live = ix86_eax_live_at_start_p ();
6065
6066 if (eax_live)
6067 {
6068 emit_insn (gen_push (eax));
6069 allocate -= UNITS_PER_WORD;
6070 }
6071
6072 emit_move_insn (eax, GEN_INT (allocate));
6073
6074 if (TARGET_64BIT)
6075 insn = gen_allocate_stack_worker_64 (eax);
6076 else
6077 insn = gen_allocate_stack_worker_32 (eax);
6078 insn = emit_insn (insn);
6079 RTX_FRAME_RELATED_P (insn) = 1;
6080 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6081 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6082 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6083 t, REG_NOTES (insn));
6084
6085 if (eax_live)
6086 {
6087 if (frame_pointer_needed)
6088 t = plus_constant (hard_frame_pointer_rtx,
6089 allocate
6090 - frame.to_allocate
6091 - frame.nregs * UNITS_PER_WORD);
6092 else
6093 t = plus_constant (stack_pointer_rtx, allocate);
6094 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6095 }
6096 }
6097
6098 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6099 {
6100 if (!frame_pointer_needed || !frame.to_allocate)
6101 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6102 else
6103 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6104 -frame.nregs * UNITS_PER_WORD);
6105 }
6106
6107 pic_reg_used = false;
6108 if (pic_offset_table_rtx
6109 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6110 || current_function_profile))
6111 {
6112 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6113
6114 if (alt_pic_reg_used != INVALID_REGNUM)
6115 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6116
6117 pic_reg_used = true;
6118 }
6119
6120 if (pic_reg_used)
6121 {
6122 if (TARGET_64BIT)
6123 {
6124 if (ix86_cmodel == CM_LARGE_PIC)
6125 {
6126 rtx tmp_reg = gen_rtx_REG (DImode,
6127 FIRST_REX_INT_REG + 3 /* R11 */);
6128 rtx label = gen_label_rtx ();
6129 emit_label (label);
6130 LABEL_PRESERVE_P (label) = 1;
6131 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6132 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6133 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6134 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6135 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6136 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6137 pic_offset_table_rtx, tmp_reg));
6138 }
6139 else
6140 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6141 }
6142 else
6143 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6144
6145 /* Even with accurate pre-reload life analysis, we can wind up
6146 deleting all references to the pic register after reload.
6147 Consider the case where cross-jumping unifies two sides of a branch
6148 controlled by a comparison against the only read from a global.
6149 In that case, allow the set_got to be deleted, though we're
6150 too late to do anything about the ebx save in the prologue. */
6151 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6152 }
6153
6154 /* Prevent function calls from being scheduled before the call to mcount.
6155 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
6156 if (current_function_profile)
6157 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6158 }
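/* A rough sketch (assumed output, for illustration only) of what a typical
   ia32 prologue with a frame pointer expands to:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			(or a mov into the save slot when
					 save_regs_using_mov is in effect)
	subl	$frame.to_allocate, %esp
	call	__i686.get_pc_thunk.bx	(only when the PIC register is needed)
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   Very large allocations on stack-probing targets instead go through the
   allocate_stack_worker pattern with the size in %eax.  */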
6159
6160 /* Emit code to restore saved registers using MOV insns. First register
6161 is restored from POINTER + OFFSET. */
6162 static void
6163 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6164 int maybe_eh_return)
6165 {
6166 int regno;
6167 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6168
6169 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6170 if (ix86_save_reg (regno, maybe_eh_return))
6171 {
6172 /* Ensure that adjust_address won't be forced to produce a pointer
6173 outside the range allowed by the x86-64 instruction set. */
6174 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6175 {
6176 rtx r11;
6177
6178 r11 = gen_rtx_REG (DImode, R11_REG);
6179 emit_move_insn (r11, GEN_INT (offset));
6180 emit_insn (gen_adddi3 (r11, r11, pointer));
6181 base_address = gen_rtx_MEM (Pmode, r11);
6182 offset = 0;
6183 }
6184 emit_move_insn (gen_rtx_REG (Pmode, regno),
6185 adjust_address (base_address, Pmode, offset));
6186 offset += UNITS_PER_WORD;
6187 }
6188 }
6189
6190 /* Restore function stack, frame, and registers. */
6191
6192 void
6193 ix86_expand_epilogue (int style)
6194 {
6195 int regno;
6196 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6197 struct ix86_frame frame;
6198 HOST_WIDE_INT offset;
6199
6200 ix86_compute_frame_layout (&frame);
6201
6202 /* Calculate start of saved registers relative to ebp. Special care
6203 must be taken for the normal return case of a function using
6204 eh_return: the eax and edx registers are marked as saved, but not
6205 restored along this path. */
6206 offset = frame.nregs;
6207 if (current_function_calls_eh_return && style != 2)
6208 offset -= 2;
6209 offset *= -UNITS_PER_WORD;
6210
6211 /* If we're only restoring one register and sp is not valid, then
6212 use a move instruction to restore the register, since it's
6213 less work than reloading sp and popping the register.
6214 
6215 The default code results in a stack adjustment using an add/lea instruction,
6216 while this code results in a LEAVE instruction (or its discrete equivalent),
6217 so it is profitable in some other cases as well, especially when there
6218 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6219 is set and there is exactly one register to pop. This heuristic may need
6220 some tuning in the future. */
6221 if ((!sp_valid && frame.nregs <= 1)
6222 || (TARGET_EPILOGUE_USING_MOVE
6223 && cfun->machine->use_fast_prologue_epilogue
6224 && (frame.nregs > 1 || frame.to_allocate))
6225 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6226 || (frame_pointer_needed && TARGET_USE_LEAVE
6227 && cfun->machine->use_fast_prologue_epilogue
6228 && frame.nregs == 1)
6229 || current_function_calls_eh_return)
6230 {
6231 /* Restore registers. We can use ebp or esp to address the memory
6232 locations. If both are available, default to ebp, since offsets
6233 are known to be small. The only exception is esp pointing directly to
6234 the end of the block of saved registers, where we may simplify the
6235 addressing mode. */
6236
6237 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6238 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6239 frame.to_allocate, style == 2);
6240 else
6241 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6242 offset, style == 2);
6243
6244 /* eh_return epilogues need %ecx added to the stack pointer. */
6245 if (style == 2)
6246 {
6247 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6248
6249 if (frame_pointer_needed)
6250 {
6251 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6252 tmp = plus_constant (tmp, UNITS_PER_WORD);
6253 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6254
6255 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6256 emit_move_insn (hard_frame_pointer_rtx, tmp);
6257
6258 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6259 const0_rtx, style);
6260 }
6261 else
6262 {
6263 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6264 tmp = plus_constant (tmp, (frame.to_allocate
6265 + frame.nregs * UNITS_PER_WORD));
6266 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6267 }
6268 }
6269 else if (!frame_pointer_needed)
6270 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6271 GEN_INT (frame.to_allocate
6272 + frame.nregs * UNITS_PER_WORD),
6273 style);
6274 /* If not an i386, mov & pop is faster than "leave". */
6275 else if (TARGET_USE_LEAVE || optimize_size
6276 || !cfun->machine->use_fast_prologue_epilogue)
6277 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6278 else
6279 {
6280 pro_epilogue_adjust_stack (stack_pointer_rtx,
6281 hard_frame_pointer_rtx,
6282 const0_rtx, style);
6283 if (TARGET_64BIT)
6284 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6285 else
6286 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6287 }
6288 }
6289 else
6290 {
6291 /* First step is to deallocate the stack frame so that we can
6292 pop the registers. */
6293 if (!sp_valid)
6294 {
6295 gcc_assert (frame_pointer_needed);
6296 pro_epilogue_adjust_stack (stack_pointer_rtx,
6297 hard_frame_pointer_rtx,
6298 GEN_INT (offset), style);
6299 }
6300 else if (frame.to_allocate)
6301 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6302 GEN_INT (frame.to_allocate), style);
6303
6304 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6305 if (ix86_save_reg (regno, false))
6306 {
6307 if (TARGET_64BIT)
6308 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6309 else
6310 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6311 }
6312 if (frame_pointer_needed)
6313 {
6314 /* Leave results in shorter dependency chains on CPUs that are
6315 able to grok it fast. */
6316 if (TARGET_USE_LEAVE)
6317 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6318 else if (TARGET_64BIT)
6319 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6320 else
6321 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6322 }
6323 }
6324
6325 if (cfun->machine->force_align_arg_pointer)
6326 {
6327 emit_insn (gen_addsi3 (stack_pointer_rtx,
6328 cfun->machine->force_align_arg_pointer,
6329 GEN_INT (-4)));
6330 }
6331
6332 /* Sibcall epilogues don't want a return instruction. */
6333 if (style == 0)
6334 return;
6335
6336 if (current_function_pops_args && current_function_args_size)
6337 {
6338 rtx popc = GEN_INT (current_function_pops_args);
6339
6340 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6341 return address, do an explicit add, and jump indirectly to the
6342 caller. */
6343
6344 if (current_function_pops_args >= 65536)
6345 {
6346 rtx ecx = gen_rtx_REG (SImode, 2);
6347
6348 /* There is no "pascal" calling convention in any 64bit ABI. */
6349 gcc_assert (!TARGET_64BIT);
6350
6351 emit_insn (gen_popsi1 (ecx));
6352 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6353 emit_jump_insn (gen_return_indirect_internal (ecx));
6354 }
6355 else
6356 emit_jump_insn (gen_return_pop_internal (popc));
6357 }
6358 else
6359 emit_jump_insn (gen_return_internal ());
6360 }
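/* Correspondingly, rough sketches (for illustration only, not part of the
   original source) of the epilogues produced above:

   with a frame pointer and TARGET_USE_LEAVE:
	movl	-4(%ebp), %ebx
	leave
	ret

   without a frame pointer:
	addl	$N, %esp
	popl	%ebx
	ret			(or "ret $M" when the callee pops its arguments)  */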
6361
6362 /* Reset any state the compilation of this function may have modified. */
6363
6364 static void
6365 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6366 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6367 {
6368 if (pic_offset_table_rtx)
6369 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6370 #if TARGET_MACHO
6371 /* Mach-O doesn't support labels at the end of objects, so if
6372 it looks like we might want one, insert a NOP. */
6373 {
6374 rtx insn = get_last_insn ();
6375 while (insn
6376 && NOTE_P (insn)
6377 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6378 insn = PREV_INSN (insn);
6379 if (insn
6380 && (LABEL_P (insn)
6381 || (NOTE_P (insn)
6382 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6383 fputs ("\tnop\n", file);
6384 }
6385 #endif
6386
6387 }
6388 \f
6389 /* Extract the parts of an RTL expression that is a valid memory address
6390 for an instruction. Return 0 if the structure of the address is
6391 grossly off. Return -1 if the address contains ASHIFT, so it is not
6392 strictly valid, but is still used for computing the length of a lea instruction. */
6393
6394 int
6395 ix86_decompose_address (rtx addr, struct ix86_address *out)
6396 {
6397 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6398 rtx base_reg, index_reg;
6399 HOST_WIDE_INT scale = 1;
6400 rtx scale_rtx = NULL_RTX;
6401 int retval = 1;
6402 enum ix86_address_seg seg = SEG_DEFAULT;
6403
6404 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6405 base = addr;
6406 else if (GET_CODE (addr) == PLUS)
6407 {
6408 rtx addends[4], op;
6409 int n = 0, i;
6410
6411 op = addr;
6412 do
6413 {
6414 if (n >= 4)
6415 return 0;
6416 addends[n++] = XEXP (op, 1);
6417 op = XEXP (op, 0);
6418 }
6419 while (GET_CODE (op) == PLUS);
6420 if (n >= 4)
6421 return 0;
6422 addends[n] = op;
6423
6424 for (i = n; i >= 0; --i)
6425 {
6426 op = addends[i];
6427 switch (GET_CODE (op))
6428 {
6429 case MULT:
6430 if (index)
6431 return 0;
6432 index = XEXP (op, 0);
6433 scale_rtx = XEXP (op, 1);
6434 break;
6435
6436 case UNSPEC:
6437 if (XINT (op, 1) == UNSPEC_TP
6438 && TARGET_TLS_DIRECT_SEG_REFS
6439 && seg == SEG_DEFAULT)
6440 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6441 else
6442 return 0;
6443 break;
6444
6445 case REG:
6446 case SUBREG:
6447 if (!base)
6448 base = op;
6449 else if (!index)
6450 index = op;
6451 else
6452 return 0;
6453 break;
6454
6455 case CONST:
6456 case CONST_INT:
6457 case SYMBOL_REF:
6458 case LABEL_REF:
6459 if (disp)
6460 return 0;
6461 disp = op;
6462 break;
6463
6464 default:
6465 return 0;
6466 }
6467 }
6468 }
6469 else if (GET_CODE (addr) == MULT)
6470 {
6471 index = XEXP (addr, 0); /* index*scale */
6472 scale_rtx = XEXP (addr, 1);
6473 }
6474 else if (GET_CODE (addr) == ASHIFT)
6475 {
6476 rtx tmp;
6477
6478 /* We're called for lea too, which implements ashift on occasion. */
6479 index = XEXP (addr, 0);
6480 tmp = XEXP (addr, 1);
6481 if (!CONST_INT_P (tmp))
6482 return 0;
6483 scale = INTVAL (tmp);
6484 if ((unsigned HOST_WIDE_INT) scale > 3)
6485 return 0;
6486 scale = 1 << scale;
6487 retval = -1;
6488 }
6489 else
6490 disp = addr; /* displacement */
6491
6492 /* Extract the integral value of scale. */
6493 if (scale_rtx)
6494 {
6495 if (!CONST_INT_P (scale_rtx))
6496 return 0;
6497 scale = INTVAL (scale_rtx);
6498 }
6499
6500 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6501 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6502
6503 /* Allow the arg pointer and the stack pointer as an index if there is no scaling. */
6504 if (base_reg && index_reg && scale == 1
6505 && (index_reg == arg_pointer_rtx
6506 || index_reg == frame_pointer_rtx
6507 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6508 {
6509 rtx tmp;
6510 tmp = base, base = index, index = tmp;
6511 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6512 }
6513
6514 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6515 if ((base_reg == hard_frame_pointer_rtx
6516 || base_reg == frame_pointer_rtx
6517 || base_reg == arg_pointer_rtx) && !disp)
6518 disp = const0_rtx;
6519
6520 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6521 Avoid this by transforming to [%esi+0]. */
6522 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6523 && base_reg && !index_reg && !disp
6524 && REG_P (base_reg)
6525 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6526 disp = const0_rtx;
6527
6528 /* Special case: encode reg+reg instead of reg*2. */
6529 if (!base && index && scale && scale == 2)
6530 base = index, base_reg = index_reg, scale = 1;
6531
6532 /* Special case: scaling cannot be encoded without base or displacement. */
6533 if (!base && !disp && index && scale != 1)
6534 disp = const0_rtx;
6535
6536 out->base = base;
6537 out->index = index;
6538 out->disp = disp;
6539 out->scale = scale;
6540 out->seg = seg;
6541
6542 return retval;
6543 }
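/* Example (illustrative, not from the original source): the operand of
   "12(%ebx,%ecx,4)",

	(plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12.  */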
6544 \f
6545 /* Return the cost of the memory address X.
6546 For i386, it is better to use a complex address than to let gcc copy
6547 the address into a reg and make a new pseudo. But not if the address
6548 requires two regs - that would mean more pseudos with longer
6549 lifetimes. */
6550 static int
6551 ix86_address_cost (rtx x)
6552 {
6553 struct ix86_address parts;
6554 int cost = 1;
6555 int ok = ix86_decompose_address (x, &parts);
6556
6557 gcc_assert (ok);
6558
6559 if (parts.base && GET_CODE (parts.base) == SUBREG)
6560 parts.base = SUBREG_REG (parts.base);
6561 if (parts.index && GET_CODE (parts.index) == SUBREG)
6562 parts.index = SUBREG_REG (parts.index);
6563
6564 /* More complex memory references are better. */
6565 if (parts.disp && parts.disp != const0_rtx)
6566 cost--;
6567 if (parts.seg != SEG_DEFAULT)
6568 cost--;
6569
6570 /* Attempt to minimize number of registers in the address. */
6571 if ((parts.base
6572 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6573 || (parts.index
6574 && (!REG_P (parts.index)
6575 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6576 cost++;
6577
6578 if (parts.base
6579 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6580 && parts.index
6581 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6582 && parts.base != parts.index)
6583 cost++;
6584
6585 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6586 since its predecode logic can't detect the length of such instructions
6587 and decoding degenerates to vector decoding. Increase the cost of such
6588 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6589 to split such addresses or even to refuse them entirely.
6590 
6591 The following addressing modes are affected:
6592 [base+scale*index]
6593 [scale*index+disp]
6594 [base+index]
6595 
6596 The first and last cases may be avoidable by explicitly coding a zero
6597 displacement in the memory address, but I don't have an AMD-K6 machine
6598 handy to check this theory. */
6599
6600 if (TARGET_K6
6601 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6602 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6603 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6604 cost += 10;
6605
6606 return cost;
6607 }
6608 \f
6609 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6610 this is used to form addresses of local data when -fPIC is in
6611 use. */
6612
6613 static bool
6614 darwin_local_data_pic (rtx disp)
6615 {
6616 if (GET_CODE (disp) == MINUS)
6617 {
6618 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6619 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6620 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6621 {
6622 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6623 if (! strcmp (sym_name, "<pic base>"))
6624 return true;
6625 }
6626 }
6627
6628 return false;
6629 }
6630
6631 /* Determine if a given RTX is a valid constant. We already know this
6632 satisfies CONSTANT_P. */
6633
6634 bool
6635 legitimate_constant_p (rtx x)
6636 {
6637 switch (GET_CODE (x))
6638 {
6639 case CONST:
6640 x = XEXP (x, 0);
6641
6642 if (GET_CODE (x) == PLUS)
6643 {
6644 if (!CONST_INT_P (XEXP (x, 1)))
6645 return false;
6646 x = XEXP (x, 0);
6647 }
6648
6649 if (TARGET_MACHO && darwin_local_data_pic (x))
6650 return true;
6651
6652 /* Only some unspecs are valid as "constants". */
6653 if (GET_CODE (x) == UNSPEC)
6654 switch (XINT (x, 1))
6655 {
6656 case UNSPEC_GOT:
6657 case UNSPEC_GOTOFF:
6658 case UNSPEC_PLTOFF:
6659 return TARGET_64BIT;
6660 case UNSPEC_TPOFF:
6661 case UNSPEC_NTPOFF:
6662 x = XVECEXP (x, 0, 0);
6663 return (GET_CODE (x) == SYMBOL_REF
6664 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6665 case UNSPEC_DTPOFF:
6666 x = XVECEXP (x, 0, 0);
6667 return (GET_CODE (x) == SYMBOL_REF
6668 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6669 default:
6670 return false;
6671 }
6672
6673 /* We must have drilled down to a symbol. */
6674 if (GET_CODE (x) == LABEL_REF)
6675 return true;
6676 if (GET_CODE (x) != SYMBOL_REF)
6677 return false;
6678 /* FALLTHRU */
6679
6680 case SYMBOL_REF:
6681 /* TLS symbols are never valid. */
6682 if (SYMBOL_REF_TLS_MODEL (x))
6683 return false;
6684
6685 /* DLLIMPORT symbols are never valid. */
6686 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6687 && SYMBOL_REF_DLLIMPORT_P (x))
6688 return false;
6689 break;
6690
6691 case CONST_DOUBLE:
6692 if (GET_MODE (x) == TImode
6693 && x != CONST0_RTX (TImode)
6694 && !TARGET_64BIT)
6695 return false;
6696 break;
6697
6698 case CONST_VECTOR:
6699 if (x == CONST0_RTX (GET_MODE (x)))
6700 return true;
6701 return false;
6702
6703 default:
6704 break;
6705 }
6706
6707 /* Otherwise we handle everything else in the move patterns. */
6708 return true;
6709 }
6710
6711 /* Determine if it's legal to put X into the constant pool. This
6712 is not possible for the address of thread-local symbols, which
6713 is checked above. */
6714
6715 static bool
6716 ix86_cannot_force_const_mem (rtx x)
6717 {
6718 /* We can always put integral constants and vectors in memory. */
6719 switch (GET_CODE (x))
6720 {
6721 case CONST_INT:
6722 case CONST_DOUBLE:
6723 case CONST_VECTOR:
6724 return false;
6725
6726 default:
6727 break;
6728 }
6729 return !legitimate_constant_p (x);
6730 }
6731
6732 /* Determine if a given RTX is a valid constant address. */
6733
6734 bool
6735 constant_address_p (rtx x)
6736 {
6737 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6738 }
6739
6740 /* Nonzero if the constant value X is a legitimate general operand
6741 when generating PIC code. It is given that flag_pic is on and
6742 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6743
6744 bool
6745 legitimate_pic_operand_p (rtx x)
6746 {
6747 rtx inner;
6748
6749 switch (GET_CODE (x))
6750 {
6751 case CONST:
6752 inner = XEXP (x, 0);
6753 if (GET_CODE (inner) == PLUS
6754 && CONST_INT_P (XEXP (inner, 1)))
6755 inner = XEXP (inner, 0);
6756
6757 /* Only some unspecs are valid as "constants". */
6758 if (GET_CODE (inner) == UNSPEC)
6759 switch (XINT (inner, 1))
6760 {
6761 case UNSPEC_GOT:
6762 case UNSPEC_GOTOFF:
6763 case UNSPEC_PLTOFF:
6764 return TARGET_64BIT;
6765 case UNSPEC_TPOFF:
6766 x = XVECEXP (inner, 0, 0);
6767 return (GET_CODE (x) == SYMBOL_REF
6768 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6769 default:
6770 return false;
6771 }
6772 /* FALLTHRU */
6773
6774 case SYMBOL_REF:
6775 case LABEL_REF:
6776 return legitimate_pic_address_disp_p (x);
6777
6778 default:
6779 return true;
6780 }
6781 }
6782
6783 /* Determine if a given CONST RTX is a valid memory displacement
6784 in PIC mode. */
6785
6786 int
6787 legitimate_pic_address_disp_p (rtx disp)
6788 {
6789 bool saw_plus;
6790
6791 /* In 64bit mode we can allow direct addresses of symbols and labels
6792 when they are not dynamic symbols. */
6793 if (TARGET_64BIT)
6794 {
6795 rtx op0 = disp, op1;
6796
6797 switch (GET_CODE (disp))
6798 {
6799 case LABEL_REF:
6800 return true;
6801
6802 case CONST:
6803 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6804 break;
6805 op0 = XEXP (XEXP (disp, 0), 0);
6806 op1 = XEXP (XEXP (disp, 0), 1);
6807 if (!CONST_INT_P (op1)
6808 || INTVAL (op1) >= 16*1024*1024
6809 || INTVAL (op1) < -16*1024*1024)
6810 break;
6811 if (GET_CODE (op0) == LABEL_REF)
6812 return true;
6813 if (GET_CODE (op0) != SYMBOL_REF)
6814 break;
6815 /* FALLTHRU */
6816
6817 case SYMBOL_REF:
6818 /* TLS references should always be enclosed in UNSPEC. */
6819 if (SYMBOL_REF_TLS_MODEL (op0))
6820 return false;
6821 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6822 && ix86_cmodel != CM_LARGE_PIC)
6823 return true;
6824 break;
6825
6826 default:
6827 break;
6828 }
6829 }
6830 if (GET_CODE (disp) != CONST)
6831 return 0;
6832 disp = XEXP (disp, 0);
6833
6834 if (TARGET_64BIT)
6835 {
6836 /* It is not safe to allow PLUS expressions here; this limits the allowed
6837 distance of GOT references. We should not need these anyway. */
6838 if (GET_CODE (disp) != UNSPEC
6839 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6840 && XINT (disp, 1) != UNSPEC_GOTOFF
6841 && XINT (disp, 1) != UNSPEC_PLTOFF))
6842 return 0;
6843
6844 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6845 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6846 return 0;
6847 return 1;
6848 }
6849
6850 saw_plus = false;
6851 if (GET_CODE (disp) == PLUS)
6852 {
6853 if (!CONST_INT_P (XEXP (disp, 1)))
6854 return 0;
6855 disp = XEXP (disp, 0);
6856 saw_plus = true;
6857 }
6858
6859 if (TARGET_MACHO && darwin_local_data_pic (disp))
6860 return 1;
6861
6862 if (GET_CODE (disp) != UNSPEC)
6863 return 0;
6864
6865 switch (XINT (disp, 1))
6866 {
6867 case UNSPEC_GOT:
6868 if (saw_plus)
6869 return false;
6870 /* We need to check for both symbols and labels because VxWorks loads
6871 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6872 details. */
6873 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6874 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6875 case UNSPEC_GOTOFF:
6876 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6877 While the ABI also specifies a 32bit relocation, we don't produce it in
6878 the small PIC model at all. */
6879 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6880 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6881 && !TARGET_64BIT)
6882 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6883 return false;
6884 case UNSPEC_GOTTPOFF:
6885 case UNSPEC_GOTNTPOFF:
6886 case UNSPEC_INDNTPOFF:
6887 if (saw_plus)
6888 return false;
6889 disp = XVECEXP (disp, 0, 0);
6890 return (GET_CODE (disp) == SYMBOL_REF
6891 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6892 case UNSPEC_NTPOFF:
6893 disp = XVECEXP (disp, 0, 0);
6894 return (GET_CODE (disp) == SYMBOL_REF
6895 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6896 case UNSPEC_DTPOFF:
6897 disp = XVECEXP (disp, 0, 0);
6898 return (GET_CODE (disp) == SYMBOL_REF
6899 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6900 }
6901
6902 return 0;
6903 }
6904
6905 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6906 memory address for an instruction. The MODE argument is the machine mode
6907 for the MEM expression that wants to use this address.
6908
6909 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6910 convert common non-canonical forms to canonical form so that they will
6911 be recognized. */
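/* As a rough illustration (a sketch, not normative): an address such as

       (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4 and disp = 12; each of
   those four parts is then validated separately below.  */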
6912
6913 int
6914 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6915 rtx addr, int strict)
6916 {
6917 struct ix86_address parts;
6918 rtx base, index, disp;
6919 HOST_WIDE_INT scale;
6920 const char *reason = NULL;
6921 rtx reason_rtx = NULL_RTX;
6922
6923 if (ix86_decompose_address (addr, &parts) <= 0)
6924 {
6925 reason = "decomposition failed";
6926 goto report_error;
6927 }
6928
6929 base = parts.base;
6930 index = parts.index;
6931 disp = parts.disp;
6932 scale = parts.scale;
6933
6934 /* Validate base register.
6935
6936 Don't allow SUBREG's that span more than a word here. It can lead to spill
6937 failures when the base is one word out of a two word structure, which is
6938 represented internally as a DImode int. */
6939
6940 if (base)
6941 {
6942 rtx reg;
6943 reason_rtx = base;
6944
6945 if (REG_P (base))
6946 reg = base;
6947 else if (GET_CODE (base) == SUBREG
6948 && REG_P (SUBREG_REG (base))
6949 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6950 <= UNITS_PER_WORD)
6951 reg = SUBREG_REG (base);
6952 else
6953 {
6954 reason = "base is not a register";
6955 goto report_error;
6956 }
6957
6958 if (GET_MODE (base) != Pmode)
6959 {
6960 reason = "base is not in Pmode";
6961 goto report_error;
6962 }
6963
6964 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6965 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6966 {
6967 reason = "base is not valid";
6968 goto report_error;
6969 }
6970 }
6971
6972 /* Validate index register.
6973
6974 Don't allow SUBREG's that span more than a word here -- same as above. */
6975
6976 if (index)
6977 {
6978 rtx reg;
6979 reason_rtx = index;
6980
6981 if (REG_P (index))
6982 reg = index;
6983 else if (GET_CODE (index) == SUBREG
6984 && REG_P (SUBREG_REG (index))
6985 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6986 <= UNITS_PER_WORD)
6987 reg = SUBREG_REG (index);
6988 else
6989 {
6990 reason = "index is not a register";
6991 goto report_error;
6992 }
6993
6994 if (GET_MODE (index) != Pmode)
6995 {
6996 reason = "index is not in Pmode";
6997 goto report_error;
6998 }
6999
7000 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7001 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7002 {
7003 reason = "index is not valid";
7004 goto report_error;
7005 }
7006 }
7007
7008 /* Validate scale factor. */
7009 if (scale != 1)
7010 {
7011 reason_rtx = GEN_INT (scale);
7012 if (!index)
7013 {
7014 reason = "scale without index";
7015 goto report_error;
7016 }
7017
7018 if (scale != 2 && scale != 4 && scale != 8)
7019 {
7020 reason = "scale is not a valid multiplier";
7021 goto report_error;
7022 }
7023 }
7024
7025 /* Validate displacement. */
7026 if (disp)
7027 {
7028 reason_rtx = disp;
7029
7030 if (GET_CODE (disp) == CONST
7031 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7032 switch (XINT (XEXP (disp, 0), 1))
7033 {
7034 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7035 used. While the ABI also specifies 32bit relocations, we don't produce
7036 them at all and use IP-relative addressing instead. */
7037 case UNSPEC_GOT:
7038 case UNSPEC_GOTOFF:
7039 gcc_assert (flag_pic);
7040 if (!TARGET_64BIT)
7041 goto is_legitimate_pic;
7042 reason = "64bit address unspec";
7043 goto report_error;
7044
7045 case UNSPEC_GOTPCREL:
7046 gcc_assert (flag_pic);
7047 goto is_legitimate_pic;
7048
7049 case UNSPEC_GOTTPOFF:
7050 case UNSPEC_GOTNTPOFF:
7051 case UNSPEC_INDNTPOFF:
7052 case UNSPEC_NTPOFF:
7053 case UNSPEC_DTPOFF:
7054 break;
7055
7056 default:
7057 reason = "invalid address unspec";
7058 goto report_error;
7059 }
7060
7061 else if (SYMBOLIC_CONST (disp)
7062 && (flag_pic
7063 || (TARGET_MACHO
7064 #if TARGET_MACHO
7065 && MACHOPIC_INDIRECT
7066 && !machopic_operand_p (disp)
7067 #endif
7068 )))
7069 {
7070
7071 is_legitimate_pic:
7072 if (TARGET_64BIT && (index || base))
7073 {
7074 /* foo@dtpoff(%rX) is ok. */
7075 if (GET_CODE (disp) != CONST
7076 || GET_CODE (XEXP (disp, 0)) != PLUS
7077 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7078 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7079 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7080 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7081 {
7082 reason = "non-constant pic memory reference";
7083 goto report_error;
7084 }
7085 }
7086 else if (! legitimate_pic_address_disp_p (disp))
7087 {
7088 reason = "displacement is an invalid pic construct";
7089 goto report_error;
7090 }
7091
7092 /* This code used to verify that a symbolic pic displacement
7093 includes the pic_offset_table_rtx register.
7094
7095 While this is a good idea, unfortunately these constructs may
7096 be created by the "adds using lea" optimization for incorrect
7097 code like:
7098
7099 int a;
7100 int foo(int i)
7101 {
7102 return *(&a+i);
7103 }
7104
7105 This code is nonsensical, but it results in addressing the
7106 GOT table with pic_offset_table_rtx as the base. We can't
7107 just refuse it easily, since it gets matched by the
7108 "addsi3" pattern, which later gets split to lea when the
7109 output register differs from the input. While this
7110 could be handled by a separate addsi pattern for this case
7111 that never results in lea, disabling this test seems to be
7112 the easier and correct fix for the crash. */
7113 }
7114 else if (GET_CODE (disp) != LABEL_REF
7115 && !CONST_INT_P (disp)
7116 && (GET_CODE (disp) != CONST
7117 || !legitimate_constant_p (disp))
7118 && (GET_CODE (disp) != SYMBOL_REF
7119 || !legitimate_constant_p (disp)))
7120 {
7121 reason = "displacement is not constant";
7122 goto report_error;
7123 }
7124 else if (TARGET_64BIT
7125 && !x86_64_immediate_operand (disp, VOIDmode))
7126 {
7127 reason = "displacement is out of range";
7128 goto report_error;
7129 }
7130 }
7131
7132 /* Everything looks valid. */
7133 return TRUE;
7134
7135 report_error:
7136 return FALSE;
7137 }
7138 \f
7139 /* Return a unique alias set for the GOT. */
7140
7141 static HOST_WIDE_INT
7142 ix86_GOT_alias_set (void)
7143 {
7144 static HOST_WIDE_INT set = -1;
7145 if (set == -1)
7146 set = new_alias_set ();
7147 return set;
7148 }
7149
7150 /* Return a legitimate reference for ORIG (an address) using the
7151 register REG. If REG is 0, a new pseudo is generated.
7152
7153 There are two types of references that must be handled:
7154
7155 1. Global data references must load the address from the GOT, via
7156 the PIC reg. An insn is emitted to do this load, and the reg is
7157 returned.
7158
7159 2. Static data references, constant pool addresses, and code labels
7160 compute the address as an offset from the GOT, whose base is in
7161 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7162 differentiate them from global data objects. The returned
7163 address is the PIC reg + an unspec constant.
7164
7165 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7166 reg also appears in the address. */
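/* For example (32-bit case, illustrative only): a global symbol is returned
   as

       (mem (plus pic_offset_table_rtx
                  (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))

   i.e. a load of the address from the GOT, while a local symbol becomes

       (plus pic_offset_table_rtx
             (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   i.e. the PIC register plus a GOT-relative offset.  The 64-bit paths below
   use UNSPEC_GOTPCREL instead.  */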
7167
7168 static rtx
7169 legitimize_pic_address (rtx orig, rtx reg)
7170 {
7171 rtx addr = orig;
7172 rtx new_rtx = orig;
7173 rtx base;
7174
7175 #if TARGET_MACHO
7176 if (TARGET_MACHO && !TARGET_64BIT)
7177 {
7178 if (reg == 0)
7179 reg = gen_reg_rtx (Pmode);
7180 /* Use the generic Mach-O PIC machinery. */
7181 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7182 }
7183 #endif
7184
7185 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7186 new_rtx = addr;
7187 else if (TARGET_64BIT
7188 && ix86_cmodel != CM_SMALL_PIC
7189 && gotoff_operand (addr, Pmode))
7190 {
7191 rtx tmpreg;
7192 /* This symbol may be referenced via a displacement from the PIC
7193 base address (@GOTOFF). */
7194
7195 if (reload_in_progress)
7196 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7197 if (GET_CODE (addr) == CONST)
7198 addr = XEXP (addr, 0);
7199 if (GET_CODE (addr) == PLUS)
7200 {
7201 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7202 UNSPEC_GOTOFF);
7203 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7204 }
7205 else
7206 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7207 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7208 if (!reg)
7209 tmpreg = gen_reg_rtx (Pmode);
7210 else
7211 tmpreg = reg;
7212 emit_move_insn (tmpreg, new_rtx);
7213
7214 if (reg != 0)
7215 {
7216 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7217 tmpreg, 1, OPTAB_DIRECT);
7218 new_rtx = reg;
7219 }
7220 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7221 }
7222 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7223 {
7224 /* This symbol may be referenced via a displacement from the PIC
7225 base address (@GOTOFF). */
7226
7227 if (reload_in_progress)
7228 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7229 if (GET_CODE (addr) == CONST)
7230 addr = XEXP (addr, 0);
7231 if (GET_CODE (addr) == PLUS)
7232 {
7233 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7234 UNSPEC_GOTOFF);
7235 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7236 }
7237 else
7238 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7239 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7240 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7241
7242 if (reg != 0)
7243 {
7244 emit_move_insn (reg, new_rtx);
7245 new_rtx = reg;
7246 }
7247 }
7248 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7249 /* We can't use @GOTOFF for text labels on VxWorks;
7250 see gotoff_operand. */
7251 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7252 {
7253 /* Given that we've already handled dllimport variables separately
7254 in legitimize_address, and all other variables should satisfy
7255 legitimate_pic_address_disp_p, we should never arrive here. */
7256 gcc_assert (!TARGET_64BIT_MS_ABI);
7257
7258 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7259 {
7260 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7261 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7262 new_rtx = gen_const_mem (Pmode, new_rtx);
7263 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7264
7265 if (reg == 0)
7266 reg = gen_reg_rtx (Pmode);
7267 /* Use gen_movsi directly, otherwise the address is loaded
7268 into a register for CSE. We don't want to CSE these addresses;
7269 instead we CSE addresses from the GOT table, so skip this. */
7270 emit_insn (gen_movsi (reg, new_rtx));
7271 new_rtx = reg;
7272 }
7273 else
7274 {
7275 /* This symbol must be referenced via a load from the
7276 Global Offset Table (@GOT). */
7277
7278 if (reload_in_progress)
7279 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7280 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7281 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7282 if (TARGET_64BIT)
7283 new_rtx = force_reg (Pmode, new_rtx);
7284 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7285 new_rtx = gen_const_mem (Pmode, new_rtx);
7286 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7287
7288 if (reg == 0)
7289 reg = gen_reg_rtx (Pmode);
7290 emit_move_insn (reg, new_rtx);
7291 new_rtx = reg;
7292 }
7293 }
7294 else
7295 {
7296 if (CONST_INT_P (addr)
7297 && !x86_64_immediate_operand (addr, VOIDmode))
7298 {
7299 if (reg)
7300 {
7301 emit_move_insn (reg, addr);
7302 new_rtx = reg;
7303 }
7304 else
7305 new_rtx = force_reg (Pmode, addr);
7306 }
7307 else if (GET_CODE (addr) == CONST)
7308 {
7309 addr = XEXP (addr, 0);
7310
7311 /* We must match stuff we generate before. Assume the only
7312 unspecs that can get here are ours. Not that we could do
7313 anything with them anyway.... */
7314 if (GET_CODE (addr) == UNSPEC
7315 || (GET_CODE (addr) == PLUS
7316 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7317 return orig;
7318 gcc_assert (GET_CODE (addr) == PLUS);
7319 }
7320 if (GET_CODE (addr) == PLUS)
7321 {
7322 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7323
7324 /* Check first to see if this is a constant offset from a @GOTOFF
7325 symbol reference. */
7326 if (gotoff_operand (op0, Pmode)
7327 && CONST_INT_P (op1))
7328 {
7329 if (!TARGET_64BIT)
7330 {
7331 if (reload_in_progress)
7332 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7333 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7334 UNSPEC_GOTOFF);
7335 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7336 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7337 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7338
7339 if (reg != 0)
7340 {
7341 emit_move_insn (reg, new_rtx);
7342 new_rtx = reg;
7343 }
7344 }
7345 else
7346 {
7347 if (INTVAL (op1) < -16*1024*1024
7348 || INTVAL (op1) >= 16*1024*1024)
7349 {
7350 if (!x86_64_immediate_operand (op1, Pmode))
7351 op1 = force_reg (Pmode, op1);
7352 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7353 }
7354 }
7355 }
7356 else
7357 {
7358 base = legitimize_pic_address (XEXP (addr, 0), reg);
7359 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7360 base == reg ? NULL_RTX : reg);
7361
7362 if (CONST_INT_P (new_rtx))
7363 new_rtx = plus_constant (base, INTVAL (new_rtx));
7364 else
7365 {
7366 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7367 {
7368 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7369 new_rtx = XEXP (new_rtx, 1);
7370 }
7371 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7372 }
7373 }
7374 }
7375 }
7376 return new_rtx;
7377 }
7378 \f
7379 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7380
7381 static rtx
7382 get_thread_pointer (int to_reg)
7383 {
7384 rtx tp, reg, insn;
7385
7386 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7387 if (!to_reg)
7388 return tp;
7389
7390 reg = gen_reg_rtx (Pmode);
7391 insn = gen_rtx_SET (VOIDmode, reg, tp);
7392 insn = emit_insn (insn);
7393
7394 return reg;
7395 }
7396
7397 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7398 false if we expect this to be used for a memory address and true if
7399 we expect to load the address into a register. */
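/* As a sketch of the simplest case (local exec under GNU TLS, illustrative
   only), the code below produces

       (plus <thread pointer> (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

   i.e. the thread pointer plus a link-time constant offset; the other models
   additionally load from the GOT and/or call the TLS runtime to obtain the
   base address.  */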
7400
7401 static rtx
7402 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7403 {
7404 rtx dest, base, off, pic, tp;
7405 int type;
7406
7407 switch (model)
7408 {
7409 case TLS_MODEL_GLOBAL_DYNAMIC:
7410 dest = gen_reg_rtx (Pmode);
7411 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7412
7413 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7414 {
7415 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7416
7417 start_sequence ();
7418 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7419 insns = get_insns ();
7420 end_sequence ();
7421
7422 CONST_OR_PURE_CALL_P (insns) = 1;
7423 emit_libcall_block (insns, dest, rax, x);
7424 }
7425 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7426 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7427 else
7428 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7429
7430 if (TARGET_GNU2_TLS)
7431 {
7432 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7433
7434 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7435 }
7436 break;
7437
7438 case TLS_MODEL_LOCAL_DYNAMIC:
7439 base = gen_reg_rtx (Pmode);
7440 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7441
7442 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7443 {
7444 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7445
7446 start_sequence ();
7447 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7448 insns = get_insns ();
7449 end_sequence ();
7450
7451 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7452 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7453 CONST_OR_PURE_CALL_P (insns) = 1;
7454 emit_libcall_block (insns, base, rax, note);
7455 }
7456 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7457 emit_insn (gen_tls_local_dynamic_base_64 (base));
7458 else
7459 emit_insn (gen_tls_local_dynamic_base_32 (base));
7460
7461 if (TARGET_GNU2_TLS)
7462 {
7463 rtx x = ix86_tls_module_base ();
7464
7465 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7466 gen_rtx_MINUS (Pmode, x, tp));
7467 }
7468
7469 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7470 off = gen_rtx_CONST (Pmode, off);
7471
7472 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7473
7474 if (TARGET_GNU2_TLS)
7475 {
7476 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7477
7478 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7479 }
7480
7481 break;
7482
7483 case TLS_MODEL_INITIAL_EXEC:
7484 if (TARGET_64BIT)
7485 {
7486 pic = NULL;
7487 type = UNSPEC_GOTNTPOFF;
7488 }
7489 else if (flag_pic)
7490 {
7491 if (reload_in_progress)
7492 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7493 pic = pic_offset_table_rtx;
7494 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7495 }
7496 else if (!TARGET_ANY_GNU_TLS)
7497 {
7498 pic = gen_reg_rtx (Pmode);
7499 emit_insn (gen_set_got (pic));
7500 type = UNSPEC_GOTTPOFF;
7501 }
7502 else
7503 {
7504 pic = NULL;
7505 type = UNSPEC_INDNTPOFF;
7506 }
7507
7508 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7509 off = gen_rtx_CONST (Pmode, off);
7510 if (pic)
7511 off = gen_rtx_PLUS (Pmode, pic, off);
7512 off = gen_const_mem (Pmode, off);
7513 set_mem_alias_set (off, ix86_GOT_alias_set ());
7514
7515 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7516 {
7517 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7518 off = force_reg (Pmode, off);
7519 return gen_rtx_PLUS (Pmode, base, off);
7520 }
7521 else
7522 {
7523 base = get_thread_pointer (true);
7524 dest = gen_reg_rtx (Pmode);
7525 emit_insn (gen_subsi3 (dest, base, off));
7526 }
7527 break;
7528
7529 case TLS_MODEL_LOCAL_EXEC:
7530 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7531 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7532 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7533 off = gen_rtx_CONST (Pmode, off);
7534
7535 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7536 {
7537 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7538 return gen_rtx_PLUS (Pmode, base, off);
7539 }
7540 else
7541 {
7542 base = get_thread_pointer (true);
7543 dest = gen_reg_rtx (Pmode);
7544 emit_insn (gen_subsi3 (dest, base, off));
7545 }
7546 break;
7547
7548 default:
7549 gcc_unreachable ();
7550 }
7551
7552 return dest;
7553 }
7554
7555 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7556 to symbol DECL. */
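/* For example (illustrative): for a dllimport'ed variable "foo", the decl
   built below gets DECL_RTL of the form

       (mem (symbol_ref "*__imp__foo"))

   i.e. a load of the real address through the import table entry; names that
   already carry FASTCALL_PREFIX use the shorter "*__imp_" prefix instead.  */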
7557
7558 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7559 htab_t dllimport_map;
7560
7561 static tree
7562 get_dllimport_decl (tree decl)
7563 {
7564 struct tree_map *h, in;
7565 void **loc;
7566 const char *name;
7567 const char *prefix;
7568 size_t namelen, prefixlen;
7569 char *imp_name;
7570 tree to;
7571 rtx rtl;
7572
7573 if (!dllimport_map)
7574 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7575
7576 in.hash = htab_hash_pointer (decl);
7577 in.base.from = decl;
7578 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7579 h = (struct tree_map *) *loc;
7580 if (h)
7581 return h->to;
7582
7583 *loc = h = GGC_NEW (struct tree_map);
7584 h->hash = in.hash;
7585 h->base.from = decl;
7586 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7587 DECL_ARTIFICIAL (to) = 1;
7588 DECL_IGNORED_P (to) = 1;
7589 DECL_EXTERNAL (to) = 1;
7590 TREE_READONLY (to) = 1;
7591
7592 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7593 name = targetm.strip_name_encoding (name);
7594 if (name[0] == FASTCALL_PREFIX)
7595 {
7596 name++;
7597 prefix = "*__imp_";
7598 }
7599 else
7600 prefix = "*__imp__";
7601
7602 namelen = strlen (name);
7603 prefixlen = strlen (prefix);
7604 imp_name = (char *) alloca (namelen + prefixlen + 1);
7605 memcpy (imp_name, prefix, prefixlen);
7606 memcpy (imp_name + prefixlen, name, namelen + 1);
7607
7608 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7609 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7610 SET_SYMBOL_REF_DECL (rtl, to);
7611 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7612
7613 rtl = gen_const_mem (Pmode, rtl);
7614 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7615
7616 SET_DECL_RTL (to, rtl);
7617
7618 return to;
7619 }
7620
7621 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7622 true if we require the result be a register. */
7623
7624 static rtx
7625 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7626 {
7627 tree imp_decl;
7628 rtx x;
7629
7630 gcc_assert (SYMBOL_REF_DECL (symbol));
7631 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7632
7633 x = DECL_RTL (imp_decl);
7634 if (want_reg)
7635 x = force_reg (Pmode, x);
7636 return x;
7637 }
7638
7639 /* Try machine-dependent ways of modifying an illegitimate address
7640 to be legitimate. If we find one, return the new, valid address.
7641 This macro is used in only one place: `memory_address' in explow.c.
7642
7643 OLDX is the address as it was before break_out_memory_refs was called.
7644 In some cases it is useful to look at this to decide what needs to be done.
7645
7646 MODE and WIN are passed so that this macro can use
7647 GO_IF_LEGITIMATE_ADDRESS.
7648
7649 It is always safe for this macro to do nothing. It exists to recognize
7650 opportunities to optimize the output.
7651
7652 For the 80386, we handle X+REG by loading X into a register R and
7653 using R+REG. R will go in a general reg and indexing will be used.
7654 However, if REG is a broken-out memory address or multiplication,
7655 nothing needs to be done because REG can certainly go in a general reg.
7656
7657 When -fpic is used, special handling is needed for symbolic references.
7658 See comments by legitimize_pic_address in i386.c for details. */
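/* A small illustration of the canonicalizations performed below: an address
   computed as

       (plus (ashift (reg) (const_int 2)) (reg))

   is rewritten into (plus (mult (reg) (const_int 4)) (reg)), matching the
   base + index*scale form that the addressing modes provide.  */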
7659
7660 rtx
7661 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7662 {
7663 int changed = 0;
7664 unsigned log;
7665
7666 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7667 if (log)
7668 return legitimize_tls_address (x, (enum tls_model) log, false);
7669 if (GET_CODE (x) == CONST
7670 && GET_CODE (XEXP (x, 0)) == PLUS
7671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7672 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7673 {
7674 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7675 (enum tls_model) log, false);
7676 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7677 }
7678
7679 if (flag_pic && SYMBOLIC_CONST (x))
7680 return legitimize_pic_address (x, 0);
7681
7682 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7683 {
7684 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7685 return legitimize_dllimport_symbol (x, true);
7686 if (GET_CODE (x) == CONST
7687 && GET_CODE (XEXP (x, 0)) == PLUS
7688 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7689 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7690 {
7691 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7692 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7693 }
7694 }
7695
7696 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7697 if (GET_CODE (x) == ASHIFT
7698 && CONST_INT_P (XEXP (x, 1))
7699 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7700 {
7701 changed = 1;
7702 log = INTVAL (XEXP (x, 1));
7703 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7704 GEN_INT (1 << log));
7705 }
7706
7707 if (GET_CODE (x) == PLUS)
7708 {
7709 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7710
7711 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7712 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7713 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7714 {
7715 changed = 1;
7716 log = INTVAL (XEXP (XEXP (x, 0), 1));
7717 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7718 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7719 GEN_INT (1 << log));
7720 }
7721
7722 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7723 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7724 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7725 {
7726 changed = 1;
7727 log = INTVAL (XEXP (XEXP (x, 1), 1));
7728 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7729 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7730 GEN_INT (1 << log));
7731 }
7732
7733 /* Put multiply first if it isn't already. */
7734 if (GET_CODE (XEXP (x, 1)) == MULT)
7735 {
7736 rtx tmp = XEXP (x, 0);
7737 XEXP (x, 0) = XEXP (x, 1);
7738 XEXP (x, 1) = tmp;
7739 changed = 1;
7740 }
7741
7742 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7743 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7744 created by virtual register instantiation, register elimination, and
7745 similar optimizations. */
7746 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7747 {
7748 changed = 1;
7749 x = gen_rtx_PLUS (Pmode,
7750 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7751 XEXP (XEXP (x, 1), 0)),
7752 XEXP (XEXP (x, 1), 1));
7753 }
7754
7755 /* Canonicalize
7756 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7757 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7758 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7759 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7760 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7761 && CONSTANT_P (XEXP (x, 1)))
7762 {
7763 rtx constant;
7764 rtx other = NULL_RTX;
7765
7766 if (CONST_INT_P (XEXP (x, 1)))
7767 {
7768 constant = XEXP (x, 1);
7769 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7770 }
7771 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7772 {
7773 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7774 other = XEXP (x, 1);
7775 }
7776 else
7777 constant = 0;
7778
7779 if (constant)
7780 {
7781 changed = 1;
7782 x = gen_rtx_PLUS (Pmode,
7783 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7784 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7785 plus_constant (other, INTVAL (constant)));
7786 }
7787 }
7788
7789 if (changed && legitimate_address_p (mode, x, FALSE))
7790 return x;
7791
7792 if (GET_CODE (XEXP (x, 0)) == MULT)
7793 {
7794 changed = 1;
7795 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7796 }
7797
7798 if (GET_CODE (XEXP (x, 1)) == MULT)
7799 {
7800 changed = 1;
7801 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7802 }
7803
7804 if (changed
7805 && REG_P (XEXP (x, 1))
7806 && REG_P (XEXP (x, 0)))
7807 return x;
7808
7809 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7810 {
7811 changed = 1;
7812 x = legitimize_pic_address (x, 0);
7813 }
7814
7815 if (changed && legitimate_address_p (mode, x, FALSE))
7816 return x;
7817
7818 if (REG_P (XEXP (x, 0)))
7819 {
7820 rtx temp = gen_reg_rtx (Pmode);
7821 rtx val = force_operand (XEXP (x, 1), temp);
7822 if (val != temp)
7823 emit_move_insn (temp, val);
7824
7825 XEXP (x, 1) = temp;
7826 return x;
7827 }
7828
7829 else if (REG_P (XEXP (x, 1)))
7830 {
7831 rtx temp = gen_reg_rtx (Pmode);
7832 rtx val = force_operand (XEXP (x, 0), temp);
7833 if (val != temp)
7834 emit_move_insn (temp, val);
7835
7836 XEXP (x, 0) = temp;
7837 return x;
7838 }
7839 }
7840
7841 return x;
7842 }
7843 \f
7844 /* Print an integer constant expression in assembler syntax. Addition
7845 and subtraction are the only arithmetic that may appear in these
7846 expressions. FILE is the stdio stream to write to, X is the rtx, and
7847 CODE is the operand print code from the output string. */
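/* For example (illustrative): given
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) this prints
   "foo@GOTOFF", and given (unspec [(symbol_ref "bar")] UNSPEC_GOTPCREL) it
   prints "bar@GOTPCREL(%rip)"; see the UNSPEC switch below for the full set
   of suffixes.  */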
7848
7849 static void
7850 output_pic_addr_const (FILE *file, rtx x, int code)
7851 {
7852 char buf[256];
7853
7854 switch (GET_CODE (x))
7855 {
7856 case PC:
7857 gcc_assert (flag_pic);
7858 putc ('.', file);
7859 break;
7860
7861 case SYMBOL_REF:
7862 if (! TARGET_MACHO || TARGET_64BIT)
7863 output_addr_const (file, x);
7864 else
7865 {
7866 const char *name = XSTR (x, 0);
7867
7868 /* Mark the decl as referenced so that cgraph will
7869 output the function. */
7870 if (SYMBOL_REF_DECL (x))
7871 mark_decl_referenced (SYMBOL_REF_DECL (x));
7872
7873 #if TARGET_MACHO
7874 if (MACHOPIC_INDIRECT
7875 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7876 name = machopic_indirection_name (x, /*stub_p=*/true);
7877 #endif
7878 assemble_name (file, name);
7879 }
7880 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7881 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7882 fputs ("@PLT", file);
7883 break;
7884
7885 case LABEL_REF:
7886 x = XEXP (x, 0);
7887 /* FALLTHRU */
7888 case CODE_LABEL:
7889 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7890 assemble_name (asm_out_file, buf);
7891 break;
7892
7893 case CONST_INT:
7894 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7895 break;
7896
7897 case CONST:
7898 /* This used to output parentheses around the expression,
7899 but that does not work on the 386 (either ATT or BSD assembler). */
7900 output_pic_addr_const (file, XEXP (x, 0), code);
7901 break;
7902
7903 case CONST_DOUBLE:
7904 if (GET_MODE (x) == VOIDmode)
7905 {
7906 /* We can use %d if the number is <32 bits and positive. */
7907 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7908 fprintf (file, "0x%lx%08lx",
7909 (unsigned long) CONST_DOUBLE_HIGH (x),
7910 (unsigned long) CONST_DOUBLE_LOW (x));
7911 else
7912 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7913 }
7914 else
7915 /* We can't handle floating point constants;
7916 PRINT_OPERAND must handle them. */
7917 output_operand_lossage ("floating constant misused");
7918 break;
7919
7920 case PLUS:
7921 /* Some assemblers need integer constants to appear first. */
7922 if (CONST_INT_P (XEXP (x, 0)))
7923 {
7924 output_pic_addr_const (file, XEXP (x, 0), code);
7925 putc ('+', file);
7926 output_pic_addr_const (file, XEXP (x, 1), code);
7927 }
7928 else
7929 {
7930 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7931 output_pic_addr_const (file, XEXP (x, 1), code);
7932 putc ('+', file);
7933 output_pic_addr_const (file, XEXP (x, 0), code);
7934 }
7935 break;
7936
7937 case MINUS:
7938 if (!TARGET_MACHO)
7939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7940 output_pic_addr_const (file, XEXP (x, 0), code);
7941 putc ('-', file);
7942 output_pic_addr_const (file, XEXP (x, 1), code);
7943 if (!TARGET_MACHO)
7944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7945 break;
7946
7947 case UNSPEC:
7948 gcc_assert (XVECLEN (x, 0) == 1);
7949 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7950 switch (XINT (x, 1))
7951 {
7952 case UNSPEC_GOT:
7953 fputs ("@GOT", file);
7954 break;
7955 case UNSPEC_GOTOFF:
7956 fputs ("@GOTOFF", file);
7957 break;
7958 case UNSPEC_PLTOFF:
7959 fputs ("@PLTOFF", file);
7960 break;
7961 case UNSPEC_GOTPCREL:
7962 fputs ("@GOTPCREL(%rip)", file);
7963 break;
7964 case UNSPEC_GOTTPOFF:
7965 /* FIXME: This might be @TPOFF in Sun ld too. */
7966 fputs ("@GOTTPOFF", file);
7967 break;
7968 case UNSPEC_TPOFF:
7969 fputs ("@TPOFF", file);
7970 break;
7971 case UNSPEC_NTPOFF:
7972 if (TARGET_64BIT)
7973 fputs ("@TPOFF", file);
7974 else
7975 fputs ("@NTPOFF", file);
7976 break;
7977 case UNSPEC_DTPOFF:
7978 fputs ("@DTPOFF", file);
7979 break;
7980 case UNSPEC_GOTNTPOFF:
7981 if (TARGET_64BIT)
7982 fputs ("@GOTTPOFF(%rip)", file);
7983 else
7984 fputs ("@GOTNTPOFF", file);
7985 break;
7986 case UNSPEC_INDNTPOFF:
7987 fputs ("@INDNTPOFF", file);
7988 break;
7989 default:
7990 output_operand_lossage ("invalid UNSPEC as operand");
7991 break;
7992 }
7993 break;
7994
7995 default:
7996 output_operand_lossage ("invalid expression as operand");
7997 }
7998 }
7999
8000 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8001 We need to emit DTP-relative relocations. */
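/* For example (assuming ASM_LONG expands to a ".long" directive): a 4-byte
   entry is emitted as ".long x@DTPOFF", and an 8-byte entry as the same
   directive followed by ", 0" for the upper half.  */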
8002
8003 static void ATTRIBUTE_UNUSED
8004 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8005 {
8006 fputs (ASM_LONG, file);
8007 output_addr_const (file, x);
8008 fputs ("@DTPOFF", file);
8009 switch (size)
8010 {
8011 case 4:
8012 break;
8013 case 8:
8014 fputs (", 0", file);
8015 break;
8016 default:
8017 gcc_unreachable ();
8018 }
8019 }
8020
8021 /* In the name of slightly smaller debug output, and to cater to
8022 general assembler lossage, recognize PIC+GOTOFF and turn it back
8023 into a direct symbol reference.
8024
8025 On Darwin, this is necessary to avoid a crash, because Darwin
8026 has a different PIC label for each routine but the DWARF debugging
8027 information is not associated with any particular routine, so it's
8028 necessary to remove references to the PIC label from RTL stored by
8029 the DWARF output code. */
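/* For example (32-bit case, illustrative only):

       (plus (reg %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref "sym"), with any register or constant
   addends re-applied to the result.  */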
8030
8031 static rtx
8032 ix86_delegitimize_address (rtx orig_x)
8033 {
8034 rtx x = orig_x;
8035 /* reg_addend is NULL or a multiple of some register. */
8036 rtx reg_addend = NULL_RTX;
8037 /* const_addend is NULL or a const_int. */
8038 rtx const_addend = NULL_RTX;
8039 /* This is the result, or NULL. */
8040 rtx result = NULL_RTX;
8041
8042 if (MEM_P (x))
8043 x = XEXP (x, 0);
8044
8045 if (TARGET_64BIT)
8046 {
8047 if (GET_CODE (x) != CONST
8048 || GET_CODE (XEXP (x, 0)) != UNSPEC
8049 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8050 || !MEM_P (orig_x))
8051 return orig_x;
8052 return XVECEXP (XEXP (x, 0), 0, 0);
8053 }
8054
8055 if (GET_CODE (x) != PLUS
8056 || GET_CODE (XEXP (x, 1)) != CONST)
8057 return orig_x;
8058
8059 if (REG_P (XEXP (x, 0))
8060 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8061 /* %ebx + GOT/GOTOFF */
8062 ;
8063 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8064 {
8065 /* %ebx + %reg * scale + GOT/GOTOFF */
8066 reg_addend = XEXP (x, 0);
8067 if (REG_P (XEXP (reg_addend, 0))
8068 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8069 reg_addend = XEXP (reg_addend, 1);
8070 else if (REG_P (XEXP (reg_addend, 1))
8071 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8072 reg_addend = XEXP (reg_addend, 0);
8073 else
8074 return orig_x;
8075 if (!REG_P (reg_addend)
8076 && GET_CODE (reg_addend) != MULT
8077 && GET_CODE (reg_addend) != ASHIFT)
8078 return orig_x;
8079 }
8080 else
8081 return orig_x;
8082
8083 x = XEXP (XEXP (x, 1), 0);
8084 if (GET_CODE (x) == PLUS
8085 && CONST_INT_P (XEXP (x, 1)))
8086 {
8087 const_addend = XEXP (x, 1);
8088 x = XEXP (x, 0);
8089 }
8090
8091 if (GET_CODE (x) == UNSPEC
8092 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8093 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8094 result = XVECEXP (x, 0, 0);
8095
8096 if (TARGET_MACHO && darwin_local_data_pic (x)
8097 && !MEM_P (orig_x))
8098 result = XEXP (x, 0);
8099
8100 if (! result)
8101 return orig_x;
8102
8103 if (const_addend)
8104 result = gen_rtx_PLUS (Pmode, result, const_addend);
8105 if (reg_addend)
8106 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8107 return result;
8108 }
8109
8110 /* If X is a machine specific address (i.e. a symbol or label being
8111 referenced as a displacement from the GOT implemented using an
8112 UNSPEC), then return the base term. Otherwise return X. */
8113
8114 rtx
8115 ix86_find_base_term (rtx x)
8116 {
8117 rtx term;
8118
8119 if (TARGET_64BIT)
8120 {
8121 if (GET_CODE (x) != CONST)
8122 return x;
8123 term = XEXP (x, 0);
8124 if (GET_CODE (term) == PLUS
8125 && (CONST_INT_P (XEXP (term, 1))
8126 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8127 term = XEXP (term, 0);
8128 if (GET_CODE (term) != UNSPEC
8129 || XINT (term, 1) != UNSPEC_GOTPCREL)
8130 return x;
8131
8132 term = XVECEXP (term, 0, 0);
8133
8134 if (GET_CODE (term) != SYMBOL_REF
8135 && GET_CODE (term) != LABEL_REF)
8136 return x;
8137
8138 return term;
8139 }
8140
8141 term = ix86_delegitimize_address (x);
8142
8143 if (GET_CODE (term) != SYMBOL_REF
8144 && GET_CODE (term) != LABEL_REF)
8145 return x;
8146
8147 return term;
8148 }
8149 \f
8150 static void
8151 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8152 int fp, FILE *file)
8153 {
8154 const char *suffix;
8155
8156 if (mode == CCFPmode || mode == CCFPUmode)
8157 {
8158 enum rtx_code second_code, bypass_code;
8159 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8160 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8161 code = ix86_fp_compare_code_to_integer (code);
8162 mode = CCmode;
8163 }
8164 if (reverse)
8165 code = reverse_condition (code);
8166
8167 switch (code)
8168 {
8169 case EQ:
8170 switch (mode)
8171 {
8172 case CCAmode:
8173 suffix = "a";
8174 break;
8175
8176 case CCCmode:
8177 suffix = "c";
8178 break;
8179
8180 case CCOmode:
8181 suffix = "o";
8182 break;
8183
8184 case CCSmode:
8185 suffix = "s";
8186 break;
8187
8188 default:
8189 suffix = "e";
8190 }
8191 break;
8192 case NE:
8193 switch (mode)
8194 {
8195 case CCAmode:
8196 suffix = "na";
8197 break;
8198
8199 case CCCmode:
8200 suffix = "nc";
8201 break;
8202
8203 case CCOmode:
8204 suffix = "no";
8205 break;
8206
8207 case CCSmode:
8208 suffix = "ns";
8209 break;
8210
8211 default:
8212 suffix = "ne";
8213 }
8214 break;
8215 case GT:
8216 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8217 suffix = "g";
8218 break;
8219 case GTU:
8220 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8221 Those same assemblers have the same but opposite lossage on cmov. */
8222 gcc_assert (mode == CCmode);
8223 suffix = fp ? "nbe" : "a";
8224 break;
8225 case LT:
8226 switch (mode)
8227 {
8228 case CCNOmode:
8229 case CCGOCmode:
8230 suffix = "s";
8231 break;
8232
8233 case CCmode:
8234 case CCGCmode:
8235 suffix = "l";
8236 break;
8237
8238 default:
8239 gcc_unreachable ();
8240 }
8241 break;
8242 case LTU:
8243 gcc_assert (mode == CCmode);
8244 suffix = "b";
8245 break;
8246 case GE:
8247 switch (mode)
8248 {
8249 case CCNOmode:
8250 case CCGOCmode:
8251 suffix = "ns";
8252 break;
8253
8254 case CCmode:
8255 case CCGCmode:
8256 suffix = "ge";
8257 break;
8258
8259 default:
8260 gcc_unreachable ();
8261 }
8262 break;
8263 case GEU:
8264 /* ??? As above. */
8265 gcc_assert (mode == CCmode);
8266 suffix = fp ? "nb" : "ae";
8267 break;
8268 case LE:
8269 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8270 suffix = "le";
8271 break;
8272 case LEU:
8273 gcc_assert (mode == CCmode);
8274 suffix = "be";
8275 break;
8276 case UNORDERED:
8277 suffix = fp ? "u" : "p";
8278 break;
8279 case ORDERED:
8280 suffix = fp ? "nu" : "np";
8281 break;
8282 default:
8283 gcc_unreachable ();
8284 }
8285 fputs (suffix, file);
8286 }
8287
8288 /* Print the name of register X to FILE based on its machine mode and number.
8289 If CODE is 'w', pretend the mode is HImode.
8290 If CODE is 'b', pretend the mode is QImode.
8291 If CODE is 'k', pretend the mode is SImode.
8292 If CODE is 'q', pretend the mode is DImode.
8293 If CODE is 'h', pretend the reg is the 'high' byte register.
8294 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
8295
8296 void
8297 print_reg (rtx x, int code, FILE *file)
8298 {
8299 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8300 && REGNO (x) != FRAME_POINTER_REGNUM
8301 && REGNO (x) != FLAGS_REG
8302 && REGNO (x) != FPSR_REG
8303 && REGNO (x) != FPCR_REG);
8304
8305 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8306 putc ('%', file);
8307
8308 if (code == 'w' || MMX_REG_P (x))
8309 code = 2;
8310 else if (code == 'b')
8311 code = 1;
8312 else if (code == 'k')
8313 code = 4;
8314 else if (code == 'q')
8315 code = 8;
8316 else if (code == 'y')
8317 code = 3;
8318 else if (code == 'h')
8319 code = 0;
8320 else
8321 code = GET_MODE_SIZE (GET_MODE (x));
8322
8323 /* Irritatingly, AMD extended registers use a different naming convention
8324 from the normal registers. */
8325 if (REX_INT_REG_P (x))
8326 {
8327 gcc_assert (TARGET_64BIT);
8328 switch (code)
8329 {
8330 case 0:
8331 error ("extended registers have no high halves");
8332 break;
8333 case 1:
8334 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8335 break;
8336 case 2:
8337 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8338 break;
8339 case 4:
8340 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8341 break;
8342 case 8:
8343 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8344 break;
8345 default:
8346 error ("unsupported operand size for extended register");
8347 break;
8348 }
8349 return;
8350 }
8351 switch (code)
8352 {
8353 case 3:
8354 if (STACK_TOP_P (x))
8355 {
8356 fputs ("st(0)", file);
8357 break;
8358 }
8359 /* FALLTHRU */
8360 case 8:
8361 case 4:
8362 case 12:
8363 if (! ANY_FP_REG_P (x))
8364 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8365 /* FALLTHRU */
8366 case 16:
8367 case 2:
8368 normal:
8369 fputs (hi_reg_name[REGNO (x)], file);
8370 break;
8371 case 1:
8372 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8373 goto normal;
8374 fputs (qi_reg_name[REGNO (x)], file);
8375 break;
8376 case 0:
8377 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8378 goto normal;
8379 fputs (qi_high_reg_name[REGNO (x)], file);
8380 break;
8381 default:
8382 gcc_unreachable ();
8383 }
8384 }
8385
8386 /* Locate some local-dynamic symbol still in use by this function
8387 so that we can print its name in some tls_local_dynamic_base
8388 pattern. */
8389
8390 static int
8391 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8392 {
8393 rtx x = *px;
8394
8395 if (GET_CODE (x) == SYMBOL_REF
8396 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8397 {
8398 cfun->machine->some_ld_name = XSTR (x, 0);
8399 return 1;
8400 }
8401
8402 return 0;
8403 }
8404
8405 static const char *
8406 get_some_local_dynamic_name (void)
8407 {
8408 rtx insn;
8409
8410 if (cfun->machine->some_ld_name)
8411 return cfun->machine->some_ld_name;
8412
8413 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8414 if (INSN_P (insn)
8415 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8416 return cfun->machine->some_ld_name;
8417
8418 gcc_unreachable ();
8419 }
8420
8421 /* Meaning of CODE:
8422 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8423 C -- print opcode suffix for set/cmov insn.
8424 c -- like C, but print reversed condition
8425 F,f -- likewise, but for floating-point.
8426 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8427 otherwise nothing
8428 R -- print the prefix for register names.
8429 z -- print the opcode suffix for the size of the current operand.
8430 * -- print a star (in certain assembler syntax)
8431 A -- print an absolute memory reference.
8432 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8433 s -- print a shift double count, followed by the assembler's argument
8434 delimiter.
8435 b -- print the QImode name of the register for the indicated operand.
8436 %b0 would print %al if operands[0] is reg 0.
8437 w -- likewise, print the HImode name of the register.
8438 k -- likewise, print the SImode name of the register.
8439 q -- likewise, print the DImode name of the register.
8440 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8441 y -- print "st(0)" instead of "st" as a register.
8442 D -- print condition for SSE cmp instruction.
8443 P -- if PIC, print an @PLT suffix.
8444 X -- don't print any sort of PIC '@' suffix for a symbol.
8445 & -- print some in-use local-dynamic symbol name.
8446 H -- print a memory address offset by 8; used for sse high-parts
8447 */
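/* Illustrative examples of the codes above (assuming operands[0] is %eax):
   "%k0" prints %eax, "%w0" prints %ax, "%b0" prints %al and "%h0" prints
   %ah; "%z0" appends the opcode suffix that matches the size of the
   operand.  */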
8448
8449 void
8450 print_operand (FILE *file, rtx x, int code)
8451 {
8452 if (code)
8453 {
8454 switch (code)
8455 {
8456 case '*':
8457 if (ASSEMBLER_DIALECT == ASM_ATT)
8458 putc ('*', file);
8459 return;
8460
8461 case '&':
8462 assemble_name (file, get_some_local_dynamic_name ());
8463 return;
8464
8465 case 'A':
8466 switch (ASSEMBLER_DIALECT)
8467 {
8468 case ASM_ATT:
8469 putc ('*', file);
8470 break;
8471
8472 case ASM_INTEL:
8473 /* Intel syntax. For absolute addresses, registers should not
8474 be surrounded by braces. */
8475 if (!REG_P (x))
8476 {
8477 putc ('[', file);
8478 PRINT_OPERAND (file, x, 0);
8479 putc (']', file);
8480 return;
8481 }
8482 break;
8483
8484 default:
8485 gcc_unreachable ();
8486 }
8487
8488 PRINT_OPERAND (file, x, 0);
8489 return;
8490
8491
8492 case 'L':
8493 if (ASSEMBLER_DIALECT == ASM_ATT)
8494 putc ('l', file);
8495 return;
8496
8497 case 'W':
8498 if (ASSEMBLER_DIALECT == ASM_ATT)
8499 putc ('w', file);
8500 return;
8501
8502 case 'B':
8503 if (ASSEMBLER_DIALECT == ASM_ATT)
8504 putc ('b', file);
8505 return;
8506
8507 case 'Q':
8508 if (ASSEMBLER_DIALECT == ASM_ATT)
8509 putc ('l', file);
8510 return;
8511
8512 case 'S':
8513 if (ASSEMBLER_DIALECT == ASM_ATT)
8514 putc ('s', file);
8515 return;
8516
8517 case 'T':
8518 if (ASSEMBLER_DIALECT == ASM_ATT)
8519 putc ('t', file);
8520 return;
8521
8522 case 'z':
8523 /* 387 opcodes don't get size suffixes if the operands are
8524 registers. */
8525 if (STACK_REG_P (x))
8526 return;
8527
8528 /* Likewise if using Intel opcodes. */
8529 if (ASSEMBLER_DIALECT == ASM_INTEL)
8530 return;
8531
8532 /* Derive the opcode suffix from the size of the operand. */
8533 switch (GET_MODE_SIZE (GET_MODE (x)))
8534 {
8535 case 1:
8536 putc ('b', file);
8537 return;
8538
8539 case 2:
8540 if (MEM_P (x))
8541 {
8542 #ifdef HAVE_GAS_FILDS_FISTS
8543 putc ('s', file);
8544 #endif
8545 return;
8546 }
8547 else
8548 putc ('w', file);
8549 return;
8550
8551 case 4:
8552 if (GET_MODE (x) == SFmode)
8553 {
8554 putc ('s', file);
8555 return;
8556 }
8557 else
8558 putc ('l', file);
8559 return;
8560
8561 case 12:
8562 case 16:
8563 putc ('t', file);
8564 return;
8565
8566 case 8:
8567 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8568 {
8569 #ifdef GAS_MNEMONICS
8570 putc ('q', file);
8571 #else
8572 putc ('l', file);
8573 putc ('l', file);
8574 #endif
8575 }
8576 else
8577 putc ('l', file);
8578 return;
8579
8580 default:
8581 gcc_unreachable ();
8582 }
8583
8584 case 'b':
8585 case 'w':
8586 case 'k':
8587 case 'q':
8588 case 'h':
8589 case 'y':
8590 case 'X':
8591 case 'P':
8592 break;
8593
8594 case 's':
8595 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8596 {
8597 PRINT_OPERAND (file, x, 0);
8598 putc (',', file);
8599 }
8600 return;
8601
8602 case 'D':
8603 /* Little bit of braindamage here. The SSE compare instructions
8604 use completely different names for the comparisons than the
8605 fp conditional moves do. */
8606 switch (GET_CODE (x))
8607 {
8608 case EQ:
8609 case UNEQ:
8610 fputs ("eq", file);
8611 break;
8612 case LT:
8613 case UNLT:
8614 fputs ("lt", file);
8615 break;
8616 case LE:
8617 case UNLE:
8618 fputs ("le", file);
8619 break;
8620 case UNORDERED:
8621 fputs ("unord", file);
8622 break;
8623 case NE:
8624 case LTGT:
8625 fputs ("neq", file);
8626 break;
8627 case UNGE:
8628 case GE:
8629 fputs ("nlt", file);
8630 break;
8631 case UNGT:
8632 case GT:
8633 fputs ("nle", file);
8634 break;
8635 case ORDERED:
8636 fputs ("ord", file);
8637 break;
8638 default:
8639 gcc_unreachable ();
8640 }
8641 return;
8642 case 'O':
8643 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8644 if (ASSEMBLER_DIALECT == ASM_ATT)
8645 {
8646 switch (GET_MODE (x))
8647 {
8648 case HImode: putc ('w', file); break;
8649 case SImode:
8650 case SFmode: putc ('l', file); break;
8651 case DImode:
8652 case DFmode: putc ('q', file); break;
8653 default: gcc_unreachable ();
8654 }
8655 putc ('.', file);
8656 }
8657 #endif
8658 return;
8659 case 'C':
8660 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8661 return;
8662 case 'F':
8663 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8664 if (ASSEMBLER_DIALECT == ASM_ATT)
8665 putc ('.', file);
8666 #endif
8667 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8668 return;
8669
8670 /* Like above, but reverse condition */
8671 case 'c':
8672 /* Check to see if argument to %c is really a constant
8673 and not a condition code which needs to be reversed. */
8674 if (!COMPARISON_P (x))
8675 {
8676 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8677 return;
8678 }
8679 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8680 return;
8681 case 'f':
8682 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8683 if (ASSEMBLER_DIALECT == ASM_ATT)
8684 putc ('.', file);
8685 #endif
8686 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8687 return;
8688
8689 case 'H':
8690 /* It doesn't actually matter what mode we use here, as we're
8691 only going to use this for printing. */
8692 x = adjust_address_nv (x, DImode, 8);
8693 break;
8694
8695 case '+':
8696 {
8697 rtx x;
8698
8699 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8700 return;
8701
8702 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8703 if (x)
8704 {
8705 int pred_val = INTVAL (XEXP (x, 0));
8706
8707 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8708 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8709 {
8710 int taken = pred_val > REG_BR_PROB_BASE / 2;
8711 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8712
8713 /* Emit hints only in the case default branch prediction
8714 heuristics would fail. */
8715 if (taken != cputaken)
8716 {
8717 /* We use 3e (DS) prefix for taken branches and
8718 2e (CS) prefix for not taken branches. */
8719 if (taken)
8720 fputs ("ds ; ", file);
8721 else
8722 fputs ("cs ; ", file);
8723 }
8724 }
8725 }
8726 return;
8727 }
8728 default:
8729 output_operand_lossage ("invalid operand code '%c'", code);
8730 }
8731 }
8732
8733 if (REG_P (x))
8734 print_reg (x, code, file);
8735
8736 else if (MEM_P (x))
8737 {
8738 /* No `byte ptr' prefix for call instructions. */
8739 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8740 {
8741 const char * size;
8742 switch (GET_MODE_SIZE (GET_MODE (x)))
8743 {
8744 case 1: size = "BYTE"; break;
8745 case 2: size = "WORD"; break;
8746 case 4: size = "DWORD"; break;
8747 case 8: size = "QWORD"; break;
8748 case 12: size = "XWORD"; break;
8749 case 16: size = "XMMWORD"; break;
8750 default:
8751 gcc_unreachable ();
8752 }
8753
8754 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8755 if (code == 'b')
8756 size = "BYTE";
8757 else if (code == 'w')
8758 size = "WORD";
8759 else if (code == 'k')
8760 size = "DWORD";
8761
8762 fputs (size, file);
8763 fputs (" PTR ", file);
8764 }
8765
8766 x = XEXP (x, 0);
8767 /* Avoid (%rip) for call operands. */
8768 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8769 && !CONST_INT_P (x))
8770 output_addr_const (file, x);
8771 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8772 output_operand_lossage ("invalid constraints for operand");
8773 else
8774 output_address (x);
8775 }
8776
8777 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8778 {
8779 REAL_VALUE_TYPE r;
8780 long l;
8781
8782 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8783 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8784
8785 if (ASSEMBLER_DIALECT == ASM_ATT)
8786 putc ('$', file);
8787 fprintf (file, "0x%08lx", l);
8788 }
8789
8790 /* These float cases don't actually occur as immediate operands. */
8791 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8792 {
8793 char dstr[30];
8794
8795 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8796 fprintf (file, "%s", dstr);
8797 }
8798
8799 else if (GET_CODE (x) == CONST_DOUBLE
8800 && GET_MODE (x) == XFmode)
8801 {
8802 char dstr[30];
8803
8804 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8805 fprintf (file, "%s", dstr);
8806 }
8807
8808 else
8809 {
8810 /* We have patterns that allow zero sets of memory, for instance.
8811 In 64-bit mode, we should probably support all 8-byte vectors,
8812 since we can in fact encode that into an immediate. */
8813 if (GET_CODE (x) == CONST_VECTOR)
8814 {
8815 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8816 x = const0_rtx;
8817 }
8818
8819 if (code != 'P')
8820 {
8821 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8822 {
8823 if (ASSEMBLER_DIALECT == ASM_ATT)
8824 putc ('$', file);
8825 }
8826 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8827 || GET_CODE (x) == LABEL_REF)
8828 {
8829 if (ASSEMBLER_DIALECT == ASM_ATT)
8830 putc ('$', file);
8831 else
8832 fputs ("OFFSET FLAT:", file);
8833 }
8834 }
8835 if (CONST_INT_P (x))
8836 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8837 else if (flag_pic)
8838 output_pic_addr_const (file, x, code);
8839 else
8840 output_addr_const (file, x);
8841 }
8842 }
8843 \f
8844 /* Print a memory operand whose address is ADDR. */
8845
8846 void
8847 print_operand_address (FILE *file, rtx addr)
8848 {
8849 struct ix86_address parts;
8850 rtx base, index, disp;
8851 int scale;
8852 int ok = ix86_decompose_address (addr, &parts);
8853
8854 gcc_assert (ok);
8855
8856 base = parts.base;
8857 index = parts.index;
8858 disp = parts.disp;
8859 scale = parts.scale;
8860
8861 switch (parts.seg)
8862 {
8863 case SEG_DEFAULT:
8864 break;
8865 case SEG_FS:
8866 case SEG_GS:
8867 if (USER_LABEL_PREFIX[0] == 0)
8868 putc ('%', file);
8869 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8870 break;
8871 default:
8872 gcc_unreachable ();
8873 }
8874
8875 if (!base && !index)
8876 {
8877 /* A displacement-only address requires special attention. */
8878
8879 if (CONST_INT_P (disp))
8880 {
8881 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8882 {
8883 if (USER_LABEL_PREFIX[0] == 0)
8884 putc ('%', file);
8885 fputs ("ds:", file);
8886 }
8887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8888 }
8889 else if (flag_pic)
8890 output_pic_addr_const (file, disp, 0);
8891 else
8892 output_addr_const (file, disp);
8893
8894 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8895 if (TARGET_64BIT)
8896 {
8897 if (GET_CODE (disp) == CONST
8898 && GET_CODE (XEXP (disp, 0)) == PLUS
8899 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8900 disp = XEXP (XEXP (disp, 0), 0);
8901 if (GET_CODE (disp) == LABEL_REF
8902 || (GET_CODE (disp) == SYMBOL_REF
8903 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8904 fputs ("(%rip)", file);
8905 }
8906 }
8907 else
8908 {
8909 if (ASSEMBLER_DIALECT == ASM_ATT)
8910 {
8911 if (disp)
8912 {
8913 if (flag_pic)
8914 output_pic_addr_const (file, disp, 0);
8915 else if (GET_CODE (disp) == LABEL_REF)
8916 output_asm_label (disp);
8917 else
8918 output_addr_const (file, disp);
8919 }
8920
8921 putc ('(', file);
8922 if (base)
8923 print_reg (base, 0, file);
8924 if (index)
8925 {
8926 putc (',', file);
8927 print_reg (index, 0, file);
8928 if (scale != 1)
8929 fprintf (file, ",%d", scale);
8930 }
8931 putc (')', file);
8932 }
8933 else
8934 {
8935 rtx offset = NULL_RTX;
8936
8937 if (disp)
8938 {
8939 /* Pull out the offset of a symbol; print any symbol itself. */
8940 if (GET_CODE (disp) == CONST
8941 && GET_CODE (XEXP (disp, 0)) == PLUS
8942 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8943 {
8944 offset = XEXP (XEXP (disp, 0), 1);
8945 disp = gen_rtx_CONST (VOIDmode,
8946 XEXP (XEXP (disp, 0), 0));
8947 }
8948
8949 if (flag_pic)
8950 output_pic_addr_const (file, disp, 0);
8951 else if (GET_CODE (disp) == LABEL_REF)
8952 output_asm_label (disp);
8953 else if (CONST_INT_P (disp))
8954 offset = disp;
8955 else
8956 output_addr_const (file, disp);
8957 }
8958
8959 putc ('[', file);
8960 if (base)
8961 {
8962 print_reg (base, 0, file);
8963 if (offset)
8964 {
8965 if (INTVAL (offset) >= 0)
8966 putc ('+', file);
8967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8968 }
8969 }
8970 else if (offset)
8971 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8972 else
8973 putc ('0', file);
8974
8975 if (index)
8976 {
8977 putc ('+', file);
8978 print_reg (index, 0, file);
8979 if (scale != 1)
8980 fprintf (file, "*%d", scale);
8981 }
8982 putc (']', file);
8983 }
8984 }
8985 }
8986
8987 bool
8988 output_addr_const_extra (FILE *file, rtx x)
8989 {
8990 rtx op;
8991
8992 if (GET_CODE (x) != UNSPEC)
8993 return false;
8994
8995 op = XVECEXP (x, 0, 0);
8996 switch (XINT (x, 1))
8997 {
8998 case UNSPEC_GOTTPOFF:
8999 output_addr_const (file, op);
9000 /* FIXME: This might be @TPOFF in Sun ld. */
9001 fputs ("@GOTTPOFF", file);
9002 break;
9003 case UNSPEC_TPOFF:
9004 output_addr_const (file, op);
9005 fputs ("@TPOFF", file);
9006 break;
9007 case UNSPEC_NTPOFF:
9008 output_addr_const (file, op);
9009 if (TARGET_64BIT)
9010 fputs ("@TPOFF", file);
9011 else
9012 fputs ("@NTPOFF", file);
9013 break;
9014 case UNSPEC_DTPOFF:
9015 output_addr_const (file, op);
9016 fputs ("@DTPOFF", file);
9017 break;
9018 case UNSPEC_GOTNTPOFF:
9019 output_addr_const (file, op);
9020 if (TARGET_64BIT)
9021 fputs ("@GOTTPOFF(%rip)", file);
9022 else
9023 fputs ("@GOTNTPOFF", file);
9024 break;
9025 case UNSPEC_INDNTPOFF:
9026 output_addr_const (file, op);
9027 fputs ("@INDNTPOFF", file);
9028 break;
9029
9030 default:
9031 return false;
9032 }
9033
9034 return true;
9035 }
9036 \f
9037 /* Split one or more DImode RTL references into pairs of SImode
9038 references. The RTL can be REG, offsettable MEM, integer constant, or
9039 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9040 split and "num" is its length. lo_half and hi_half are output arrays
9041 that parallel "operands". */
9042
9043 void
9044 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9045 {
9046 while (num--)
9047 {
9048 rtx op = operands[num];
9049
9050 /* simplify_subreg refuses to split volatile memory addresses,
9051 but we still have to handle them. */
9052 if (MEM_P (op))
9053 {
9054 lo_half[num] = adjust_address (op, SImode, 0);
9055 hi_half[num] = adjust_address (op, SImode, 4);
9056 }
9057 else
9058 {
9059 lo_half[num] = simplify_gen_subreg (SImode, op,
9060 GET_MODE (op) == VOIDmode
9061 ? DImode : GET_MODE (op), 0);
9062 hi_half[num] = simplify_gen_subreg (SImode, op,
9063 GET_MODE (op) == VOIDmode
9064 ? DImode : GET_MODE (op), 4);
9065 }
9066 }
9067 }
9068 /* Split one or more TImode RTL references into pairs of DImode
9069 references. The RTL can be REG, offsettable MEM, integer constant, or
9070 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9071 split and "num" is its length. lo_half and hi_half are output arrays
9072 that parallel "operands". */
9073
9074 void
9075 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9076 {
9077 while (num--)
9078 {
9079 rtx op = operands[num];
9080
9081 /* simplify_subreg refuses to split volatile memory addresses, but we
9082 still have to handle them. */
9083 if (MEM_P (op))
9084 {
9085 lo_half[num] = adjust_address (op, DImode, 0);
9086 hi_half[num] = adjust_address (op, DImode, 8);
9087 }
9088 else
9089 {
9090 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9091 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9092 }
9093 }
9094 }
9095 \f
9096 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9097 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9098 is the expression of the binary operation. The output may either be
9099 emitted here, or returned to the caller, like all output_* functions.
9100
9101 There is no guarantee that the operands are the same mode, as they
9102 might be within FLOAT or FLOAT_EXTEND expressions. */
9103
9104 #ifndef SYSV386_COMPAT
9105 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9106 wants to fix the assemblers because that causes incompatibility
9107 with gcc. No-one wants to fix gcc because that causes
9108 incompatibility with assemblers... You can use the option of
9109 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9110 #define SYSV386_COMPAT 1
9111 #endif
9112
9113 const char *
9114 output_387_binary_op (rtx insn, rtx *operands)
9115 {
9116 static char buf[30];
9117 const char *p;
9118 const char *ssep;
9119 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9120
9121 #ifdef ENABLE_CHECKING
9122 /* Even if we do not want to check the inputs, this documents the input
9123 constraints, which helps in understanding the following code. */
9124 if (STACK_REG_P (operands[0])
9125 && ((REG_P (operands[1])
9126 && REGNO (operands[0]) == REGNO (operands[1])
9127 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9128 || (REG_P (operands[2])
9129 && REGNO (operands[0]) == REGNO (operands[2])
9130 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9131 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9132 ; /* ok */
9133 else
9134 gcc_assert (is_sse);
9135 #endif
9136
9137 switch (GET_CODE (operands[3]))
9138 {
9139 case PLUS:
9140 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9141 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9142 p = "fiadd";
9143 else
9144 p = "fadd";
9145 ssep = "add";
9146 break;
9147
9148 case MINUS:
9149 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9150 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9151 p = "fisub";
9152 else
9153 p = "fsub";
9154 ssep = "sub";
9155 break;
9156
9157 case MULT:
9158 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9159 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9160 p = "fimul";
9161 else
9162 p = "fmul";
9163 ssep = "mul";
9164 break;
9165
9166 case DIV:
9167 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9168 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9169 p = "fidiv";
9170 else
9171 p = "fdiv";
9172 ssep = "div";
9173 break;
9174
9175 default:
9176 gcc_unreachable ();
9177 }
9178
9179 if (is_sse)
9180 {
9181 strcpy (buf, ssep);
9182 if (GET_MODE (operands[0]) == SFmode)
9183 strcat (buf, "ss\t{%2, %0|%0, %2}");
9184 else
9185 strcat (buf, "sd\t{%2, %0|%0, %2}");
9186 return buf;
9187 }
9188 strcpy (buf, p);
9189
9190 switch (GET_CODE (operands[3]))
9191 {
9192 case MULT:
9193 case PLUS:
9194 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9195 {
9196 rtx temp = operands[2];
9197 operands[2] = operands[1];
9198 operands[1] = temp;
9199 }
9200
9201 /* We now know that operands[0] == operands[1]. */
9202
9203 if (MEM_P (operands[2]))
9204 {
9205 p = "%z2\t%2";
9206 break;
9207 }
9208
9209 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9210 {
9211 if (STACK_TOP_P (operands[0]))
9212 /* How is it that we are storing to a dead operand[2]?
9213 Well, presumably operands[1] is dead too. We can't
9214 store the result to st(0) as st(0) gets popped on this
9215 instruction. Instead store to operands[2] (which I
9216 think has to be st(1)). st(1) will be popped later.
9217 gcc <= 2.8.1 didn't have this check and generated
9218 assembly code that the Unixware assembler rejected. */
9219 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9220 else
9221 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9222 break;
9223 }
9224
9225 if (STACK_TOP_P (operands[0]))
9226 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9227 else
9228 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9229 break;
9230
9231 case MINUS:
9232 case DIV:
9233 if (MEM_P (operands[1]))
9234 {
9235 p = "r%z1\t%1";
9236 break;
9237 }
9238
9239 if (MEM_P (operands[2]))
9240 {
9241 p = "%z2\t%2";
9242 break;
9243 }
9244
9245 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9246 {
9247 #if SYSV386_COMPAT
9248 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9249 derived assemblers, confusingly reverse the direction of
9250 the operation for fsub{r} and fdiv{r} when the
9251 destination register is not st(0). The Intel assembler
9252 doesn't have this brain damage. Read !SYSV386_COMPAT to
9253 figure out what the hardware really does. */
9254 if (STACK_TOP_P (operands[0]))
9255 p = "{p\t%0, %2|rp\t%2, %0}";
9256 else
9257 p = "{rp\t%2, %0|p\t%0, %2}";
9258 #else
9259 if (STACK_TOP_P (operands[0]))
9260 /* As above for fmul/fadd, we can't store to st(0). */
9261 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9262 else
9263 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9264 #endif
9265 break;
9266 }
9267
9268 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9269 {
9270 #if SYSV386_COMPAT
9271 if (STACK_TOP_P (operands[0]))
9272 p = "{rp\t%0, %1|p\t%1, %0}";
9273 else
9274 p = "{p\t%1, %0|rp\t%0, %1}";
9275 #else
9276 if (STACK_TOP_P (operands[0]))
9277 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9278 else
9279 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9280 #endif
9281 break;
9282 }
9283
9284 if (STACK_TOP_P (operands[0]))
9285 {
9286 if (STACK_TOP_P (operands[1]))
9287 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9288 else
9289 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9290 break;
9291 }
9292 else if (STACK_TOP_P (operands[1]))
9293 {
9294 #if SYSV386_COMPAT
9295 p = "{\t%1, %0|r\t%0, %1}";
9296 #else
9297 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9298 #endif
9299 }
9300 else
9301 {
9302 #if SYSV386_COMPAT
9303 p = "{r\t%2, %0|\t%0, %2}";
9304 #else
9305 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9306 #endif
9307 }
9308 break;
9309
9310 default:
9311 gcc_unreachable ();
9312 }
9313
9314 strcat (buf, p);
9315 return buf;
9316 }
9317
9318 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
9319
9320 int
9321 ix86_mode_needed (int entity, rtx insn)
9322 {
9323 enum attr_i387_cw mode;
9324
9325 /* The mode UNINITIALIZED is used to store the control word after a
9326 function call or ASM pattern. The mode ANY specifies that the function
9327 has no requirements on the control word and makes no changes to the
9328 bits we are interested in. */
9329
9330 if (CALL_P (insn)
9331 || (NONJUMP_INSN_P (insn)
9332 && (asm_noperands (PATTERN (insn)) >= 0
9333 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9334 return I387_CW_UNINITIALIZED;
9335
9336 if (recog_memoized (insn) < 0)
9337 return I387_CW_ANY;
9338
9339 mode = get_attr_i387_cw (insn);
9340
9341 switch (entity)
9342 {
9343 case I387_TRUNC:
9344 if (mode == I387_CW_TRUNC)
9345 return mode;
9346 break;
9347
9348 case I387_FLOOR:
9349 if (mode == I387_CW_FLOOR)
9350 return mode;
9351 break;
9352
9353 case I387_CEIL:
9354 if (mode == I387_CW_CEIL)
9355 return mode;
9356 break;
9357
9358 case I387_MASK_PM:
9359 if (mode == I387_CW_MASK_PM)
9360 return mode;
9361 break;
9362
9363 default:
9364 gcc_unreachable ();
9365 }
9366
9367 return I387_CW_ANY;
9368 }
9369
9370 /* Output code to initialize the control word copies used by trunc?f?i
9371 and rounding patterns. MODE selects the required variant; the current
9372 control word is saved and a modified copy is stored in the slot for MODE. */
9373
9374 void
9375 emit_i387_cw_initialization (int mode)
9376 {
9377 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9378 rtx new_mode;
9379
9380 enum ix86_stack_slot slot;
9381
9382 rtx reg = gen_reg_rtx (HImode);
9383
9384 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9385 emit_move_insn (reg, copy_rtx (stored_mode));
9386
9387 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9388 {
9389 switch (mode)
9390 {
9391 case I387_CW_TRUNC:
9392 /* round toward zero (truncate) */
9393 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9394 slot = SLOT_CW_TRUNC;
9395 break;
9396
9397 case I387_CW_FLOOR:
9398 /* round down toward -oo */
9399 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9401 slot = SLOT_CW_FLOOR;
9402 break;
9403
9404 case I387_CW_CEIL:
9405 /* round up toward +oo */
9406 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9407 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9408 slot = SLOT_CW_CEIL;
9409 break;
9410
9411 case I387_CW_MASK_PM:
9412 /* mask precision exception for nearbyint() */
9413 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9414 slot = SLOT_CW_MASK_PM;
9415 break;
9416
9417 default:
9418 gcc_unreachable ();
9419 }
9420 }
9421 else
9422 {
9423 switch (mode)
9424 {
9425 case I387_CW_TRUNC:
9426 /* round toward zero (truncate) */
9427 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9428 slot = SLOT_CW_TRUNC;
9429 break;
9430
9431 case I387_CW_FLOOR:
9432 /* round down toward -oo */
9433 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9434 slot = SLOT_CW_FLOOR;
9435 break;
9436
9437 case I387_CW_CEIL:
9438 /* round up toward +oo */
9439 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9440 slot = SLOT_CW_CEIL;
9441 break;
9442
9443 case I387_CW_MASK_PM:
9444 /* mask precision exception for nearbyint() */
9445 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9446 slot = SLOT_CW_MASK_PM;
9447 break;
9448
9449 default:
9450 gcc_unreachable ();
9451 }
9452 }
9453
9454 gcc_assert (slot < MAX_386_STACK_LOCALS);
9455
9456 new_mode = assign_386_stack_local (HImode, slot);
9457 emit_move_insn (new_mode, reg);
9458 }
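
/* For reference, the bits manipulated above live in the x87 control word
   as follows (standard x87 layout, summarized here for the reader):

     bits 11:10  RC, rounding control   00 = to nearest (even)
                                         01 = down, toward -inf  (0x0400)
                                         10 = up, toward +inf    (0x0800)
                                         11 = toward zero        (0x0c00)
     bit  5      PM, precision mask      1 = mask the precision
                                             exception           (0x0020)

   so e.g. the I387_CW_TRUNC case simply ORs in 0x0c00 to force truncation,
   while I387_CW_FLOOR first clears the RC field and then sets it to 01.  */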
9459
9460 /* Output code for INSN to convert a float to a signed int. OPERANDS
9461 are the insn operands. The output may be [HSD]Imode and the input
9462 operand may be [SDX]Fmode. */
9463
9464 const char *
9465 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9466 {
9467 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9468 int dimode_p = GET_MODE (operands[0]) == DImode;
9469 int round_mode = get_attr_i387_cw (insn);
9470
9471 /* Jump through a hoop or two for DImode, since the hardware has no
9472 non-popping instruction. We used to do this a different way, but
9473 that was somewhat fragile and broke with post-reload splitters. */
9474 if ((dimode_p || fisttp) && !stack_top_dies)
9475 output_asm_insn ("fld\t%y1", operands);
9476
9477 gcc_assert (STACK_TOP_P (operands[1]));
9478 gcc_assert (MEM_P (operands[0]));
9479 gcc_assert (GET_MODE (operands[1]) != TFmode);
9480
9481 if (fisttp)
9482 output_asm_insn ("fisttp%z0\t%0", operands);
9483 else
9484 {
9485 if (round_mode != I387_CW_ANY)
9486 output_asm_insn ("fldcw\t%3", operands);
9487 if (stack_top_dies || dimode_p)
9488 output_asm_insn ("fistp%z0\t%0", operands);
9489 else
9490 output_asm_insn ("fist%z0\t%0", operands);
9491 if (round_mode != I387_CW_ANY)
9492 output_asm_insn ("fldcw\t%2", operands);
9493 }
9494
9495 return "";
9496 }
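
/* As an illustration (assuming an SImode destination in memory and a
   non-default rounding mode; the stack offsets are arbitrary placeholders),
   the non-fisttp path above typically expands to a sequence like

       fldcw   -4(%ebp)      # %3: load the modified control word
       fistpl  -8(%ebp)      # %z0 gives the 'l' suffix for SImode
       fldcw   -2(%ebp)      # %2: restore the saved control word

   with the fldcw pair omitted when the rounding mode is I387_CW_ANY.  */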
9497
9498 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9499 have the values zero or one, indicates the ffreep insn's operand
9500 from the OPERANDS array. */
9501
9502 static const char *
9503 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9504 {
9505 if (TARGET_USE_FFREEP)
9506 #if HAVE_AS_IX86_FFREEP
9507 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9508 #else
9509 {
9510 static char retval[] = ".word\t0xc_df";
9511 int regno = REGNO (operands[opno]);
9512
9513 gcc_assert (FP_REGNO_P (regno));
9514
9515 retval[9] = '0' + (regno - FIRST_STACK_REG);
9516 return retval;
9517 }
9518 #endif
9519
9520 return opno ? "fstp\t%y1" : "fstp\t%y0";
9521 }
9522
9523
9524 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9525 should be used. UNORDERED_P is true when fucom should be used. */
9526
9527 const char *
9528 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9529 {
9530 int stack_top_dies;
9531 rtx cmp_op0, cmp_op1;
9532 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9533
9534 if (eflags_p)
9535 {
9536 cmp_op0 = operands[0];
9537 cmp_op1 = operands[1];
9538 }
9539 else
9540 {
9541 cmp_op0 = operands[1];
9542 cmp_op1 = operands[2];
9543 }
9544
9545 if (is_sse)
9546 {
9547 if (GET_MODE (operands[0]) == SFmode)
9548 if (unordered_p)
9549 return "ucomiss\t{%1, %0|%0, %1}";
9550 else
9551 return "comiss\t{%1, %0|%0, %1}";
9552 else
9553 if (unordered_p)
9554 return "ucomisd\t{%1, %0|%0, %1}";
9555 else
9556 return "comisd\t{%1, %0|%0, %1}";
9557 }
9558
9559 gcc_assert (STACK_TOP_P (cmp_op0));
9560
9561 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9562
9563 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9564 {
9565 if (stack_top_dies)
9566 {
9567 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9568 return output_387_ffreep (operands, 1);
9569 }
9570 else
9571 return "ftst\n\tfnstsw\t%0";
9572 }
9573
9574 if (STACK_REG_P (cmp_op1)
9575 && stack_top_dies
9576 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9577 && REGNO (cmp_op1) != FIRST_STACK_REG)
9578 {
9579 /* If both the top of the 387 stack and the other operand (which is
9580 also a stack register) die, then this must be a `fcompp' float
9581 compare. */
9582
9583 if (eflags_p)
9584 {
9585 /* There is no double popping fcomi variant. Fortunately,
9586 eflags is immune from the fstp's cc clobbering. */
9587 if (unordered_p)
9588 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9589 else
9590 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9591 return output_387_ffreep (operands, 0);
9592 }
9593 else
9594 {
9595 if (unordered_p)
9596 return "fucompp\n\tfnstsw\t%0";
9597 else
9598 return "fcompp\n\tfnstsw\t%0";
9599 }
9600 }
9601 else
9602 {
9603 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9604
9605 static const char * const alt[16] =
9606 {
9607 "fcom%z2\t%y2\n\tfnstsw\t%0",
9608 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9609 "fucom%z2\t%y2\n\tfnstsw\t%0",
9610 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9611
9612 "ficom%z2\t%y2\n\tfnstsw\t%0",
9613 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9614 NULL,
9615 NULL,
9616
9617 "fcomi\t{%y1, %0|%0, %y1}",
9618 "fcomip\t{%y1, %0|%0, %y1}",
9619 "fucomi\t{%y1, %0|%0, %y1}",
9620 "fucomip\t{%y1, %0|%0, %y1}",
9621
9622 NULL,
9623 NULL,
9624 NULL,
9625 NULL
9626 };
9627
9628 int mask;
9629 const char *ret;
9630
9631 mask = eflags_p << 3;
9632 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9633 mask |= unordered_p << 1;
9634 mask |= stack_top_dies;
9635
9636 gcc_assert (mask < 16);
9637 ret = alt[mask];
9638 gcc_assert (ret);
9639
9640 return ret;
9641 }
9642 }
9643
9644 void
9645 ix86_output_addr_vec_elt (FILE *file, int value)
9646 {
9647 const char *directive = ASM_LONG;
9648
9649 #ifdef ASM_QUAD
9650 if (TARGET_64BIT)
9651 directive = ASM_QUAD;
9652 #else
9653 gcc_assert (!TARGET_64BIT);
9654 #endif
9655
9656 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9657 }
9658
9659 void
9660 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9661 {
9662 const char *directive = ASM_LONG;
9663
9664 #ifdef ASM_QUAD
9665 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9666 directive = ASM_QUAD;
9667 #else
9668 gcc_assert (!TARGET_64BIT);
9669 #endif
9670 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9671 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9672 fprintf (file, "%s%s%d-%s%d\n",
9673 directive, LPREFIX, value, LPREFIX, rel);
9674 else if (HAVE_AS_GOTOFF_IN_DATA)
9675 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9676 #if TARGET_MACHO
9677 else if (TARGET_MACHO)
9678 {
9679 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9680 machopic_output_function_base_name (file);
9681 fprintf(file, "\n");
9682 }
9683 #endif
9684 else
9685 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9686 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9687 }
9688 \f
9689 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9690 for the target. */
9691
9692 void
9693 ix86_expand_clear (rtx dest)
9694 {
9695 rtx tmp;
9696
9697 /* We play register width games, which are only valid after reload. */
9698 gcc_assert (reload_completed);
9699
9700 /* Avoid HImode and its attendant prefix byte. */
9701 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9702 dest = gen_rtx_REG (SImode, REGNO (dest));
9703 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9704
9705 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9706 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9707 {
9708 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9710 }
9711
9712 emit_insn (tmp);
9713 }
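
/* In other words, for an SImode destination in %eax this emits either

       xorl  %eax, %eax      # when xor is preferred or optimizing for size
   or
       movl  $0, %eax        # when TARGET_USE_MOV0 and not optimizing
                             # for size

   which is why the xor form is wrapped in a PARALLEL that clobbers the
   flags register above.  */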
9714
9715 /* X is an unchanging MEM. If it is a constant pool reference, return
9716 the constant pool rtx, else NULL. */
9717
9718 rtx
9719 maybe_get_pool_constant (rtx x)
9720 {
9721 x = ix86_delegitimize_address (XEXP (x, 0));
9722
9723 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9724 return get_pool_constant (x);
9725
9726 return NULL_RTX;
9727 }
9728
9729 void
9730 ix86_expand_move (enum machine_mode mode, rtx operands[])
9731 {
9732 int strict = (reload_in_progress || reload_completed);
9733 rtx op0, op1;
9734 enum tls_model model;
9735
9736 op0 = operands[0];
9737 op1 = operands[1];
9738
9739 if (GET_CODE (op1) == SYMBOL_REF)
9740 {
9741 model = SYMBOL_REF_TLS_MODEL (op1);
9742 if (model)
9743 {
9744 op1 = legitimize_tls_address (op1, model, true);
9745 op1 = force_operand (op1, op0);
9746 if (op1 == op0)
9747 return;
9748 }
9749 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9750 && SYMBOL_REF_DLLIMPORT_P (op1))
9751 op1 = legitimize_dllimport_symbol (op1, false);
9752 }
9753 else if (GET_CODE (op1) == CONST
9754 && GET_CODE (XEXP (op1, 0)) == PLUS
9755 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9756 {
9757 rtx addend = XEXP (XEXP (op1, 0), 1);
9758 rtx symbol = XEXP (XEXP (op1, 0), 0);
9759 rtx tmp = NULL;
9760
9761 model = SYMBOL_REF_TLS_MODEL (symbol);
9762 if (model)
9763 tmp = legitimize_tls_address (symbol, model, true);
9764 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9765 && SYMBOL_REF_DLLIMPORT_P (symbol))
9766 tmp = legitimize_dllimport_symbol (symbol, true);
9767
9768 if (tmp)
9769 {
9770 tmp = force_operand (tmp, NULL);
9771 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9772 op0, 1, OPTAB_DIRECT);
9773 if (tmp == op0)
9774 return;
9775 }
9776 }
9777
9778 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9779 {
9780 if (TARGET_MACHO && !TARGET_64BIT)
9781 {
9782 #if TARGET_MACHO
9783 if (MACHOPIC_PURE)
9784 {
9785 rtx temp = ((reload_in_progress
9786 || ((op0 && REG_P (op0))
9787 && mode == Pmode))
9788 ? op0 : gen_reg_rtx (Pmode));
9789 op1 = machopic_indirect_data_reference (op1, temp);
9790 op1 = machopic_legitimize_pic_address (op1, mode,
9791 temp == op1 ? 0 : temp);
9792 }
9793 else if (MACHOPIC_INDIRECT)
9794 op1 = machopic_indirect_data_reference (op1, 0);
9795 if (op0 == op1)
9796 return;
9797 #endif
9798 }
9799 else
9800 {
9801 if (MEM_P (op0))
9802 op1 = force_reg (Pmode, op1);
9803 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9804 {
9805 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9806 op1 = legitimize_pic_address (op1, reg);
9807 if (op0 == op1)
9808 return;
9809 }
9810 }
9811 }
9812 else
9813 {
9814 if (MEM_P (op0)
9815 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9816 || !push_operand (op0, mode))
9817 && MEM_P (op1))
9818 op1 = force_reg (mode, op1);
9819
9820 if (push_operand (op0, mode)
9821 && ! general_no_elim_operand (op1, mode))
9822 op1 = copy_to_mode_reg (mode, op1);
9823
9824 /* Force large constants in 64-bit compilation into a register
9825 to get them CSEd. */
9826 if (TARGET_64BIT && mode == DImode
9827 && immediate_operand (op1, mode)
9828 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9829 && !register_operand (op0, mode)
9830 && optimize && !reload_completed && !reload_in_progress)
9831 op1 = copy_to_mode_reg (mode, op1);
9832
9833 if (FLOAT_MODE_P (mode))
9834 {
9835 /* If we are loading a floating point constant to a register,
9836 force the value to memory now, since we'll get better code
9837 out of the back end. */
9838
9839 if (strict)
9840 ;
9841 else if (GET_CODE (op1) == CONST_DOUBLE)
9842 {
9843 op1 = validize_mem (force_const_mem (mode, op1));
9844 if (!register_operand (op0, mode))
9845 {
9846 rtx temp = gen_reg_rtx (mode);
9847 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9848 emit_move_insn (op0, temp);
9849 return;
9850 }
9851 }
9852 }
9853 }
9854
9855 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9856 }
9857
9858 void
9859 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9860 {
9861 rtx op0 = operands[0], op1 = operands[1];
9862 unsigned int align = GET_MODE_ALIGNMENT (mode);
9863
9864 /* Force constants other than zero into memory. We do not know how
9865 the instructions used to build constants modify the upper 64 bits
9866 of the register; once we have that information, we may be able
9867 to handle some of them more efficiently. */
9868 if ((reload_in_progress | reload_completed) == 0
9869 && register_operand (op0, mode)
9870 && (CONSTANT_P (op1)
9871 || (GET_CODE (op1) == SUBREG
9872 && CONSTANT_P (SUBREG_REG (op1))))
9873 && standard_sse_constant_p (op1) <= 0)
9874 op1 = validize_mem (force_const_mem (mode, op1));
9875
9876 /* TDmode values are passed as TImode on the stack. TImode values
9877 are moved via xmm registers, and moving them to the stack can result
9878 in unaligned memory access. Use ix86_expand_vector_move_misalign()
9879 if the memory operand is not aligned correctly. */
9880 if (!no_new_pseudos
9881 && (mode == TImode) && !TARGET_64BIT
9882 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9883 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9884 {
9885 rtx tmp[2];
9886
9887 /* ix86_expand_vector_move_misalign() does not like constants ... */
9888 if (CONSTANT_P (op1)
9889 || (GET_CODE (op1) == SUBREG
9890 && CONSTANT_P (SUBREG_REG (op1))))
9891 op1 = validize_mem (force_const_mem (mode, op1));
9892
9893 /* ... nor both arguments in memory. */
9894 if (!register_operand (op0, mode)
9895 && !register_operand (op1, mode))
9896 op1 = force_reg (mode, op1);
9897
9898 tmp[0] = op0; tmp[1] = op1;
9899 ix86_expand_vector_move_misalign (mode, tmp);
9900 return;
9901 }
9902
9903 /* Make operand1 a register if it isn't already. */
9904 if (!no_new_pseudos
9905 && !register_operand (op0, mode)
9906 && !register_operand (op1, mode))
9907 {
9908 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9909 return;
9910 }
9911
9912 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9913 }
9914
9915 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9916 straight to ix86_expand_vector_move. */
9917 /* Code generation for scalar reg-reg moves of single and double precision data:
9918 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9919 movaps reg, reg
9920 else
9921 movss reg, reg
9922 if (x86_sse_partial_reg_dependency == true)
9923 movapd reg, reg
9924 else
9925 movsd reg, reg
9926
9927 Code generation for scalar loads of double precision data:
9928 if (x86_sse_split_regs == true)
9929 movlpd mem, reg (gas syntax)
9930 else
9931 movsd mem, reg
9932
9933 Code generation for unaligned packed loads of single precision data
9934 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9935 if (x86_sse_unaligned_move_optimal)
9936 movups mem, reg
9937
9938 if (x86_sse_partial_reg_dependency == true)
9939 {
9940 xorps reg, reg
9941 movlps mem, reg
9942 movhps mem+8, reg
9943 }
9944 else
9945 {
9946 movlps mem, reg
9947 movhps mem+8, reg
9948 }
9949
9950 Code generation for unaligned packed loads of double precision data
9951 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9952 if (x86_sse_unaligned_move_optimal)
9953 movupd mem, reg
9954
9955 if (x86_sse_split_regs == true)
9956 {
9957 movlpd mem, reg
9958 movhpd mem+8, reg
9959 }
9960 else
9961 {
9962 movsd mem, reg
9963 movhpd mem+8, reg
9964 }
9965 */
9966
9967 void
9968 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9969 {
9970 rtx op0, op1, m;
9971
9972 op0 = operands[0];
9973 op1 = operands[1];
9974
9975 if (MEM_P (op1))
9976 {
9977 /* If we're optimizing for size, movups is the smallest. */
9978 if (optimize_size)
9979 {
9980 op0 = gen_lowpart (V4SFmode, op0);
9981 op1 = gen_lowpart (V4SFmode, op1);
9982 emit_insn (gen_sse_movups (op0, op1));
9983 return;
9984 }
9985
9986 /* ??? If we have typed data, then it would appear that using
9987 movdqu is the only way to get unaligned data loaded with
9988 integer type. */
9989 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9990 {
9991 op0 = gen_lowpart (V16QImode, op0);
9992 op1 = gen_lowpart (V16QImode, op1);
9993 emit_insn (gen_sse2_movdqu (op0, op1));
9994 return;
9995 }
9996
9997 if (TARGET_SSE2 && mode == V2DFmode)
9998 {
9999 rtx zero;
10000
10001 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10002 {
10003 op0 = gen_lowpart (V2DFmode, op0);
10004 op1 = gen_lowpart (V2DFmode, op1);
10005 emit_insn (gen_sse2_movupd (op0, op1));
10006 return;
10007 }
10008
10009 /* When SSE registers are split into halves, we can avoid
10010 writing to the top half twice. */
10011 if (TARGET_SSE_SPLIT_REGS)
10012 {
10013 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10014 zero = op0;
10015 }
10016 else
10017 {
10018 /* ??? Not sure about the best option for the Intel chips.
10019 The following would seem to satisfy; the register is
10020 entirely cleared, breaking the dependency chain. We
10021 then store to the upper half, with a dependency depth
10022 of one. A rumor has it that Intel recommends two movsd
10023 followed by an unpacklpd, but this is unconfirmed. And
10024 given that the dependency depth of the unpacklpd would
10025 still be one, I'm not sure why this would be better. */
10026 zero = CONST0_RTX (V2DFmode);
10027 }
10028
10029 m = adjust_address (op1, DFmode, 0);
10030 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10031 m = adjust_address (op1, DFmode, 8);
10032 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10033 }
10034 else
10035 {
10036 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10037 {
10038 op0 = gen_lowpart (V4SFmode, op0);
10039 op1 = gen_lowpart (V4SFmode, op1);
10040 emit_insn (gen_sse_movups (op0, op1));
10041 return;
10042 }
10043
10044 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10045 emit_move_insn (op0, CONST0_RTX (mode));
10046 else
10047 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10048
10049 if (mode != V4SFmode)
10050 op0 = gen_lowpart (V4SFmode, op0);
10051 m = adjust_address (op1, V2SFmode, 0);
10052 emit_insn (gen_sse_loadlps (op0, op0, m));
10053 m = adjust_address (op1, V2SFmode, 8);
10054 emit_insn (gen_sse_loadhps (op0, op0, m));
10055 }
10056 }
10057 else if (MEM_P (op0))
10058 {
10059 /* If we're optimizing for size, movups is the smallest. */
10060 if (optimize_size)
10061 {
10062 op0 = gen_lowpart (V4SFmode, op0);
10063 op1 = gen_lowpart (V4SFmode, op1);
10064 emit_insn (gen_sse_movups (op0, op1));
10065 return;
10066 }
10067
10068 /* ??? Similar to above, only less clear because of quote
10069 typeless stores unquote. */
10070 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10071 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10072 {
10073 op0 = gen_lowpart (V16QImode, op0);
10074 op1 = gen_lowpart (V16QImode, op1);
10075 emit_insn (gen_sse2_movdqu (op0, op1));
10076 return;
10077 }
10078
10079 if (TARGET_SSE2 && mode == V2DFmode)
10080 {
10081 m = adjust_address (op0, DFmode, 0);
10082 emit_insn (gen_sse2_storelpd (m, op1));
10083 m = adjust_address (op0, DFmode, 8);
10084 emit_insn (gen_sse2_storehpd (m, op1));
10085 }
10086 else
10087 {
10088 if (mode != V4SFmode)
10089 op1 = gen_lowpart (V4SFmode, op1);
10090 m = adjust_address (op0, V2SFmode, 0);
10091 emit_insn (gen_sse_storelps (m, op1));
10092 m = adjust_address (op0, V2SFmode, 8);
10093 emit_insn (gen_sse_storehps (m, op1));
10094 }
10095 }
10096 else
10097 gcc_unreachable ();
10098 }
10099
10100 /* Expand a push in MODE. This is some mode for which we do not support
10101 proper push instructions, at least from the registers that we expect
10102 the value to live in. */
10103
10104 void
10105 ix86_expand_push (enum machine_mode mode, rtx x)
10106 {
10107 rtx tmp;
10108
10109 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10110 GEN_INT (-GET_MODE_SIZE (mode)),
10111 stack_pointer_rtx, 1, OPTAB_DIRECT);
10112 if (tmp != stack_pointer_rtx)
10113 emit_move_insn (stack_pointer_rtx, tmp);
10114
10115 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10116 emit_move_insn (tmp, x);
10117 }
10118
10119 /* Helper function of ix86_fixup_binary_operands to canonicalize
10120 operand order. Returns true if the operands should be swapped. */
10121
10122 static bool
10123 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10124 rtx operands[])
10125 {
10126 rtx dst = operands[0];
10127 rtx src1 = operands[1];
10128 rtx src2 = operands[2];
10129
10130 /* If the operation is not commutative, we can't do anything. */
10131 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10132 return false;
10133
10134 /* Highest priority is that src1 should match dst. */
10135 if (rtx_equal_p (dst, src1))
10136 return false;
10137 if (rtx_equal_p (dst, src2))
10138 return true;
10139
10140 /* Next highest priority is that immediate constants come second. */
10141 if (immediate_operand (src2, mode))
10142 return false;
10143 if (immediate_operand (src1, mode))
10144 return true;
10145
10146 /* Lowest priority is that memory references should come second. */
10147 if (MEM_P (src2))
10148 return false;
10149 if (MEM_P (src1))
10150 return true;
10151
10152 return false;
10153 }
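
/* For example, when expanding a commutative "a = b + a" the operands are
   swapped so that the insn becomes "a = a + b"; the two-address patterns
   then only need the destination to match the first source.  Likewise
   "a = const + mem" becomes "a = mem + const", keeping the immediate in
   the second slot where the insn templates expect it.  */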
10154
10155
10156 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10157 destination to use for the operation. If different from the true
10158 destination in operands[0], a copy operation will be required. */
10159
10160 rtx
10161 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10162 rtx operands[])
10163 {
10164 rtx dst = operands[0];
10165 rtx src1 = operands[1];
10166 rtx src2 = operands[2];
10167
10168 /* Canonicalize operand order. */
10169 if (ix86_swap_binary_operands_p (code, mode, operands))
10170 {
10171 rtx temp = src1;
10172 src1 = src2;
10173 src2 = temp;
10174 }
10175
10176 /* Both source operands cannot be in memory. */
10177 if (MEM_P (src1) && MEM_P (src2))
10178 {
10179 /* Optimization: Only read from memory once. */
10180 if (rtx_equal_p (src1, src2))
10181 {
10182 src2 = force_reg (mode, src2);
10183 src1 = src2;
10184 }
10185 else
10186 src2 = force_reg (mode, src2);
10187 }
10188
10189 /* If the destination is memory, and we do not have matching source
10190 operands, do things in registers. */
10191 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10192 dst = gen_reg_rtx (mode);
10193
10194 /* Source 1 cannot be a constant. */
10195 if (CONSTANT_P (src1))
10196 src1 = force_reg (mode, src1);
10197
10198 /* Source 1 cannot be a non-matching memory. */
10199 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10200 src1 = force_reg (mode, src1);
10201
10202 operands[1] = src1;
10203 operands[2] = src2;
10204 return dst;
10205 }
10206
10207 /* Similarly, but assume that the destination has already been
10208 set up properly. */
10209
10210 void
10211 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10212 enum machine_mode mode, rtx operands[])
10213 {
10214 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10215 gcc_assert (dst == operands[0]);
10216 }
10217
10218 /* Attempt to expand a binary operator. Make the expansion closer to the
10219 actual machine than just general_operand, which would allow 3 separate
10220 memory references (one output, two inputs) in a single insn. */
10221
10222 void
10223 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10224 rtx operands[])
10225 {
10226 rtx src1, src2, dst, op, clob;
10227
10228 dst = ix86_fixup_binary_operands (code, mode, operands);
10229 src1 = operands[1];
10230 src2 = operands[2];
10231
10232 /* Emit the instruction. */
10233
10234 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10235 if (reload_in_progress)
10236 {
10237 /* Reload doesn't know about the flags register, and doesn't know that
10238 it doesn't want to clobber it. We can only do this with PLUS. */
10239 gcc_assert (code == PLUS);
10240 emit_insn (op);
10241 }
10242 else
10243 {
10244 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10245 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10246 }
10247
10248 /* Fix up the destination if needed. */
10249 if (dst != operands[0])
10250 emit_move_insn (operands[0], dst);
10251 }
10252
10253 /* Return TRUE or FALSE depending on whether the binary operator meets the
10254 appropriate constraints. */
10255
10256 int
10257 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10258 rtx operands[3])
10259 {
10260 rtx dst = operands[0];
10261 rtx src1 = operands[1];
10262 rtx src2 = operands[2];
10263
10264 /* Both source operands cannot be in memory. */
10265 if (MEM_P (src1) && MEM_P (src2))
10266 return 0;
10267
10268 /* Canonicalize operand order for commutative operators. */
10269 if (ix86_swap_binary_operands_p (code, mode, operands))
10270 {
10271 rtx temp = src1;
10272 src1 = src2;
10273 src2 = temp;
10274 }
10275
10276 /* If the destination is memory, we must have a matching source operand. */
10277 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10278 return 0;
10279
10280 /* Source 1 cannot be a constant. */
10281 if (CONSTANT_P (src1))
10282 return 0;
10283
10284 /* Source 1 cannot be a non-matching memory. */
10285 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10286 return 0;
10287
10288 return 1;
10289 }
10290
10291 /* Attempt to expand a unary operator. Make the expansion closer to the
10292 actual machine than just general_operand, which would allow 2 separate
10293 memory references (one output, one input) in a single insn. */
10294
10295 void
10296 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10297 rtx operands[])
10298 {
10299 int matching_memory;
10300 rtx src, dst, op, clob;
10301
10302 dst = operands[0];
10303 src = operands[1];
10304
10305 /* If the destination is memory, and we do not have matching source
10306 operands, do things in registers. */
10307 matching_memory = 0;
10308 if (MEM_P (dst))
10309 {
10310 if (rtx_equal_p (dst, src))
10311 matching_memory = 1;
10312 else
10313 dst = gen_reg_rtx (mode);
10314 }
10315
10316 /* When the source operand is memory, the destination must match. */
10317 if (MEM_P (src) && !matching_memory)
10318 src = force_reg (mode, src);
10319
10320 /* Emit the instruction. */
10321
10322 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10323 if (reload_in_progress || code == NOT)
10324 {
10325 /* Reload doesn't know about the flags register, and doesn't know that
10326 it doesn't want to clobber it. */
10327 gcc_assert (code == NOT);
10328 emit_insn (op);
10329 }
10330 else
10331 {
10332 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10333 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10334 }
10335
10336 /* Fix up the destination if needed. */
10337 if (dst != operands[0])
10338 emit_move_insn (operands[0], dst);
10339 }
10340
10341 /* Return TRUE or FALSE depending on whether the unary operator meets the
10342 appropriate constraints. */
10343
10344 int
10345 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10346 enum machine_mode mode ATTRIBUTE_UNUSED,
10347 rtx operands[2] ATTRIBUTE_UNUSED)
10348 {
10349 /* If one of the operands is memory, the source and destination must match. */
10350 if ((MEM_P (operands[0])
10351 || MEM_P (operands[1]))
10352 && ! rtx_equal_p (operands[0], operands[1]))
10353 return FALSE;
10354 return TRUE;
10355 }
10356
10357 /* Post-reload splitter for converting an SF or DFmode value in an
10358 SSE register into an unsigned SImode. */
10359
10360 void
10361 ix86_split_convert_uns_si_sse (rtx operands[])
10362 {
10363 enum machine_mode vecmode;
10364 rtx value, large, zero_or_two31, input, two31, x;
10365
10366 large = operands[1];
10367 zero_or_two31 = operands[2];
10368 input = operands[3];
10369 two31 = operands[4];
10370 vecmode = GET_MODE (large);
10371 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10372
10373 /* Load up the value into the low element. We must ensure that the other
10374 elements are valid floats -- zero is the easiest such value. */
10375 if (MEM_P (input))
10376 {
10377 if (vecmode == V4SFmode)
10378 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10379 else
10380 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10381 }
10382 else
10383 {
10384 input = gen_rtx_REG (vecmode, REGNO (input));
10385 emit_move_insn (value, CONST0_RTX (vecmode));
10386 if (vecmode == V4SFmode)
10387 emit_insn (gen_sse_movss (value, value, input));
10388 else
10389 emit_insn (gen_sse2_movsd (value, value, input));
10390 }
10391
10392 emit_move_insn (large, two31);
10393 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10394
10395 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10396 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10397
10398 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10399 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10400
10401 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10402 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10403
10404 large = gen_rtx_REG (V4SImode, REGNO (large));
10405 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10406
10407 x = gen_rtx_REG (V4SImode, REGNO (value));
10408 if (vecmode == V4SFmode)
10409 emit_insn (gen_sse2_cvttps2dq (x, value));
10410 else
10411 emit_insn (gen_sse2_cvttpd2dq (x, value));
10412 value = x;
10413
10414 emit_insn (gen_xorv4si3 (value, value, large));
10415 }
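
/* A worked example of the splitter above (values chosen for illustration):
   for an input of 3000000000.0, which does not fit in a signed SImode,

     large          = (2^31 <= value) ? all-ones : 0    -> all-ones
     zero_or_two31  = 2^31 & large                      -> 2^31
     value          = 3000000000.0 - 2^31                = 852516352.0
     large          = all-ones << 31                     = 0x80000000
     truncating convert of value                         = 0x32d05e00
     result = convert ^ large                            = 0xb2d05e00
                                                          = 3000000000

   while for inputs below 2^31 the mask is zero and the result is just the
   plain truncating conversion.  */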
10416
10417 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10418 Expects the 64-bit DImode to be supplied in a pair of integral
10419 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10420 -mfpmath=sse, !optimize_size only. */
10421
10422 void
10423 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10424 {
10425 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10426 rtx int_xmm, fp_xmm;
10427 rtx biases, exponents;
10428 rtx x;
10429
10430 int_xmm = gen_reg_rtx (V4SImode);
10431 if (TARGET_INTER_UNIT_MOVES)
10432 emit_insn (gen_movdi_to_sse (int_xmm, input));
10433 else if (TARGET_SSE_SPLIT_REGS)
10434 {
10435 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10436 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10437 }
10438 else
10439 {
10440 x = gen_reg_rtx (V2DImode);
10441 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10442 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10443 }
10444
10445 x = gen_rtx_CONST_VECTOR (V4SImode,
10446 gen_rtvec (4, GEN_INT (0x43300000UL),
10447 GEN_INT (0x45300000UL),
10448 const0_rtx, const0_rtx));
10449 exponents = validize_mem (force_const_mem (V4SImode, x));
10450
10451 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10452 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10453
10454 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10455 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10456 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10457 (0x1.0p84 + double(fp_value_hi_xmm)).
10458 Note these exponents differ by 32. */
10459
10460 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10461
10462 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10463 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10464 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10465 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10466 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10467 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10468 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10469 biases = validize_mem (force_const_mem (V2DFmode, biases));
10470 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10471
10472 /* Add the upper and lower DFmode values together. */
10473 if (TARGET_SSE3)
10474 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10475 else
10476 {
10477 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10478 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10479 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10480 }
10481
10482 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10483 }
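
/* To make the exponent trick above concrete (numbers for illustration):
   for the 64-bit input 0x100000005 (2^32 + 5), the low and high 32-bit
   halves are 5 and 1.  After the punpckldq the two doubles are

     0x43300000_00000005  ==  2^52 + 5
     0x45300000_00000001  ==  2^84 + 1 * 2^32

   subtracting the 2^52 and 2^84 biases leaves 5.0 and 4294967296.0, and
   the final add of the two halves produces 4294967301.0 == 2^32 + 5.  */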
10484
10485 /* Convert an unsigned SImode value into a DFmode. Only currently used
10486 for SSE, but applicable anywhere. */
10487
10488 void
10489 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10490 {
10491 REAL_VALUE_TYPE TWO31r;
10492 rtx x, fp;
10493
10494 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10495 NULL, 1, OPTAB_DIRECT);
10496
10497 fp = gen_reg_rtx (DFmode);
10498 emit_insn (gen_floatsidf2 (fp, x));
10499
10500 real_ldexp (&TWO31r, &dconst1, 31);
10501 x = const_double_from_real_value (TWO31r, DFmode);
10502
10503 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10504 if (x != target)
10505 emit_move_insn (target, x);
10506 }
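
/* The PLUS/float/PLUS dance above is just a signed-range shift.  For
   example (values for illustration only): an unsigned input of 3000000000
   becomes 3000000000 - 2^31 = 852516352 after the first addition, which
   is a valid signed SImode value; floatsidf2 converts it to 852516352.0,
   and adding 2^31 back as a DFmode constant yields 3000000000.0.  Inputs
   below 2^31 go negative in the intermediate step and come back out
   unchanged in the same way.  */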
10507
10508 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10509 32-bit mode; otherwise we have a direct convert instruction. */
10510
10511 void
10512 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10513 {
10514 REAL_VALUE_TYPE TWO32r;
10515 rtx fp_lo, fp_hi, x;
10516
10517 fp_lo = gen_reg_rtx (DFmode);
10518 fp_hi = gen_reg_rtx (DFmode);
10519
10520 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10521
10522 real_ldexp (&TWO32r, &dconst1, 32);
10523 x = const_double_from_real_value (TWO32r, DFmode);
10524 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10525
10526 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10527
10528 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10529 0, OPTAB_DIRECT);
10530 if (x != target)
10531 emit_move_insn (target, x);
10532 }
10533
10534 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10535 For x86_32, -mfpmath=sse, !optimize_size only. */
10536 void
10537 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10538 {
10539 REAL_VALUE_TYPE ONE16r;
10540 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10541
10542 real_ldexp (&ONE16r, &dconst1, 16);
10543 x = const_double_from_real_value (ONE16r, SFmode);
10544 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10545 NULL, 0, OPTAB_DIRECT);
10546 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10547 NULL, 0, OPTAB_DIRECT);
10548 fp_hi = gen_reg_rtx (SFmode);
10549 fp_lo = gen_reg_rtx (SFmode);
10550 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10551 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10552 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10553 0, OPTAB_DIRECT);
10554 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10555 0, OPTAB_DIRECT);
10556 if (!rtx_equal_p (target, fp_hi))
10557 emit_move_insn (target, fp_hi);
10558 }
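
/* The 16/16 split above avoids treating the input as a negative signed
   number.  For illustration: an input of 0xdeadbeef is divided into
   hi = 0xdead and lo = 0xbeef; both halves are exact in SFmode, as is
   (float) 0xdead * 65536.0, so the final addition performs the single
   rounding and gives the correctly rounded SFmode value of
   0xdead * 65536 + 0xbeef.  */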
10559
10560 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10561 then replicate the value for all elements of the vector
10562 register. */
10563
10564 rtx
10565 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10566 {
10567 rtvec v;
10568 switch (mode)
10569 {
10570 case SImode:
10571 gcc_assert (vect);
10572 v = gen_rtvec (4, value, value, value, value);
10573 return gen_rtx_CONST_VECTOR (V4SImode, v);
10574
10575 case DImode:
10576 gcc_assert (vect);
10577 v = gen_rtvec (2, value, value);
10578 return gen_rtx_CONST_VECTOR (V2DImode, v);
10579
10580 case SFmode:
10581 if (vect)
10582 v = gen_rtvec (4, value, value, value, value);
10583 else
10584 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10585 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10586 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10587
10588 case DFmode:
10589 if (vect)
10590 v = gen_rtvec (2, value, value);
10591 else
10592 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10593 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10594
10595 default:
10596 gcc_unreachable ();
10597 }
10598 }
10599
10600 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10601 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10602 for an SSE register. If VECT is true, then replicate the mask for
10603 all elements of the vector register. If INVERT is true, then create
10604 a mask excluding the sign bit. */
10605
10606 rtx
10607 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10608 {
10609 enum machine_mode vec_mode, imode;
10610 HOST_WIDE_INT hi, lo;
10611 int shift = 63;
10612 rtx v;
10613 rtx mask;
10614
10615 /* Find the sign bit, sign extended to 2*HWI. */
10616 switch (mode)
10617 {
10618 case SImode:
10619 case SFmode:
10620 imode = SImode;
10621 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10622 lo = 0x80000000, hi = lo < 0;
10623 break;
10624
10625 case DImode:
10626 case DFmode:
10627 imode = DImode;
10628 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10629 if (HOST_BITS_PER_WIDE_INT >= 64)
10630 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10631 else
10632 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10633 break;
10634
10635 default:
10636 gcc_unreachable ();
10637 }
10638
10639 if (invert)
10640 lo = ~lo, hi = ~hi;
10641
10642 /* Force this value into the low part of a fp vector constant. */
10643 mask = immed_double_const (lo, hi, imode);
10644 mask = gen_lowpart (mode, mask);
10645
10646 v = ix86_build_const_vector (mode, vect, mask);
10647 return force_reg (vec_mode, v);
10648 }
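
/* Concretely, for DFmode this builds the vector constant
   { 0x8000000000000000, ... } (just the IEEE sign bit), or its complement
   0x7fffffffffffffff when INVERT is set; for SFmode the per-element values
   are 0x80000000 / 0x7fffffff.  The absneg and copysign expanders below
   then AND with the inverted mask for ABS and XOR with the plain sign-bit
   mask for NEG.  */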
10649
10650 /* Generate code for floating point ABS or NEG. */
10651
10652 void
10653 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10654 rtx operands[])
10655 {
10656 rtx mask, set, use, clob, dst, src;
10657 bool matching_memory;
10658 bool use_sse = false;
10659 bool vector_mode = VECTOR_MODE_P (mode);
10660 enum machine_mode elt_mode = mode;
10661
10662 if (vector_mode)
10663 {
10664 elt_mode = GET_MODE_INNER (mode);
10665 use_sse = true;
10666 }
10667 else if (TARGET_SSE_MATH)
10668 use_sse = SSE_FLOAT_MODE_P (mode);
10669
10670 /* NEG and ABS performed with SSE use bitwise mask operations.
10671 Create the appropriate mask now. */
10672 if (use_sse)
10673 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10674 else
10675 mask = NULL_RTX;
10676
10677 dst = operands[0];
10678 src = operands[1];
10679
10680 /* If the destination is memory, and we don't have matching source
10681 operands or we're using the x87, do things in registers. */
10682 matching_memory = false;
10683 if (MEM_P (dst))
10684 {
10685 if (use_sse && rtx_equal_p (dst, src))
10686 matching_memory = true;
10687 else
10688 dst = gen_reg_rtx (mode);
10689 }
10690 if (MEM_P (src) && !matching_memory)
10691 src = force_reg (mode, src);
10692
10693 if (vector_mode)
10694 {
10695 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10696 set = gen_rtx_SET (VOIDmode, dst, set);
10697 emit_insn (set);
10698 }
10699 else
10700 {
10701 set = gen_rtx_fmt_e (code, mode, src);
10702 set = gen_rtx_SET (VOIDmode, dst, set);
10703 if (mask)
10704 {
10705 use = gen_rtx_USE (VOIDmode, mask);
10706 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10707 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10708 gen_rtvec (3, set, use, clob)));
10709 }
10710 else
10711 emit_insn (set);
10712 }
10713
10714 if (dst != operands[0])
10715 emit_move_insn (operands[0], dst);
10716 }
10717
10718 /* Expand a copysign operation. Special case operand 0 being a constant. */
10719
10720 void
10721 ix86_expand_copysign (rtx operands[])
10722 {
10723 enum machine_mode mode, vmode;
10724 rtx dest, op0, op1, mask, nmask;
10725
10726 dest = operands[0];
10727 op0 = operands[1];
10728 op1 = operands[2];
10729
10730 mode = GET_MODE (dest);
10731 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10732
10733 if (GET_CODE (op0) == CONST_DOUBLE)
10734 {
10735 rtvec v;
10736
10737 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10738 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10739
10740 if (op0 == CONST0_RTX (mode))
10741 op0 = CONST0_RTX (vmode);
10742 else
10743 {
10744 if (mode == SFmode)
10745 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10746 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10747 else
10748 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10749 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10750 }
10751
10752 mask = ix86_build_signbit_mask (mode, 0, 0);
10753
10754 if (mode == SFmode)
10755 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10756 else
10757 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10758 }
10759 else
10760 {
10761 nmask = ix86_build_signbit_mask (mode, 0, 1);
10762 mask = ix86_build_signbit_mask (mode, 0, 0);
10763
10764 if (mode == SFmode)
10765 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10766 else
10767 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10768 }
10769 }
10770
10771 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10772 be a constant, and so has already been expanded into a vector constant. */
10773
10774 void
10775 ix86_split_copysign_const (rtx operands[])
10776 {
10777 enum machine_mode mode, vmode;
10778 rtx dest, op0, op1, mask, x;
10779
10780 dest = operands[0];
10781 op0 = operands[1];
10782 op1 = operands[2];
10783 mask = operands[3];
10784
10785 mode = GET_MODE (dest);
10786 vmode = GET_MODE (mask);
10787
10788 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10789 x = gen_rtx_AND (vmode, dest, mask);
10790 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10791
10792 if (op0 != CONST0_RTX (vmode))
10793 {
10794 x = gen_rtx_IOR (vmode, dest, op0);
10795 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10796 }
10797 }
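
/* The split above implements the bit-level identity
   copysign (c, y) = |c| | (y & signbit): MASK is the sign-bit vector built
   by ix86_build_signbit_mask, the AND isolates the sign bit, and the IOR
   merges in OP0, which already holds |c| as a vector constant.  The IOR is
   skipped when that constant is +0.0, since ORing in zero is a no-op.  */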
10798
10799 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10800 so we have to do two masks. */
10801
10802 void
10803 ix86_split_copysign_var (rtx operands[])
10804 {
10805 enum machine_mode mode, vmode;
10806 rtx dest, scratch, op0, op1, mask, nmask, x;
10807
10808 dest = operands[0];
10809 scratch = operands[1];
10810 op0 = operands[2];
10811 op1 = operands[3];
10812 nmask = operands[4];
10813 mask = operands[5];
10814
10815 mode = GET_MODE (dest);
10816 vmode = GET_MODE (mask);
10817
10818 if (rtx_equal_p (op0, op1))
10819 {
10820 /* Shouldn't happen often (it's useless, obviously), but when it does
10821 we'd generate incorrect code if we continue below. */
10822 emit_move_insn (dest, op0);
10823 return;
10824 }
10825
10826 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10827 {
10828 gcc_assert (REGNO (op1) == REGNO (scratch));
10829
10830 x = gen_rtx_AND (vmode, scratch, mask);
10831 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10832
10833 dest = mask;
10834 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10835 x = gen_rtx_NOT (vmode, dest);
10836 x = gen_rtx_AND (vmode, x, op0);
10837 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10838 }
10839 else
10840 {
10841 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10842 {
10843 x = gen_rtx_AND (vmode, scratch, mask);
10844 }
10845 else /* alternative 2,4 */
10846 {
10847 gcc_assert (REGNO (mask) == REGNO (scratch));
10848 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10849 x = gen_rtx_AND (vmode, scratch, op1);
10850 }
10851 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10852
10853 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10854 {
10855 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10856 x = gen_rtx_AND (vmode, dest, nmask);
10857 }
10858 else /* alternative 3,4 */
10859 {
10860 gcc_assert (REGNO (nmask) == REGNO (dest));
10861 dest = nmask;
10862 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10863 x = gen_rtx_AND (vmode, dest, op0);
10864 }
10865 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10866 }
10867
10868 x = gen_rtx_IOR (vmode, dest, scratch);
10869 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10870 }
10871
10872 /* Return TRUE or FALSE depending on whether the first SET in INSN
10873 has source and destination with matching CC modes, and whether the
10874 CC mode is at least as constrained as REQ_MODE. */
10875
10876 int
10877 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10878 {
10879 rtx set;
10880 enum machine_mode set_mode;
10881
10882 set = PATTERN (insn);
10883 if (GET_CODE (set) == PARALLEL)
10884 set = XVECEXP (set, 0, 0);
10885 gcc_assert (GET_CODE (set) == SET);
10886 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10887
10888 set_mode = GET_MODE (SET_DEST (set));
10889 switch (set_mode)
10890 {
10891 case CCNOmode:
10892 if (req_mode != CCNOmode
10893 && (req_mode != CCmode
10894 || XEXP (SET_SRC (set), 1) != const0_rtx))
10895 return 0;
10896 break;
10897 case CCmode:
10898 if (req_mode == CCGCmode)
10899 return 0;
10900 /* FALLTHRU */
10901 case CCGCmode:
10902 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10903 return 0;
10904 /* FALLTHRU */
10905 case CCGOCmode:
10906 if (req_mode == CCZmode)
10907 return 0;
10908 /* FALLTHRU */
10909 case CCZmode:
10910 break;
10911
10912 default:
10913 gcc_unreachable ();
10914 }
10915
10916 return (GET_MODE (SET_SRC (set)) == set_mode);
10917 }
10918
10919 /* Generate insn patterns to do an integer compare of OPERANDS. */
10920
10921 static rtx
10922 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10923 {
10924 enum machine_mode cmpmode;
10925 rtx tmp, flags;
10926
10927 cmpmode = SELECT_CC_MODE (code, op0, op1);
10928 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10929
10930 /* This is very simple, but making the interface the same as in the
10931 FP case makes the rest of the code easier. */
10932 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10933 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10934
10935 /* Return the test that should be put into the flags user, i.e.
10936 the bcc, scc, or cmov instruction. */
10937 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10938 }
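
/* For illustration: for (eq:SI x (const_int 0)) the code above emits roughly
       (set (reg:CCZ FLAGS_REG) (compare:CCZ (reg:SI x) (const_int 0)))
   and hands back (eq (reg:CCZ FLAGS_REG) (const_int 0)) for the flags user. */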
10939
10940 /* Figure out whether to use ordered or unordered fp comparisons.
10941 Return the appropriate mode to use. */
10942
10943 enum machine_mode
10944 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10945 {
10946 /* ??? In order to make all comparisons reversible, we do all comparisons
10947 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10948 between trapping and nontrapping forms of comparisons, we can make
10949 inequality comparisons trapping again, since that results in better
10950 code when using FCOM based compares. */
10951 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10952 }
10953
10954 enum machine_mode
10955 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10956 {
10957 enum machine_mode mode = GET_MODE (op0);
10958
10959 if (SCALAR_FLOAT_MODE_P (mode))
10960 {
10961 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10962 return ix86_fp_compare_mode (code);
10963 }
10964
10965 switch (code)
10966 {
10967 /* Only zero flag is needed. */
10968 case EQ: /* ZF=0 */
10969 case NE: /* ZF!=0 */
10970 return CCZmode;
10971 /* Codes needing carry flag. */
10972 case GEU: /* CF=0 */
10973 case GTU: /* CF=0 & ZF=0 */
10974 case LTU: /* CF=1 */
10975 case LEU: /* CF=1 | ZF=1 */
10976 return CCmode;
10977 /* Codes possibly doable only with sign flag when
10978 comparing against zero. */
10979 case GE: /* SF=OF or SF=0 */
10980 case LT: /* SF<>OF or SF=1 */
10981 if (op1 == const0_rtx)
10982 return CCGOCmode;
10983 else
10984 /* For other cases Carry flag is not required. */
10985 return CCGCmode;
10986 /* Codes doable only with sign flag when comparing
10987 against zero, but we miss jump instruction for it
10988 so we need to use relational tests against overflow
10989 that thus needs to be zero. */
10990 case GT: /* ZF=0 & SF=OF */
10991 case LE: /* ZF=1 | SF<>OF */
10992 if (op1 == const0_rtx)
10993 return CCNOmode;
10994 else
10995 return CCGCmode;
10996 /* The strcmp pattern does (use flags), and combine may ask us for the
10997 proper mode. */
10998 case USE:
10999 return CCmode;
11000 default:
11001 gcc_unreachable ();
11002 }
11003 }
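
/* A few concrete instances of the mapping above, for reference: (eq x y)
   and (ne x y) only need ZF and get CCZmode; the unsigned codes need CF
   and get CCmode; (lt x 0) and (ge x 0) can be decided from the sign flag
   and get CCGOCmode, while the same codes against a nonzero operand fall
   back to CCGCmode. */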
11004
11005 /* Return the fixed registers used for condition codes. */
11006
11007 static bool
11008 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11009 {
11010 *p1 = FLAGS_REG;
11011 *p2 = FPSR_REG;
11012 return true;
11013 }
11014
11015 /* If two condition code modes are compatible, return a condition code
11016 mode which is compatible with both. Otherwise, return
11017 VOIDmode. */
11018
11019 static enum machine_mode
11020 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11021 {
11022 if (m1 == m2)
11023 return m1;
11024
11025 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11026 return VOIDmode;
11027
11028 if ((m1 == CCGCmode && m2 == CCGOCmode)
11029 || (m1 == CCGOCmode && m2 == CCGCmode))
11030 return CCGCmode;
11031
11032 switch (m1)
11033 {
11034 default:
11035 gcc_unreachable ();
11036
11037 case CCmode:
11038 case CCGCmode:
11039 case CCGOCmode:
11040 case CCNOmode:
11041 case CCAmode:
11042 case CCCmode:
11043 case CCOmode:
11044 case CCSmode:
11045 case CCZmode:
11046 switch (m2)
11047 {
11048 default:
11049 return VOIDmode;
11050
11051 case CCmode:
11052 case CCGCmode:
11053 case CCGOCmode:
11054 case CCNOmode:
11055 case CCAmode:
11056 case CCCmode:
11057 case CCOmode:
11058 case CCSmode:
11059 case CCZmode:
11060 return CCmode;
11061 }
11062
11063 case CCFPmode:
11064 case CCFPUmode:
11065 /* These are only compatible with themselves, which we already
11066 checked above. */
11067 return VOIDmode;
11068 }
11069 }
11070
11071 /* Split comparison code CODE into comparisons we can do using branch
11072 instructions. BYPASS_CODE is the comparison code for the branch that
11073 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
11074 is not required, its code is set to UNKNOWN.
11075 We never require more than two branches. */
11076
11077 void
11078 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11079 enum rtx_code *first_code,
11080 enum rtx_code *second_code)
11081 {
11082 *first_code = code;
11083 *bypass_code = UNKNOWN;
11084 *second_code = UNKNOWN;
11085
11086 /* The fcomi comparison sets flags as follows:
11087
11088 cmp ZF PF CF
11089 > 0 0 0
11090 < 0 0 1
11091 = 1 0 0
11092 un 1 1 1 */
11093
11094 switch (code)
11095 {
11096 case GT: /* GTU - CF=0 & ZF=0 */
11097 case GE: /* GEU - CF=0 */
11098 case ORDERED: /* PF=0 */
11099 case UNORDERED: /* PF=1 */
11100 case UNEQ: /* EQ - ZF=1 */
11101 case UNLT: /* LTU - CF=1 */
11102 case UNLE: /* LEU - CF=1 | ZF=1 */
11103 case LTGT: /* EQ - ZF=0 */
11104 break;
11105 case LT: /* LTU - CF=1 - fails on unordered */
11106 *first_code = UNLT;
11107 *bypass_code = UNORDERED;
11108 break;
11109 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11110 *first_code = UNLE;
11111 *bypass_code = UNORDERED;
11112 break;
11113 case EQ: /* EQ - ZF=1 - fails on unordered */
11114 *first_code = UNEQ;
11115 *bypass_code = UNORDERED;
11116 break;
11117 case NE: /* NE - ZF=0 - fails on unordered */
11118 *first_code = LTGT;
11119 *second_code = UNORDERED;
11120 break;
11121 case UNGE: /* GEU - CF=0 - fails on unordered */
11122 *first_code = GE;
11123 *second_code = UNORDERED;
11124 break;
11125 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11126 *first_code = GT;
11127 *second_code = UNORDERED;
11128 break;
11129 default:
11130 gcc_unreachable ();
11131 }
11132 if (!TARGET_IEEE_FP)
11133 {
11134 *second_code = UNKNOWN;
11135 *bypass_code = UNKNOWN;
11136 }
11137 }
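
/* Worked example of the split (a sketch of the eventual branch sequence):
   under TARGET_IEEE_FP, LT becomes first_code = UNLT with
   bypass_code = UNORDERED, so after an fcomi-style compare the branch
   expands roughly to
       jp   .Lskip     ; bypass: operands unordered, fall through as false
       jb   target     ; UNLT, i.e. CF=1
     .Lskip:
   while NE instead uses a second branch to the target: jne target; jp target. */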
11138
11139 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11140 All of the following functions use the number of instructions as the cost metric.
11141 In the future this should be tweaked to compute bytes for optimize_size and
11142 take into account the performance of various instructions on various CPUs. */
11143 static int
11144 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11145 {
11146 if (!TARGET_IEEE_FP)
11147 return 4;
11148 /* The cost of code output by ix86_expand_fp_compare. */
11149 switch (code)
11150 {
11151 case UNLE:
11152 case UNLT:
11153 case LTGT:
11154 case GT:
11155 case GE:
11156 case UNORDERED:
11157 case ORDERED:
11158 case UNEQ:
11159 return 4;
11160 break;
11161 case LT:
11162 case NE:
11163 case EQ:
11164 case UNGE:
11165 return 5;
11166 break;
11167 case LE:
11168 case UNGT:
11169 return 6;
11170 break;
11171 default:
11172 gcc_unreachable ();
11173 }
11174 }
11175
11176 /* Return cost of comparison done using fcomi operation.
11177 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11178 static int
11179 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11180 {
11181 enum rtx_code bypass_code, first_code, second_code;
11182 /* Return an arbitrarily high cost when the instruction is not supported -
11183 this prevents gcc from using it. */
11184 if (!TARGET_CMOVE)
11185 return 1024;
11186 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11187 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11188 }
11189
11190 /* Return cost of comparison done using sahf operation.
11191 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11192 static int
11193 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11194 {
11195 enum rtx_code bypass_code, first_code, second_code;
11196 /* Return an arbitrarily high cost when the instruction is not preferred -
11197 this prevents gcc from using it. */
11198 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11199 return 1024;
11200 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11201 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11202 }
11203
11204 /* Compute cost of the comparison done using any method.
11205 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11206 static int
11207 ix86_fp_comparison_cost (enum rtx_code code)
11208 {
11209 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11210 int min;
11211
11212 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11213 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11214
11215 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11216 if (min > sahf_cost)
11217 min = sahf_cost;
11218 if (min > fcomi_cost)
11219 min = fcomi_cost;
11220 return min;
11221 }
11222
11223 /* Return true if we should use an FCOMI instruction for this
11224 fp comparison. */
11225
11226 int
11227 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11228 {
11229 enum rtx_code swapped_code = swap_condition (code);
11230
11231 return ((ix86_fp_comparison_cost (code)
11232 == ix86_fp_comparison_fcomi_cost (code))
11233 || (ix86_fp_comparison_cost (swapped_code)
11234 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11235 }
11236
11237 /* Swap, force into registers, or otherwise massage the two operands
11238 to a fp comparison. The operands are updated in place; the new
11239 comparison code is returned. */
11240
11241 static enum rtx_code
11242 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11243 {
11244 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11245 rtx op0 = *pop0, op1 = *pop1;
11246 enum machine_mode op_mode = GET_MODE (op0);
11247 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11248
11249 /* All of the unordered compare instructions only work on registers.
11250 The same is true of the fcomi compare instructions. The XFmode
11251 compare instructions require registers except when comparing
11252 against zero or when converting operand 1 from fixed point to
11253 floating point. */
11254
11255 if (!is_sse
11256 && (fpcmp_mode == CCFPUmode
11257 || (op_mode == XFmode
11258 && ! (standard_80387_constant_p (op0) == 1
11259 || standard_80387_constant_p (op1) == 1)
11260 && GET_CODE (op1) != FLOAT)
11261 || ix86_use_fcomi_compare (code)))
11262 {
11263 op0 = force_reg (op_mode, op0);
11264 op1 = force_reg (op_mode, op1);
11265 }
11266 else
11267 {
11268 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11269 things around if they appear profitable, otherwise force op0
11270 into a register. */
11271
11272 if (standard_80387_constant_p (op0) == 0
11273 || (MEM_P (op0)
11274 && ! (standard_80387_constant_p (op1) == 0
11275 || MEM_P (op1))))
11276 {
11277 rtx tmp;
11278 tmp = op0, op0 = op1, op1 = tmp;
11279 code = swap_condition (code);
11280 }
11281
11282 if (!REG_P (op0))
11283 op0 = force_reg (op_mode, op0);
11284
11285 if (CONSTANT_P (op1))
11286 {
11287 int tmp = standard_80387_constant_p (op1);
11288 if (tmp == 0)
11289 op1 = validize_mem (force_const_mem (op_mode, op1));
11290 else if (tmp == 1)
11291 {
11292 if (TARGET_CMOVE)
11293 op1 = force_reg (op_mode, op1);
11294 }
11295 else
11296 op1 = force_reg (op_mode, op1);
11297 }
11298 }
11299
11300 /* Try to rearrange the comparison to make it cheaper. */
11301 if (ix86_fp_comparison_cost (code)
11302 > ix86_fp_comparison_cost (swap_condition (code))
11303 && (REG_P (op1) || !no_new_pseudos))
11304 {
11305 rtx tmp;
11306 tmp = op0, op0 = op1, op1 = tmp;
11307 code = swap_condition (code);
11308 if (!REG_P (op0))
11309 op0 = force_reg (op_mode, op0);
11310 }
11311
11312 *pop0 = op0;
11313 *pop1 = op1;
11314 return code;
11315 }
11316
11317 /* Convert comparison codes we use to represent FP comparison to integer
11318 code that will result in proper branch. Return UNKNOWN if no such code
11319 is available. */
11320
11321 enum rtx_code
11322 ix86_fp_compare_code_to_integer (enum rtx_code code)
11323 {
11324 switch (code)
11325 {
11326 case GT:
11327 return GTU;
11328 case GE:
11329 return GEU;
11330 case ORDERED:
11331 case UNORDERED:
11332 return code;
11333 break;
11334 case UNEQ:
11335 return EQ;
11336 break;
11337 case UNLT:
11338 return LTU;
11339 break;
11340 case UNLE:
11341 return LEU;
11342 break;
11343 case LTGT:
11344 return NE;
11345 break;
11346 default:
11347 return UNKNOWN;
11348 }
11349 }
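
/* This mapping simply restates the flag table in ix86_fp_comparison_codes:
   fcomi/sahf leave ZF/PF/CF laid out like an unsigned integer compare, so
   e.g. GT maps to GTU (CF=0 & ZF=0) and UNLT maps to LTU (CF=1). */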
11350
11351 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11352
11353 static rtx
11354 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11355 rtx *second_test, rtx *bypass_test)
11356 {
11357 enum machine_mode fpcmp_mode, intcmp_mode;
11358 rtx tmp, tmp2;
11359 int cost = ix86_fp_comparison_cost (code);
11360 enum rtx_code bypass_code, first_code, second_code;
11361
11362 fpcmp_mode = ix86_fp_compare_mode (code);
11363 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11364
11365 if (second_test)
11366 *second_test = NULL_RTX;
11367 if (bypass_test)
11368 *bypass_test = NULL_RTX;
11369
11370 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11371
11372 /* Do fcomi/sahf based test when profitable. */
11373 if ((TARGET_CMOVE || TARGET_SAHF)
11374 && (bypass_code == UNKNOWN || bypass_test)
11375 && (second_code == UNKNOWN || second_test)
11376 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11377 {
11378 if (TARGET_CMOVE)
11379 {
11380 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11381 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11382 tmp);
11383 emit_insn (tmp);
11384 }
11385 else
11386 {
11387 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11388 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11389 if (!scratch)
11390 scratch = gen_reg_rtx (HImode);
11391 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11392 emit_insn (gen_x86_sahf_1 (scratch));
11393 }
11394
11395 /* The FP codes work out to act like unsigned. */
11396 intcmp_mode = fpcmp_mode;
11397 code = first_code;
11398 if (bypass_code != UNKNOWN)
11399 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11400 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11401 const0_rtx);
11402 if (second_code != UNKNOWN)
11403 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11404 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11405 const0_rtx);
11406 }
11407 else
11408 {
11409 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11410 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11411 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11412 if (!scratch)
11413 scratch = gen_reg_rtx (HImode);
11414 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11415
11416 /* In the unordered case, we have to check C2 for NaN's, which
11417 doesn't happen to work out to anything nice combination-wise.
11418 So do some bit twiddling on the value we've got in AH to come
11419 up with an appropriate set of condition codes. */
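
/* As a reminder of where the magic masks below come from: after fnstsw the
   x87 condition bits end up in AH as C0 (0x01), C2 (0x04) and C3 (0x40),
   so 0x45 selects all three; under sahf these become CF, PF and ZF,
   matching the fcomi table in ix86_fp_comparison_codes. */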
11420
11421 intcmp_mode = CCNOmode;
11422 switch (code)
11423 {
11424 case GT:
11425 case UNGT:
11426 if (code == GT || !TARGET_IEEE_FP)
11427 {
11428 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11429 code = EQ;
11430 }
11431 else
11432 {
11433 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11434 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11435 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11436 intcmp_mode = CCmode;
11437 code = GEU;
11438 }
11439 break;
11440 case LT:
11441 case UNLT:
11442 if (code == LT && TARGET_IEEE_FP)
11443 {
11444 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11445 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11446 intcmp_mode = CCmode;
11447 code = EQ;
11448 }
11449 else
11450 {
11451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11452 code = NE;
11453 }
11454 break;
11455 case GE:
11456 case UNGE:
11457 if (code == GE || !TARGET_IEEE_FP)
11458 {
11459 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11460 code = EQ;
11461 }
11462 else
11463 {
11464 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11465 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11466 GEN_INT (0x01)));
11467 code = NE;
11468 }
11469 break;
11470 case LE:
11471 case UNLE:
11472 if (code == LE && TARGET_IEEE_FP)
11473 {
11474 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11475 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11476 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11477 intcmp_mode = CCmode;
11478 code = LTU;
11479 }
11480 else
11481 {
11482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11483 code = NE;
11484 }
11485 break;
11486 case EQ:
11487 case UNEQ:
11488 if (code == EQ && TARGET_IEEE_FP)
11489 {
11490 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11491 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11492 intcmp_mode = CCmode;
11493 code = EQ;
11494 }
11495 else
11496 {
11497 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11498 code = NE;
11499 break;
11500 }
11501 break;
11502 case NE:
11503 case LTGT:
11504 if (code == NE && TARGET_IEEE_FP)
11505 {
11506 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11507 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11508 GEN_INT (0x40)));
11509 code = NE;
11510 }
11511 else
11512 {
11513 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11514 code = EQ;
11515 }
11516 break;
11517
11518 case UNORDERED:
11519 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11520 code = NE;
11521 break;
11522 case ORDERED:
11523 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11524 code = EQ;
11525 break;
11526
11527 default:
11528 gcc_unreachable ();
11529 }
11530 }
11531
11532 /* Return the test that should be put into the flags user, i.e.
11533 the bcc, scc, or cmov instruction. */
11534 return gen_rtx_fmt_ee (code, VOIDmode,
11535 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11536 const0_rtx);
11537 }
11538
11539 rtx
11540 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11541 {
11542 rtx op0, op1, ret;
11543 op0 = ix86_compare_op0;
11544 op1 = ix86_compare_op1;
11545
11546 if (second_test)
11547 *second_test = NULL_RTX;
11548 if (bypass_test)
11549 *bypass_test = NULL_RTX;
11550
11551 if (ix86_compare_emitted)
11552 {
11553 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11554 ix86_compare_emitted = NULL_RTX;
11555 }
11556 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11557 {
11558 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11559 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11560 second_test, bypass_test);
11561 }
11562 else
11563 ret = ix86_expand_int_compare (code, op0, op1);
11564
11565 return ret;
11566 }
11567
11568 /* Return true if CODE will result in a nontrivial jump sequence. */
11569 bool
11570 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11571 {
11572 enum rtx_code bypass_code, first_code, second_code;
11573 if (!TARGET_CMOVE)
11574 return true;
11575 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11576 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11577 }
11578
11579 void
11580 ix86_expand_branch (enum rtx_code code, rtx label)
11581 {
11582 rtx tmp;
11583
11584 /* If we have emitted a compare insn, go straight to simple.
11585 ix86_expand_compare won't emit anything if ix86_compare_emitted
11586 is non NULL. */
11587 if (ix86_compare_emitted)
11588 goto simple;
11589
11590 switch (GET_MODE (ix86_compare_op0))
11591 {
11592 case QImode:
11593 case HImode:
11594 case SImode:
11595 simple:
11596 tmp = ix86_expand_compare (code, NULL, NULL);
11597 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11598 gen_rtx_LABEL_REF (VOIDmode, label),
11599 pc_rtx);
11600 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11601 return;
11602
11603 case SFmode:
11604 case DFmode:
11605 case XFmode:
11606 {
11607 rtvec vec;
11608 int use_fcomi;
11609 enum rtx_code bypass_code, first_code, second_code;
11610
11611 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11612 &ix86_compare_op1);
11613
11614 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11615
11616 /* Check whether we will use the natural sequence with one jump. If
11617 so, we can expand the jump early. Otherwise delay expansion by
11618 creating a compound insn so as not to confuse the optimizers. */
11619 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11620 && TARGET_CMOVE)
11621 {
11622 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11623 gen_rtx_LABEL_REF (VOIDmode, label),
11624 pc_rtx, NULL_RTX, NULL_RTX);
11625 }
11626 else
11627 {
11628 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11629 ix86_compare_op0, ix86_compare_op1);
11630 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11631 gen_rtx_LABEL_REF (VOIDmode, label),
11632 pc_rtx);
11633 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11634
11635 use_fcomi = ix86_use_fcomi_compare (code);
11636 vec = rtvec_alloc (3 + !use_fcomi);
11637 RTVEC_ELT (vec, 0) = tmp;
11638 RTVEC_ELT (vec, 1)
11639 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11640 RTVEC_ELT (vec, 2)
11641 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11642 if (! use_fcomi)
11643 RTVEC_ELT (vec, 3)
11644 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11645
11646 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11647 }
11648 return;
11649 }
11650
11651 case DImode:
11652 if (TARGET_64BIT)
11653 goto simple;
11654 case TImode:
11655 /* Expand DImode branch into multiple compare+branch. */
11656 {
11657 rtx lo[2], hi[2], label2;
11658 enum rtx_code code1, code2, code3;
11659 enum machine_mode submode;
11660
11661 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11662 {
11663 tmp = ix86_compare_op0;
11664 ix86_compare_op0 = ix86_compare_op1;
11665 ix86_compare_op1 = tmp;
11666 code = swap_condition (code);
11667 }
11668 if (GET_MODE (ix86_compare_op0) == DImode)
11669 {
11670 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11671 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11672 submode = SImode;
11673 }
11674 else
11675 {
11676 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11677 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11678 submode = DImode;
11679 }
11680
11681 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11682 avoid two branches. This costs one extra insn, so disable when
11683 optimizing for size. */
11684
11685 if ((code == EQ || code == NE)
11686 && (!optimize_size
11687 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11688 {
11689 rtx xor0, xor1;
11690
11691 xor1 = hi[0];
11692 if (hi[1] != const0_rtx)
11693 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11694 NULL_RTX, 0, OPTAB_WIDEN);
11695
11696 xor0 = lo[0];
11697 if (lo[1] != const0_rtx)
11698 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11699 NULL_RTX, 0, OPTAB_WIDEN);
11700
11701 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11702 NULL_RTX, 0, OPTAB_WIDEN);
11703
11704 ix86_compare_op0 = tmp;
11705 ix86_compare_op1 = const0_rtx;
11706 ix86_expand_branch (code, label);
11707 return;
11708 }
11709
11710 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11711 comparison, op1 is a constant and the low word is zero, then we can
11712 just examine the high word. */
11713
11714 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11715 switch (code)
11716 {
11717 case LT: case LTU: case GE: case GEU:
11718 ix86_compare_op0 = hi[0];
11719 ix86_compare_op1 = hi[1];
11720 ix86_expand_branch (code, label);
11721 return;
11722 default:
11723 break;
11724 }
11725
11726 /* Otherwise, we need two or three jumps. */
11727
11728 label2 = gen_label_rtx ();
11729
11730 code1 = code;
11731 code2 = swap_condition (code);
11732 code3 = unsigned_condition (code);
11733
11734 switch (code)
11735 {
11736 case LT: case GT: case LTU: case GTU:
11737 break;
11738
11739 case LE: code1 = LT; code2 = GT; break;
11740 case GE: code1 = GT; code2 = LT; break;
11741 case LEU: code1 = LTU; code2 = GTU; break;
11742 case GEU: code1 = GTU; code2 = LTU; break;
11743
11744 case EQ: code1 = UNKNOWN; code2 = NE; break;
11745 case NE: code2 = UNKNOWN; break;
11746
11747 default:
11748 gcc_unreachable ();
11749 }
11750
11751 /*
11752 * a < b =>
11753 * if (hi(a) < hi(b)) goto true;
11754 * if (hi(a) > hi(b)) goto false;
11755 * if (lo(a) < lo(b)) goto true;
11756 * false:
11757 */
11758
11759 ix86_compare_op0 = hi[0];
11760 ix86_compare_op1 = hi[1];
11761
11762 if (code1 != UNKNOWN)
11763 ix86_expand_branch (code1, label);
11764 if (code2 != UNKNOWN)
11765 ix86_expand_branch (code2, label2);
11766
11767 ix86_compare_op0 = lo[0];
11768 ix86_compare_op1 = lo[1];
11769 ix86_expand_branch (code3, label);
11770
11771 if (code2 != UNKNOWN)
11772 emit_label (label2);
11773 return;
11774 }
11775
11776 default:
11777 gcc_unreachable ();
11778 }
11779 }
11780
11781 /* Split branch based on floating point condition. */
11782 void
11783 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11784 rtx target1, rtx target2, rtx tmp, rtx pushed)
11785 {
11786 rtx second, bypass;
11787 rtx label = NULL_RTX;
11788 rtx condition;
11789 int bypass_probability = -1, second_probability = -1, probability = -1;
11790 rtx i;
11791
11792 if (target2 != pc_rtx)
11793 {
11794 rtx tmp = target2;
11795 code = reverse_condition_maybe_unordered (code);
11796 target2 = target1;
11797 target1 = tmp;
11798 }
11799
11800 condition = ix86_expand_fp_compare (code, op1, op2,
11801 tmp, &second, &bypass);
11802
11803 /* Remove pushed operand from stack. */
11804 if (pushed)
11805 ix86_free_from_memory (GET_MODE (pushed));
11806
11807 if (split_branch_probability >= 0)
11808 {
11809 /* Distribute the probabilities across the jumps.
11810 Assume that BYPASS and SECOND always test
11811 for UNORDERED. */
11812 probability = split_branch_probability;
11813
11814 /* A value of 1 is low enough that the probability does not need
11815 to be updated. Later we may run some experiments and see
11816 if unordered values are more frequent in practice. */
11817 if (bypass)
11818 bypass_probability = 1;
11819 if (second)
11820 second_probability = 1;
11821 }
11822 if (bypass != NULL_RTX)
11823 {
11824 label = gen_label_rtx ();
11825 i = emit_jump_insn (gen_rtx_SET
11826 (VOIDmode, pc_rtx,
11827 gen_rtx_IF_THEN_ELSE (VOIDmode,
11828 bypass,
11829 gen_rtx_LABEL_REF (VOIDmode,
11830 label),
11831 pc_rtx)));
11832 if (bypass_probability >= 0)
11833 REG_NOTES (i)
11834 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11835 GEN_INT (bypass_probability),
11836 REG_NOTES (i));
11837 }
11838 i = emit_jump_insn (gen_rtx_SET
11839 (VOIDmode, pc_rtx,
11840 gen_rtx_IF_THEN_ELSE (VOIDmode,
11841 condition, target1, target2)));
11842 if (probability >= 0)
11843 REG_NOTES (i)
11844 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11845 GEN_INT (probability),
11846 REG_NOTES (i));
11847 if (second != NULL_RTX)
11848 {
11849 i = emit_jump_insn (gen_rtx_SET
11850 (VOIDmode, pc_rtx,
11851 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11852 target2)));
11853 if (second_probability >= 0)
11854 REG_NOTES (i)
11855 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11856 GEN_INT (second_probability),
11857 REG_NOTES (i));
11858 }
11859 if (label != NULL_RTX)
11860 emit_label (label);
11861 }
11862
11863 int
11864 ix86_expand_setcc (enum rtx_code code, rtx dest)
11865 {
11866 rtx ret, tmp, tmpreg, equiv;
11867 rtx second_test, bypass_test;
11868
11869 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11870 return 0; /* FAIL */
11871
11872 gcc_assert (GET_MODE (dest) == QImode);
11873
11874 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11875 PUT_MODE (ret, QImode);
11876
11877 tmp = dest;
11878 tmpreg = dest;
11879
11880 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11881 if (bypass_test || second_test)
11882 {
11883 rtx test = second_test;
11884 int bypass = 0;
11885 rtx tmp2 = gen_reg_rtx (QImode);
11886 if (bypass_test)
11887 {
11888 gcc_assert (!second_test);
11889 test = bypass_test;
11890 bypass = 1;
11891 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11892 }
11893 PUT_MODE (test, QImode);
11894 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11895
11896 if (bypass)
11897 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11898 else
11899 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11900 }
11901
11902 /* Attach a REG_EQUAL note describing the comparison result. */
11903 if (ix86_compare_op0 && ix86_compare_op1)
11904 {
11905 equiv = simplify_gen_relational (code, QImode,
11906 GET_MODE (ix86_compare_op0),
11907 ix86_compare_op0, ix86_compare_op1);
11908 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11909 }
11910
11911 return 1; /* DONE */
11912 }
11913
11914 /* Expand comparison setting or clearing carry flag. Return true when
11915 successful and set pop for the operation. */
11916 static bool
11917 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11918 {
11919 enum machine_mode mode =
11920 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11921
11922 /* Do not handle DImode compares that go through the special path.
11923 Also we can't deal with FP compares yet; support could be added. */
11924 if (mode == (TARGET_64BIT ? TImode : DImode))
11925 return false;
11926
11927 if (SCALAR_FLOAT_MODE_P (mode))
11928 {
11929 rtx second_test = NULL, bypass_test = NULL;
11930 rtx compare_op, compare_seq;
11931
11932 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11933
11934 /* Shortcut: the following common codes never translate
11935 into carry flag compares. */
11936 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11937 || code == ORDERED || code == UNORDERED)
11938 return false;
11939
11940 /* These comparisons require the zero flag; swap the operands so they no longer do. */
11941 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11942 && !TARGET_IEEE_FP)
11943 {
11944 rtx tmp = op0;
11945 op0 = op1;
11946 op1 = tmp;
11947 code = swap_condition (code);
11948 }
11949
11950 /* Try to expand the comparison and verify that we end up with a carry flag
11951 based comparison. This fails to be true only when we decide to expand the
11952 comparison using arithmetic, which is not a common scenario. */
11953 start_sequence ();
11954 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11955 &second_test, &bypass_test);
11956 compare_seq = get_insns ();
11957 end_sequence ();
11958
11959 if (second_test || bypass_test)
11960 return false;
11961 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11962 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11963 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11964 else
11965 code = GET_CODE (compare_op);
11966 if (code != LTU && code != GEU)
11967 return false;
11968 emit_insn (compare_seq);
11969 *pop = compare_op;
11970 return true;
11971 }
11972 if (!INTEGRAL_MODE_P (mode))
11973 return false;
11974 switch (code)
11975 {
11976 case LTU:
11977 case GEU:
11978 break;
11979
11980 /* Convert a==0 into (unsigned)a<1. */
11981 case EQ:
11982 case NE:
11983 if (op1 != const0_rtx)
11984 return false;
11985 op1 = const1_rtx;
11986 code = (code == EQ ? LTU : GEU);
11987 break;
11988
11989 /* Convert a>b into b<a or a>=b-1. */
11990 case GTU:
11991 case LEU:
11992 if (CONST_INT_P (op1))
11993 {
11994 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11995 /* Bail out on overflow. We could still swap the operands, but that
11996 would force loading of the constant into a register. */
11997 if (op1 == const0_rtx
11998 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11999 return false;
12000 code = (code == GTU ? GEU : LTU);
12001 }
12002 else
12003 {
12004 rtx tmp = op1;
12005 op1 = op0;
12006 op0 = tmp;
12007 code = (code == GTU ? LTU : GEU);
12008 }
12009 break;
12010
12011 /* Convert a>=0 into (unsigned)a<0x80000000. */
12012 case LT:
12013 case GE:
12014 if (mode == DImode || op1 != const0_rtx)
12015 return false;
12016 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12017 code = (code == LT ? GEU : LTU);
12018 break;
12019 case LE:
12020 case GT:
12021 if (mode == DImode || op1 != constm1_rtx)
12022 return false;
12023 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12024 code = (code == LE ? GEU : LTU);
12025 break;
12026
12027 default:
12028 return false;
12029 }
12030 /* Swapping operands may cause a constant to appear as the first operand. */
12031 if (!nonimmediate_operand (op0, VOIDmode))
12032 {
12033 if (no_new_pseudos)
12034 return false;
12035 op0 = force_reg (mode, op0);
12036 }
12037 ix86_compare_op0 = op0;
12038 ix86_compare_op1 = op1;
12039 *pop = ix86_expand_compare (code, NULL, NULL);
12040 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12041 return true;
12042 }
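
/* A small example of the rewriting above (illustrative only): the signed
   test (eq x 0) is rewritten as the unsigned (ltu x 1), so the result is
   just the carry produced by subtracting 1 from x; similarly a constant
   (leu x 41) becomes (ltu x 42). */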
12043
12044 int
12045 ix86_expand_int_movcc (rtx operands[])
12046 {
12047 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12048 rtx compare_seq, compare_op;
12049 rtx second_test, bypass_test;
12050 enum machine_mode mode = GET_MODE (operands[0]);
12051 bool sign_bit_compare_p = false;
12052
12053 start_sequence ();
12054 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12055 compare_seq = get_insns ();
12056 end_sequence ();
12057
12058 compare_code = GET_CODE (compare_op);
12059
12060 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12061 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12062 sign_bit_compare_p = true;
12063
12064 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12065 HImode insns, we'd be swallowed in word prefix ops. */
12066
12067 if ((mode != HImode || TARGET_FAST_PREFIX)
12068 && (mode != (TARGET_64BIT ? TImode : DImode))
12069 && CONST_INT_P (operands[2])
12070 && CONST_INT_P (operands[3]))
12071 {
12072 rtx out = operands[0];
12073 HOST_WIDE_INT ct = INTVAL (operands[2]);
12074 HOST_WIDE_INT cf = INTVAL (operands[3]);
12075 HOST_WIDE_INT diff;
12076
12077 diff = ct - cf;
12078 /* Sign bit compares are better done using shifts than by using
12079 sbb. */
12080 if (sign_bit_compare_p
12081 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12082 ix86_compare_op1, &compare_op))
12083 {
12084 /* Detect overlap between destination and compare sources. */
12085 rtx tmp = out;
12086
12087 if (!sign_bit_compare_p)
12088 {
12089 bool fpcmp = false;
12090
12091 compare_code = GET_CODE (compare_op);
12092
12093 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12094 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12095 {
12096 fpcmp = true;
12097 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12098 }
12099
12100 /* To simplify the rest of the code, restrict to the GEU case. */
12101 if (compare_code == LTU)
12102 {
12103 HOST_WIDE_INT tmp = ct;
12104 ct = cf;
12105 cf = tmp;
12106 compare_code = reverse_condition (compare_code);
12107 code = reverse_condition (code);
12108 }
12109 else
12110 {
12111 if (fpcmp)
12112 PUT_CODE (compare_op,
12113 reverse_condition_maybe_unordered
12114 (GET_CODE (compare_op)));
12115 else
12116 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12117 }
12118 diff = ct - cf;
12119
12120 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12121 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12122 tmp = gen_reg_rtx (mode);
12123
12124 if (mode == DImode)
12125 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12126 else
12127 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12128 }
12129 else
12130 {
12131 if (code == GT || code == GE)
12132 code = reverse_condition (code);
12133 else
12134 {
12135 HOST_WIDE_INT tmp = ct;
12136 ct = cf;
12137 cf = tmp;
12138 diff = ct - cf;
12139 }
12140 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12141 ix86_compare_op1, VOIDmode, 0, -1);
12142 }
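
/* At this point TMP is expected to hold 0 or -1 (all bits set) depending on
   the comparison outcome: either from the sbb-style move above (sbb reg,reg
   leaves reg equal to minus the carry flag) or from emit_store_flag with a
   normalization of -1. The arithmetic below maps that {0, -1} value onto
   the constants ct and cf. */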
12143
12144 if (diff == 1)
12145 {
12146 /*
12147 * cmpl op0,op1
12148 * sbbl dest,dest
12149 * [addl dest, ct]
12150 *
12151 * Size 5 - 8.
12152 */
12153 if (ct)
12154 tmp = expand_simple_binop (mode, PLUS,
12155 tmp, GEN_INT (ct),
12156 copy_rtx (tmp), 1, OPTAB_DIRECT);
12157 }
12158 else if (cf == -1)
12159 {
12160 /*
12161 * cmpl op0,op1
12162 * sbbl dest,dest
12163 * orl $ct, dest
12164 *
12165 * Size 8.
12166 */
12167 tmp = expand_simple_binop (mode, IOR,
12168 tmp, GEN_INT (ct),
12169 copy_rtx (tmp), 1, OPTAB_DIRECT);
12170 }
12171 else if (diff == -1 && ct)
12172 {
12173 /*
12174 * cmpl op0,op1
12175 * sbbl dest,dest
12176 * notl dest
12177 * [addl dest, cf]
12178 *
12179 * Size 8 - 11.
12180 */
12181 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12182 if (cf)
12183 tmp = expand_simple_binop (mode, PLUS,
12184 copy_rtx (tmp), GEN_INT (cf),
12185 copy_rtx (tmp), 1, OPTAB_DIRECT);
12186 }
12187 else
12188 {
12189 /*
12190 * cmpl op0,op1
12191 * sbbl dest,dest
12192 * [notl dest]
12193 * andl cf - ct, dest
12194 * [addl dest, ct]
12195 *
12196 * Size 8 - 11.
12197 */
12198
12199 if (cf == 0)
12200 {
12201 cf = ct;
12202 ct = 0;
12203 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12204 }
12205
12206 tmp = expand_simple_binop (mode, AND,
12207 copy_rtx (tmp),
12208 gen_int_mode (cf - ct, mode),
12209 copy_rtx (tmp), 1, OPTAB_DIRECT);
12210 if (ct)
12211 tmp = expand_simple_binop (mode, PLUS,
12212 copy_rtx (tmp), GEN_INT (ct),
12213 copy_rtx (tmp), 1, OPTAB_DIRECT);
12214 }
12215
12216 if (!rtx_equal_p (tmp, out))
12217 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12218
12219 return 1; /* DONE */
12220 }
12221
12222 if (diff < 0)
12223 {
12224 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12225
12226 HOST_WIDE_INT tmp;
12227 tmp = ct, ct = cf, cf = tmp;
12228 diff = -diff;
12229
12230 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12231 {
12232 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12233
12234 /* We may be reversing an unordered compare to a normal compare, which
12235 is not valid in general (we may convert a non-trapping condition
12236 to a trapping one); however, on i386 we currently emit all
12237 comparisons unordered. */
12238 compare_code = reverse_condition_maybe_unordered (compare_code);
12239 code = reverse_condition_maybe_unordered (code);
12240 }
12241 else
12242 {
12243 compare_code = reverse_condition (compare_code);
12244 code = reverse_condition (code);
12245 }
12246 }
12247
12248 compare_code = UNKNOWN;
12249 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12250 && CONST_INT_P (ix86_compare_op1))
12251 {
12252 if (ix86_compare_op1 == const0_rtx
12253 && (code == LT || code == GE))
12254 compare_code = code;
12255 else if (ix86_compare_op1 == constm1_rtx)
12256 {
12257 if (code == LE)
12258 compare_code = LT;
12259 else if (code == GT)
12260 compare_code = GE;
12261 }
12262 }
12263
12264 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12265 if (compare_code != UNKNOWN
12266 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12267 && (cf == -1 || ct == -1))
12268 {
12269 /* If lea code below could be used, only optimize
12270 if it results in a 2 insn sequence. */
12271
12272 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12273 || diff == 3 || diff == 5 || diff == 9)
12274 || (compare_code == LT && ct == -1)
12275 || (compare_code == GE && cf == -1))
12276 {
12277 /*
12278 * notl op1 (if necessary)
12279 * sarl $31, op1
12280 * orl cf, op1
12281 */
12282 if (ct != -1)
12283 {
12284 cf = ct;
12285 ct = -1;
12286 code = reverse_condition (code);
12287 }
12288
12289 out = emit_store_flag (out, code, ix86_compare_op0,
12290 ix86_compare_op1, VOIDmode, 0, -1);
12291
12292 out = expand_simple_binop (mode, IOR,
12293 out, GEN_INT (cf),
12294 out, 1, OPTAB_DIRECT);
12295 if (out != operands[0])
12296 emit_move_insn (operands[0], out);
12297
12298 return 1; /* DONE */
12299 }
12300 }
12301
12302
12303 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12304 || diff == 3 || diff == 5 || diff == 9)
12305 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12306 && (mode != DImode
12307 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12308 {
12309 /*
12310 * xorl dest,dest
12311 * cmpl op1,op2
12312 * setcc dest
12313 * lea cf(dest*(ct-cf)),dest
12314 *
12315 * Size 14.
12316 *
12317 * This also catches the degenerate setcc-only case.
12318 */
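
/* A possible worked instance (illustrative only): for ct = 13, cf = 5 we
   get diff = 8, so after setcc leaves 0 or 1 in the result register the
   lea computes dest = 5 + dest*8, e.g. "leal 5(,%eax,8), %eax",
   selecting 5 or 13 without a branch. */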
12319
12320 rtx tmp;
12321 int nops;
12322
12323 out = emit_store_flag (out, code, ix86_compare_op0,
12324 ix86_compare_op1, VOIDmode, 0, 1);
12325
12326 nops = 0;
12327 /* On x86_64 the lea instruction operates on Pmode, so we need
12328 to do the arithmetic in the proper mode to match. */
12329 if (diff == 1)
12330 tmp = copy_rtx (out);
12331 else
12332 {
12333 rtx out1;
12334 out1 = copy_rtx (out);
12335 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12336 nops++;
12337 if (diff & 1)
12338 {
12339 tmp = gen_rtx_PLUS (mode, tmp, out1);
12340 nops++;
12341 }
12342 }
12343 if (cf != 0)
12344 {
12345 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12346 nops++;
12347 }
12348 if (!rtx_equal_p (tmp, out))
12349 {
12350 if (nops == 1)
12351 out = force_operand (tmp, copy_rtx (out));
12352 else
12353 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12354 }
12355 if (!rtx_equal_p (out, operands[0]))
12356 emit_move_insn (operands[0], copy_rtx (out));
12357
12358 return 1; /* DONE */
12359 }
12360
12361 /*
12362 * General case: Jumpful:
12363 * xorl dest,dest cmpl op1, op2
12364 * cmpl op1, op2 movl ct, dest
12365 * setcc dest jcc 1f
12366 * decl dest movl cf, dest
12367 * andl (cf-ct),dest 1:
12368 * addl ct,dest
12369 *
12370 * Size 20. Size 14.
12371 *
12372 * This is reasonably steep, but branch mispredict costs are
12373 * high on modern cpus, so consider failing only if optimizing
12374 * for space.
12375 */
12376
12377 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12378 && BRANCH_COST >= 2)
12379 {
12380 if (cf == 0)
12381 {
12382 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12383
12384 cf = ct;
12385 ct = 0;
12386
12387 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12388 {
12389 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12390
12391 /* We may be reversing an unordered compare to a normal compare,
12392 which is not valid in general (we may convert a non-trapping
12393 condition to a trapping one); however, on i386 we currently
12394 emit all comparisons unordered. */
12395 code = reverse_condition_maybe_unordered (code);
12396 }
12397 else
12398 {
12399 code = reverse_condition (code);
12400 if (compare_code != UNKNOWN)
12401 compare_code = reverse_condition (compare_code);
12402 }
12403 }
12404
12405 if (compare_code != UNKNOWN)
12406 {
12407 /* notl op1 (if needed)
12408 sarl $31, op1
12409 andl (cf-ct), op1
12410 addl ct, op1
12411
12412 For x < 0 (resp. x <= -1) there will be no notl,
12413 so if possible swap the constants to get rid of the
12414 complement.
12415 True/false will be -1/0 while code below (store flag
12416 followed by decrement) is 0/-1, so the constants need
12417 to be exchanged once more. */
12418
12419 if (compare_code == GE || !cf)
12420 {
12421 code = reverse_condition (code);
12422 compare_code = LT;
12423 }
12424 else
12425 {
12426 HOST_WIDE_INT tmp = cf;
12427 cf = ct;
12428 ct = tmp;
12429 }
12430
12431 out = emit_store_flag (out, code, ix86_compare_op0,
12432 ix86_compare_op1, VOIDmode, 0, -1);
12433 }
12434 else
12435 {
12436 out = emit_store_flag (out, code, ix86_compare_op0,
12437 ix86_compare_op1, VOIDmode, 0, 1);
12438
12439 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12440 copy_rtx (out), 1, OPTAB_DIRECT);
12441 }
12442
12443 out = expand_simple_binop (mode, AND, copy_rtx (out),
12444 gen_int_mode (cf - ct, mode),
12445 copy_rtx (out), 1, OPTAB_DIRECT);
12446 if (ct)
12447 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12448 copy_rtx (out), 1, OPTAB_DIRECT);
12449 if (!rtx_equal_p (out, operands[0]))
12450 emit_move_insn (operands[0], copy_rtx (out));
12451
12452 return 1; /* DONE */
12453 }
12454 }
12455
12456 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12457 {
12458 /* Try a few things more with specific constants and a variable. */
12459
12460 optab op;
12461 rtx var, orig_out, out, tmp;
12462
12463 if (BRANCH_COST <= 2)
12464 return 0; /* FAIL */
12465
12466 /* If one of the two operands is an interesting constant, load a
12467 constant with the above and mask it in with a logical operation. */
12468
12469 if (CONST_INT_P (operands[2]))
12470 {
12471 var = operands[3];
12472 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12473 operands[3] = constm1_rtx, op = and_optab;
12474 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12475 operands[3] = const0_rtx, op = ior_optab;
12476 else
12477 return 0; /* FAIL */
12478 }
12479 else if (CONST_INT_P (operands[3]))
12480 {
12481 var = operands[2];
12482 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12483 operands[2] = constm1_rtx, op = and_optab;
12484 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12485 operands[2] = const0_rtx, op = ior_optab;
12486 else
12487 return 0; /* FAIL */
12488 }
12489 else
12490 return 0; /* FAIL */
12491
12492 orig_out = operands[0];
12493 tmp = gen_reg_rtx (mode);
12494 operands[0] = tmp;
12495
12496 /* Recurse to get the constant loaded. */
12497 if (ix86_expand_int_movcc (operands) == 0)
12498 return 0; /* FAIL */
12499
12500 /* Mask in the interesting variable. */
12501 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12502 OPTAB_WIDEN);
12503 if (!rtx_equal_p (out, orig_out))
12504 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12505
12506 return 1; /* DONE */
12507 }
12508
12509 /*
12510 * For comparison with above,
12511 *
12512 * movl cf,dest
12513 * movl ct,tmp
12514 * cmpl op1,op2
12515 * cmovcc tmp,dest
12516 *
12517 * Size 15.
12518 */
12519
12520 if (! nonimmediate_operand (operands[2], mode))
12521 operands[2] = force_reg (mode, operands[2]);
12522 if (! nonimmediate_operand (operands[3], mode))
12523 operands[3] = force_reg (mode, operands[3]);
12524
12525 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12526 {
12527 rtx tmp = gen_reg_rtx (mode);
12528 emit_move_insn (tmp, operands[3]);
12529 operands[3] = tmp;
12530 }
12531 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12532 {
12533 rtx tmp = gen_reg_rtx (mode);
12534 emit_move_insn (tmp, operands[2]);
12535 operands[2] = tmp;
12536 }
12537
12538 if (! register_operand (operands[2], VOIDmode)
12539 && (mode == QImode
12540 || ! register_operand (operands[3], VOIDmode)))
12541 operands[2] = force_reg (mode, operands[2]);
12542
12543 if (mode == QImode
12544 && ! register_operand (operands[3], VOIDmode))
12545 operands[3] = force_reg (mode, operands[3]);
12546
12547 emit_insn (compare_seq);
12548 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12549 gen_rtx_IF_THEN_ELSE (mode,
12550 compare_op, operands[2],
12551 operands[3])));
12552 if (bypass_test)
12553 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12554 gen_rtx_IF_THEN_ELSE (mode,
12555 bypass_test,
12556 copy_rtx (operands[3]),
12557 copy_rtx (operands[0]))));
12558 if (second_test)
12559 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12560 gen_rtx_IF_THEN_ELSE (mode,
12561 second_test,
12562 copy_rtx (operands[2]),
12563 copy_rtx (operands[0]))));
12564
12565 return 1; /* DONE */
12566 }
12567
12568 /* Swap, force into registers, or otherwise massage the two operands
12569 to an sse comparison with a mask result. Thus we differ a bit from
12570 ix86_prepare_fp_compare_args which expects to produce a flags result.
12571
12572 The DEST operand exists to help determine whether to commute commutative
12573 operators. The POP0/POP1 operands are updated in place. The new
12574 comparison code is returned, or UNKNOWN if not implementable. */
12575
12576 static enum rtx_code
12577 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12578 rtx *pop0, rtx *pop1)
12579 {
12580 rtx tmp;
12581
12582 switch (code)
12583 {
12584 case LTGT:
12585 case UNEQ:
12586 /* We have no LTGT as an operator. We could implement it with
12587 NE & ORDERED, but this requires an extra temporary. It's
12588 not clear that it's worth it. */
12589 return UNKNOWN;
12590
12591 case LT:
12592 case LE:
12593 case UNGT:
12594 case UNGE:
12595 /* These are supported directly. */
12596 break;
12597
12598 case EQ:
12599 case NE:
12600 case UNORDERED:
12601 case ORDERED:
12602 /* For commutative operators, try to canonicalize the destination
12603 operand to be first in the comparison - this helps reload to
12604 avoid extra moves. */
12605 if (!dest || !rtx_equal_p (dest, *pop1))
12606 break;
12607 /* FALLTHRU */
12608
12609 case GE:
12610 case GT:
12611 case UNLE:
12612 case UNLT:
12613 /* These are not supported directly. Swap the comparison operands
12614 to transform into something that is supported. */
12615 tmp = *pop0;
12616 *pop0 = *pop1;
12617 *pop1 = tmp;
12618 code = swap_condition (code);
12619 break;
12620
12621 default:
12622 gcc_unreachable ();
12623 }
12624
12625 return code;
12626 }
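
/* For example, (gt a b) has no direct SSE compare predicate - cmpps/cmpss
   only provide eq/lt/le/unord/neq/nlt/nle/ord - so it is rewritten here
   as (lt b a) by swapping the operands. */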
12627
12628 /* Detect conditional moves that exactly match min/max operational
12629 semantics. Note that this is IEEE safe, as long as we don't
12630 interchange the operands.
12631
12632 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12633 and TRUE if the operation is successful and instructions are emitted. */
12634
12635 static bool
12636 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12637 rtx cmp_op1, rtx if_true, rtx if_false)
12638 {
12639 enum machine_mode mode;
12640 bool is_min;
12641 rtx tmp;
12642
12643 if (code == LT)
12644 ;
12645 else if (code == UNGE)
12646 {
12647 tmp = if_true;
12648 if_true = if_false;
12649 if_false = tmp;
12650 }
12651 else
12652 return false;
12653
12654 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12655 is_min = true;
12656 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12657 is_min = false;
12658 else
12659 return false;
12660
12661 mode = GET_MODE (dest);
12662
12663 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12664 but MODE may be a vector mode and thus not appropriate. */
12665 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12666 {
12667 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12668 rtvec v;
12669
12670 if_true = force_reg (mode, if_true);
12671 v = gen_rtvec (2, if_true, if_false);
12672 tmp = gen_rtx_UNSPEC (mode, v, u);
12673 }
12674 else
12675 {
12676 code = is_min ? SMIN : SMAX;
12677 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12678 }
12679
12680 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12681 return true;
12682 }
12683
12684 /* Expand an sse vector comparison. Return the register with the result. */
12685
12686 static rtx
12687 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12688 rtx op_true, rtx op_false)
12689 {
12690 enum machine_mode mode = GET_MODE (dest);
12691 rtx x;
12692
12693 cmp_op0 = force_reg (mode, cmp_op0);
12694 if (!nonimmediate_operand (cmp_op1, mode))
12695 cmp_op1 = force_reg (mode, cmp_op1);
12696
12697 if (optimize
12698 || reg_overlap_mentioned_p (dest, op_true)
12699 || reg_overlap_mentioned_p (dest, op_false))
12700 dest = gen_reg_rtx (mode);
12701
12702 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12703 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12704
12705 return dest;
12706 }
12707
12708 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12709 operations. This is used for both scalar and vector conditional moves. */
12710
12711 static void
12712 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12713 {
12714 enum machine_mode mode = GET_MODE (dest);
12715 rtx t2, t3, x;
12716
12717 if (op_false == CONST0_RTX (mode))
12718 {
12719 op_true = force_reg (mode, op_true);
12720 x = gen_rtx_AND (mode, cmp, op_true);
12721 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12722 }
12723 else if (op_true == CONST0_RTX (mode))
12724 {
12725 op_false = force_reg (mode, op_false);
12726 x = gen_rtx_NOT (mode, cmp);
12727 x = gen_rtx_AND (mode, x, op_false);
12728 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12729 }
12730 else
12731 {
12732 op_true = force_reg (mode, op_true);
12733 op_false = force_reg (mode, op_false);
12734
12735 t2 = gen_reg_rtx (mode);
12736 if (optimize)
12737 t3 = gen_reg_rtx (mode);
12738 else
12739 t3 = dest;
12740
12741 x = gen_rtx_AND (mode, op_true, cmp);
12742 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12743
12744 x = gen_rtx_NOT (mode, cmp);
12745 x = gen_rtx_AND (mode, x, op_false);
12746 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12747
12748 x = gen_rtx_IOR (mode, t3, t2);
12749 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12750 }
12751 }
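
/* In other words, the general case above computes the usual mask blend
       dest = (cmp & op_true) | (~cmp & op_false)
   with the single-AND shortcuts used when one of the arms is already zero. */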
12752
12753 /* Expand a floating-point conditional move. Return true if successful. */
12754
12755 int
12756 ix86_expand_fp_movcc (rtx operands[])
12757 {
12758 enum machine_mode mode = GET_MODE (operands[0]);
12759 enum rtx_code code = GET_CODE (operands[1]);
12760 rtx tmp, compare_op, second_test, bypass_test;
12761
12762 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12763 {
12764 enum machine_mode cmode;
12765
12766 /* Since we've no cmove for sse registers, don't force bad register
12767 allocation just to gain access to it. Deny movcc when the
12768 comparison mode doesn't match the move mode. */
12769 cmode = GET_MODE (ix86_compare_op0);
12770 if (cmode == VOIDmode)
12771 cmode = GET_MODE (ix86_compare_op1);
12772 if (cmode != mode)
12773 return 0;
12774
12775 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12776 &ix86_compare_op0,
12777 &ix86_compare_op1);
12778 if (code == UNKNOWN)
12779 return 0;
12780
12781 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12782 ix86_compare_op1, operands[2],
12783 operands[3]))
12784 return 1;
12785
12786 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12787 ix86_compare_op1, operands[2], operands[3]);
12788 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12789 return 1;
12790 }
12791
12792 /* The floating point conditional move instructions don't directly
12793 support conditions resulting from a signed integer comparison. */
12794
12795 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12796
12797 /* If the comparison is not representable as an fcmov condition (e.g. it
12798 came from a signed integer compare), materialize it with setcc and test against zero. */
12799
12800 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12801 {
12802 gcc_assert (!second_test && !bypass_test);
12803 tmp = gen_reg_rtx (QImode);
12804 ix86_expand_setcc (code, tmp);
12805 code = NE;
12806 ix86_compare_op0 = tmp;
12807 ix86_compare_op1 = const0_rtx;
12808 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12809 }
12810 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12811 {
12812 tmp = gen_reg_rtx (mode);
12813 emit_move_insn (tmp, operands[3]);
12814 operands[3] = tmp;
12815 }
12816 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12817 {
12818 tmp = gen_reg_rtx (mode);
12819 emit_move_insn (tmp, operands[2]);
12820 operands[2] = tmp;
12821 }
12822
12823 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12824 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12825 operands[2], operands[3])));
12826 if (bypass_test)
12827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12828 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12829 operands[3], operands[0])));
12830 if (second_test)
12831 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12832 gen_rtx_IF_THEN_ELSE (mode, second_test,
12833 operands[2], operands[0])));
12834
12835 return 1;
12836 }
12837
12838 /* Expand a floating-point vector conditional move; a vcond operation
12839 rather than a movcc operation. */
12840
12841 bool
12842 ix86_expand_fp_vcond (rtx operands[])
12843 {
12844 enum rtx_code code = GET_CODE (operands[3]);
12845 rtx cmp;
12846
12847 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12848 &operands[4], &operands[5]);
12849 if (code == UNKNOWN)
12850 return false;
12851
12852 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12853 operands[5], operands[1], operands[2]))
12854 return true;
12855
12856 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12857 operands[1], operands[2]);
12858 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12859 return true;
12860 }
12861
12862 /* Expand a signed/unsigned integral vector conditional move. */
12863
12864 bool
12865 ix86_expand_int_vcond (rtx operands[])
12866 {
12867 enum machine_mode mode = GET_MODE (operands[0]);
12868 enum rtx_code code = GET_CODE (operands[3]);
12869 bool negate = false;
12870 rtx x, cop0, cop1;
12871
12872 cop0 = operands[4];
12873 cop1 = operands[5];
12874
12875 /* Canonicalize the comparison to EQ, GT, GTU. */
12876 switch (code)
12877 {
12878 case EQ:
12879 case GT:
12880 case GTU:
12881 break;
12882
12883 case NE:
12884 case LE:
12885 case LEU:
12886 code = reverse_condition (code);
12887 negate = true;
12888 break;
12889
12890 case GE:
12891 case GEU:
12892 code = reverse_condition (code);
12893 negate = true;
12894 /* FALLTHRU */
12895
12896 case LT:
12897 case LTU:
12898 code = swap_condition (code);
12899 x = cop0, cop0 = cop1, cop1 = x;
12900 break;
12901
12902 default:
12903 gcc_unreachable ();
12904 }
12905
12906 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12907 if (mode == V2DImode)
12908 {
12909 switch (code)
12910 {
12911 case EQ:
12912 /* SSE4.1 supports EQ. */
12913 if (!TARGET_SSE4_1)
12914 return false;
12915 break;
12916
12917 case GT:
12918 case GTU:
12919 /* SSE4.2 supports GT/GTU. */
12920 if (!TARGET_SSE4_2)
12921 return false;
12922 break;
12923
12924 default:
12925 gcc_unreachable ();
12926 }
12927 }
12928
12929 /* Unsigned parallel compare is not supported by the hardware. Play some
12930 tricks to turn this into a signed comparison against 0. */
12931 if (code == GTU)
12932 {
12933 cop0 = force_reg (mode, cop0);
12934
12935 switch (mode)
12936 {
12937 case V4SImode:
12938 case V2DImode:
12939 {
12940 rtx t1, t2, mask;
12941
12942 /* Perform a parallel modulo subtraction. */
12943 t1 = gen_reg_rtx (mode);
12944 emit_insn ((mode == V4SImode
12945 ? gen_subv4si3
12946 : gen_subv2di3) (t1, cop0, cop1));
12947
12948 /* Extract the original sign bit of op0. */
12949 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12950 true, false);
12951 t2 = gen_reg_rtx (mode);
12952 emit_insn ((mode == V4SImode
12953 ? gen_andv4si3
12954 : gen_andv2di3) (t2, cop0, mask));
12955
12956 /* XOR it back into the result of the subtraction. This results
12957 in the sign bit set iff we saw unsigned underflow. */
12958 x = gen_reg_rtx (mode);
12959 emit_insn ((mode == V4SImode
12960 ? gen_xorv4si3
12961 : gen_xorv2di3) (x, t1, t2));
12962
12963 code = GT;
12964 }
12965 break;
12966
12967 case V16QImode:
12968 case V8HImode:
12969 /* Perform a parallel unsigned saturating subtraction. */
12970 x = gen_reg_rtx (mode);
12971 emit_insn (gen_rtx_SET (VOIDmode, x,
12972 gen_rtx_US_MINUS (mode, cop0, cop1)));
12973
12974 code = EQ;
12975 negate = !negate;
12976 break;
12977
12978 default:
12979 gcc_unreachable ();
12980 }
12981
12982 cop0 = x;
12983 cop1 = CONST0_RTX (mode);
12984 }
12985
12986 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12987 operands[1+negate], operands[2-negate]);
12988
12989 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12990 operands[2-negate]);
12991 return true;
12992 }
12993
12994 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12995 true if we should do zero extension, else sign extension. HIGH_P is
12996 true if we want the N/2 high elements, else the low elements. */
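/* For example (illustrative only): with a V8HImode source and HIGH_P false,
   the V4SImode destination receives the low four HImode elements, each
   zero- or sign-extended according to UNSIGNED_P. */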
12997
12998 void
12999 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13000 {
13001 enum machine_mode imode = GET_MODE (operands[1]);
13002 rtx (*unpack)(rtx, rtx, rtx);
13003 rtx se, dest;
13004
13005 switch (imode)
13006 {
13007 case V16QImode:
13008 if (high_p)
13009 unpack = gen_vec_interleave_highv16qi;
13010 else
13011 unpack = gen_vec_interleave_lowv16qi;
13012 break;
13013 case V8HImode:
13014 if (high_p)
13015 unpack = gen_vec_interleave_highv8hi;
13016 else
13017 unpack = gen_vec_interleave_lowv8hi;
13018 break;
13019 case V4SImode:
13020 if (high_p)
13021 unpack = gen_vec_interleave_highv4si;
13022 else
13023 unpack = gen_vec_interleave_lowv4si;
13024 break;
13025 default:
13026 gcc_unreachable ();
13027 }
13028
13029 dest = gen_lowpart (imode, operands[0]);
13030
13031 if (unsigned_p)
13032 se = force_reg (imode, CONST0_RTX (imode));
13033 else
13034 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13035 operands[1], pc_rtx, pc_rtx);
13036
13037 emit_insn (unpack (dest, operands[1], se));
13038 }
13039
13040 /* This function performs the same task as ix86_expand_sse_unpack,
13041 but with SSE4.1 instructions. */
13042
13043 void
13044 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13045 {
13046 enum machine_mode imode = GET_MODE (operands[1]);
13047 rtx (*unpack)(rtx, rtx);
13048 rtx src, dest;
13049
13050 switch (imode)
13051 {
13052 case V16QImode:
13053 if (unsigned_p)
13054 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13055 else
13056 unpack = gen_sse4_1_extendv8qiv8hi2;
13057 break;
13058 case V8HImode:
13059 if (unsigned_p)
13060 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13061 else
13062 unpack = gen_sse4_1_extendv4hiv4si2;
13063 break;
13064 case V4SImode:
13065 if (unsigned_p)
13066 unpack = gen_sse4_1_zero_extendv2siv2di2;
13067 else
13068 unpack = gen_sse4_1_extendv2siv2di2;
13069 break;
13070 default:
13071 gcc_unreachable ();
13072 }
13073
13074 dest = operands[0];
13075 if (high_p)
13076 {
13077 /* Shift higher 8 bytes to lower 8 bytes. */
13078 src = gen_reg_rtx (imode);
13079 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13080 gen_lowpart (TImode, operands[1]),
13081 GEN_INT (64)));
13082 }
13083 else
13084 src = operands[1];
13085
13086 emit_insn (unpack (dest, src));
13087 }
13088
13089 /* Expand conditional increment or decrement using adc/sbb instructions.
13090 The default case using setcc followed by the conditional move can be
13091 done by generic code. */
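/* For example (a sketch of the intended output, not emitted literally):
   "x = (a < b) ? x + 1 : x" with unsigned operands can become
   "cmp a, b ; adc x, 0", since the compare sets the carry flag exactly
   when a < b. */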
13092 int
13093 ix86_expand_int_addcc (rtx operands[])
13094 {
13095 enum rtx_code code = GET_CODE (operands[1]);
13096 rtx compare_op;
13097 rtx val = const0_rtx;
13098 bool fpcmp = false;
13099 enum machine_mode mode = GET_MODE (operands[0]);
13100
13101 if (operands[3] != const1_rtx
13102 && operands[3] != constm1_rtx)
13103 return 0;
13104 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13105 ix86_compare_op1, &compare_op))
13106 return 0;
13107 code = GET_CODE (compare_op);
13108
13109 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13110 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13111 {
13112 fpcmp = true;
13113 code = ix86_fp_compare_code_to_integer (code);
13114 }
13115
13116 if (code != LTU)
13117 {
13118 val = constm1_rtx;
13119 if (fpcmp)
13120 PUT_CODE (compare_op,
13121 reverse_condition_maybe_unordered
13122 (GET_CODE (compare_op)));
13123 else
13124 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13125 }
13126 PUT_MODE (compare_op, mode);
13127
13128 /* Construct either adc or sbb insn. */
13129 if ((code == LTU) == (operands[3] == constm1_rtx))
13130 {
13131 switch (GET_MODE (operands[0]))
13132 {
13133 case QImode:
13134 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13135 break;
13136 case HImode:
13137 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13138 break;
13139 case SImode:
13140 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13141 break;
13142 case DImode:
13143 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13144 break;
13145 default:
13146 gcc_unreachable ();
13147 }
13148 }
13149 else
13150 {
13151 switch (GET_MODE (operands[0]))
13152 {
13153 case QImode:
13154 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13155 break;
13156 case HImode:
13157 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13158 break;
13159 case SImode:
13160 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13161 break;
13162 case DImode:
13163 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13164 break;
13165 default:
13166 gcc_unreachable ();
13167 }
13168 }
13169 return 1; /* DONE */
13170 }
13171
13172
13173 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13174 works for floating point parameters and non-offsettable memories.
13175 For pushes, it returns just stack offsets; the values will be saved
13176 in the right order. At most three parts are generated. */
13177
13178 static int
13179 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13180 {
13181 int size;
13182
13183 if (!TARGET_64BIT)
13184 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13185 else
13186 size = (GET_MODE_SIZE (mode) + 4) / 8;
13187
13188 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13189 gcc_assert (size >= 2 && size <= 3);
13190
13191 /* Optimize constant pool reference to immediates. This is used by fp
13192 moves, which force all constants to memory to allow combining. */
13193 if (MEM_P (operand) && MEM_READONLY_P (operand))
13194 {
13195 rtx tmp = maybe_get_pool_constant (operand);
13196 if (tmp)
13197 operand = tmp;
13198 }
13199
13200 if (MEM_P (operand) && !offsettable_memref_p (operand))
13201 {
13202 /* The only non-offsettable memories we handle are pushes. */
13203 int ok = push_operand (operand, VOIDmode);
13204
13205 gcc_assert (ok);
13206
13207 operand = copy_rtx (operand);
13208 PUT_MODE (operand, Pmode);
13209 parts[0] = parts[1] = parts[2] = operand;
13210 return size;
13211 }
13212
13213 if (GET_CODE (operand) == CONST_VECTOR)
13214 {
13215 enum machine_mode imode = int_mode_for_mode (mode);
13216 /* Caution: if we looked through a constant pool memory above,
13217 the operand may actually have a different mode now. That's
13218 ok, since we want to pun this all the way back to an integer. */
13219 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13220 gcc_assert (operand != NULL);
13221 mode = imode;
13222 }
13223
13224 if (!TARGET_64BIT)
13225 {
13226 if (mode == DImode)
13227 split_di (&operand, 1, &parts[0], &parts[1]);
13228 else
13229 {
13230 if (REG_P (operand))
13231 {
13232 gcc_assert (reload_completed);
13233 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13234 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13235 if (size == 3)
13236 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13237 }
13238 else if (offsettable_memref_p (operand))
13239 {
13240 operand = adjust_address (operand, SImode, 0);
13241 parts[0] = operand;
13242 parts[1] = adjust_address (operand, SImode, 4);
13243 if (size == 3)
13244 parts[2] = adjust_address (operand, SImode, 8);
13245 }
13246 else if (GET_CODE (operand) == CONST_DOUBLE)
13247 {
13248 REAL_VALUE_TYPE r;
13249 long l[4];
13250
13251 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13252 switch (mode)
13253 {
13254 case XFmode:
13255 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13256 parts[2] = gen_int_mode (l[2], SImode);
13257 break;
13258 case DFmode:
13259 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13260 break;
13261 default:
13262 gcc_unreachable ();
13263 }
13264 parts[1] = gen_int_mode (l[1], SImode);
13265 parts[0] = gen_int_mode (l[0], SImode);
13266 }
13267 else
13268 gcc_unreachable ();
13269 }
13270 }
13271 else
13272 {
13273 if (mode == TImode)
13274 split_ti (&operand, 1, &parts[0], &parts[1]);
13275 if (mode == XFmode || mode == TFmode)
13276 {
13277 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13278 if (REG_P (operand))
13279 {
13280 gcc_assert (reload_completed);
13281 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13282 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13283 }
13284 else if (offsettable_memref_p (operand))
13285 {
13286 operand = adjust_address (operand, DImode, 0);
13287 parts[0] = operand;
13288 parts[1] = adjust_address (operand, upper_mode, 8);
13289 }
13290 else if (GET_CODE (operand) == CONST_DOUBLE)
13291 {
13292 REAL_VALUE_TYPE r;
13293 long l[4];
13294
13295 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13296 real_to_target (l, &r, mode);
13297
13298 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13299 if (HOST_BITS_PER_WIDE_INT >= 64)
13300 parts[0]
13301 = gen_int_mode
13302 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13303 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13304 DImode);
13305 else
13306 parts[0] = immed_double_const (l[0], l[1], DImode);
13307
13308 if (upper_mode == SImode)
13309 parts[1] = gen_int_mode (l[2], SImode);
13310 else if (HOST_BITS_PER_WIDE_INT >= 64)
13311 parts[1]
13312 = gen_int_mode
13313 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13314 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13315 DImode);
13316 else
13317 parts[1] = immed_double_const (l[2], l[3], DImode);
13318 }
13319 else
13320 gcc_unreachable ();
13321 }
13322 }
13323
13324 return size;
13325 }
13326
13327 /* Emit insns to perform a move or push of DI, DF, and XF values.
13328 All required insns are emitted directly by this function.
13329 Operands 2-4 receive the destination parts in the required order;
13330 operands 5-7 receive the corresponding source parts. */
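/* For instance, on a 32-bit target a DImode move is split into two SImode
   moves, emitted in an order chosen so that the first move does not clobber
   anything the second one still needs. */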
13331
13332 void
13333 ix86_split_long_move (rtx operands[])
13334 {
13335 rtx part[2][3];
13336 int nparts;
13337 int push = 0;
13338 int collisions = 0;
13339 enum machine_mode mode = GET_MODE (operands[0]);
13340
13341 /* The DFmode expanders may ask us to move a double.
13342 For a 64-bit target this is a single move. By handling it
13343 here we simplify the i386.md splitters. */
13344 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13345 {
13346 /* Optimize constant pool reference to immediates. This is used by
13347 fp moves, which force all constants to memory to allow combining. */
13348
13349 if (MEM_P (operands[1])
13350 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13351 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13352 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13353 if (push_operand (operands[0], VOIDmode))
13354 {
13355 operands[0] = copy_rtx (operands[0]);
13356 PUT_MODE (operands[0], Pmode);
13357 }
13358 else
13359 operands[0] = gen_lowpart (DImode, operands[0]);
13360 operands[1] = gen_lowpart (DImode, operands[1]);
13361 emit_move_insn (operands[0], operands[1]);
13362 return;
13363 }
13364
13365 /* The only non-offsettable memory we handle is push. */
13366 if (push_operand (operands[0], VOIDmode))
13367 push = 1;
13368 else
13369 gcc_assert (!MEM_P (operands[0])
13370 || offsettable_memref_p (operands[0]));
13371
13372 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13373 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13374
13375 /* When emitting a push, take care with source operands on the stack. */
13376 if (push && MEM_P (operands[1])
13377 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13378 {
13379 if (nparts == 3)
13380 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13381 XEXP (part[1][2], 0));
13382 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13383 XEXP (part[1][1], 0));
13384 }
13385
13386 /* We need to do the copy in the right order in case an address register
13387 of the source overlaps the destination. */
13388 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13389 {
13390 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13391 collisions++;
13392 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13393 collisions++;
13394 if (nparts == 3
13395 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13396 collisions++;
13397
13398 /* Collision in the middle part can be handled by reordering. */
13399 if (collisions == 1 && nparts == 3
13400 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13401 {
13402 rtx tmp;
13403 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13404 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13405 }
13406
13407 /* If there are more collisions, we can't handle it by reordering.
13408 Do an lea to the last part and use only one colliding move. */
13409 else if (collisions > 1)
13410 {
13411 rtx base;
13412
13413 collisions = 1;
13414
13415 base = part[0][nparts - 1];
13416
13417 /* Handle the case when the last part isn't valid for lea.
13418 Happens in 64-bit mode storing the 12-byte XFmode. */
13419 if (GET_MODE (base) != Pmode)
13420 base = gen_rtx_REG (Pmode, REGNO (base));
13421
13422 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13423 part[1][0] = replace_equiv_address (part[1][0], base);
13424 part[1][1] = replace_equiv_address (part[1][1],
13425 plus_constant (base, UNITS_PER_WORD));
13426 if (nparts == 3)
13427 part[1][2] = replace_equiv_address (part[1][2],
13428 plus_constant (base, 8));
13429 }
13430 }
13431
13432 if (push)
13433 {
13434 if (!TARGET_64BIT)
13435 {
13436 if (nparts == 3)
13437 {
13438 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13439 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13440 emit_move_insn (part[0][2], part[1][2]);
13441 }
13442 }
13443 else
13444 {
13445 /* In 64-bit mode we don't have a 32-bit push available. If this is
13446 a register, that is OK - we just use the larger counterpart. We also
13447 retype memory - this comes from an attempt to avoid a REX prefix on
13448 moving the second half of a TFmode value. */
13449 if (GET_MODE (part[1][1]) == SImode)
13450 {
13451 switch (GET_CODE (part[1][1]))
13452 {
13453 case MEM:
13454 part[1][1] = adjust_address (part[1][1], DImode, 0);
13455 break;
13456
13457 case REG:
13458 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13459 break;
13460
13461 default:
13462 gcc_unreachable ();
13463 }
13464
13465 if (GET_MODE (part[1][0]) == SImode)
13466 part[1][0] = part[1][1];
13467 }
13468 }
13469 emit_move_insn (part[0][1], part[1][1]);
13470 emit_move_insn (part[0][0], part[1][0]);
13471 return;
13472 }
13473
13474 /* Choose correct order to not overwrite the source before it is copied. */
13475 if ((REG_P (part[0][0])
13476 && REG_P (part[1][1])
13477 && (REGNO (part[0][0]) == REGNO (part[1][1])
13478 || (nparts == 3
13479 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13480 || (collisions > 0
13481 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13482 {
13483 if (nparts == 3)
13484 {
13485 operands[2] = part[0][2];
13486 operands[3] = part[0][1];
13487 operands[4] = part[0][0];
13488 operands[5] = part[1][2];
13489 operands[6] = part[1][1];
13490 operands[7] = part[1][0];
13491 }
13492 else
13493 {
13494 operands[2] = part[0][1];
13495 operands[3] = part[0][0];
13496 operands[5] = part[1][1];
13497 operands[6] = part[1][0];
13498 }
13499 }
13500 else
13501 {
13502 if (nparts == 3)
13503 {
13504 operands[2] = part[0][0];
13505 operands[3] = part[0][1];
13506 operands[4] = part[0][2];
13507 operands[5] = part[1][0];
13508 operands[6] = part[1][1];
13509 operands[7] = part[1][2];
13510 }
13511 else
13512 {
13513 operands[2] = part[0][0];
13514 operands[3] = part[0][1];
13515 operands[5] = part[1][0];
13516 operands[6] = part[1][1];
13517 }
13518 }
13519
13520 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13521 if (optimize_size)
13522 {
13523 if (CONST_INT_P (operands[5])
13524 && operands[5] != const0_rtx
13525 && REG_P (operands[2]))
13526 {
13527 if (CONST_INT_P (operands[6])
13528 && INTVAL (operands[6]) == INTVAL (operands[5]))
13529 operands[6] = operands[2];
13530
13531 if (nparts == 3
13532 && CONST_INT_P (operands[7])
13533 && INTVAL (operands[7]) == INTVAL (operands[5]))
13534 operands[7] = operands[2];
13535 }
13536
13537 if (nparts == 3
13538 && CONST_INT_P (operands[6])
13539 && operands[6] != const0_rtx
13540 && REG_P (operands[3])
13541 && CONST_INT_P (operands[7])
13542 && INTVAL (operands[7]) == INTVAL (operands[6]))
13543 operands[7] = operands[3];
13544 }
13545
13546 emit_move_insn (operands[2], operands[5]);
13547 emit_move_insn (operands[3], operands[6]);
13548 if (nparts == 3)
13549 emit_move_insn (operands[4], operands[7]);
13550
13551 return;
13552 }
13553
13554 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13555 left shift by a constant, either using a single shift or
13556 a sequence of add instructions. */
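/* E.g. a left shift by 2 may be emitted as two "add reg, reg" instructions
   when two additions are no more expensive than one constant shift (and we
   are not optimizing for size). */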
13557
13558 static void
13559 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13560 {
13561 if (count == 1)
13562 {
13563 emit_insn ((mode == DImode
13564 ? gen_addsi3
13565 : gen_adddi3) (operand, operand, operand));
13566 }
13567 else if (!optimize_size
13568 && count * ix86_cost->add <= ix86_cost->shift_const)
13569 {
13570 int i;
13571 for (i=0; i<count; i++)
13572 {
13573 emit_insn ((mode == DImode
13574 ? gen_addsi3
13575 : gen_adddi3) (operand, operand, operand));
13576 }
13577 }
13578 else
13579 emit_insn ((mode == DImode
13580 ? gen_ashlsi3
13581 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13582 }
13583
13584 void
13585 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13586 {
13587 rtx low[2], high[2];
13588 int count;
13589 const int single_width = mode == DImode ? 32 : 64;
13590
13591 if (CONST_INT_P (operands[2]))
13592 {
13593 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13594 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13595
13596 if (count >= single_width)
13597 {
13598 emit_move_insn (high[0], low[1]);
13599 emit_move_insn (low[0], const0_rtx);
13600
13601 if (count > single_width)
13602 ix86_expand_ashl_const (high[0], count - single_width, mode);
13603 }
13604 else
13605 {
13606 if (!rtx_equal_p (operands[0], operands[1]))
13607 emit_move_insn (operands[0], operands[1]);
13608 emit_insn ((mode == DImode
13609 ? gen_x86_shld_1
13610 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13611 ix86_expand_ashl_const (low[0], count, mode);
13612 }
13613 return;
13614 }
13615
13616 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13617
13618 if (operands[1] == const1_rtx)
13619 {
13620 /* Assuming we've chosen QImode-capable registers, 1 << N
13621 can be done with two 32/64-bit shifts, no branches, no cmoves. */
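/* The idea: clear both halves, set the low half to 1 when bit 5 (bit 6 for
   TImode) of the count is clear and the high half to 1 when it is set, then
   shift both halves left by the count; since the hardware masks the shift
   count, only the half holding the 1 is affected. */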
13622 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13623 {
13624 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13625
13626 ix86_expand_clear (low[0]);
13627 ix86_expand_clear (high[0]);
13628 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13629
13630 d = gen_lowpart (QImode, low[0]);
13631 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13632 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13633 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13634
13635 d = gen_lowpart (QImode, high[0]);
13636 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13637 s = gen_rtx_NE (QImode, flags, const0_rtx);
13638 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13639 }
13640
13641 /* Otherwise, we can get the same results by manually performing
13642 a bit extract operation on bit 5/6, and then performing the two
13643 shifts. The two methods of getting 0/1 into low/high are exactly
13644 the same size. Avoiding the shift in the bit extract case helps
13645 pentium4 a bit; no one else seems to care much either way. */
13646 else
13647 {
13648 rtx x;
13649
13650 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13651 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13652 else
13653 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13654 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13655
13656 emit_insn ((mode == DImode
13657 ? gen_lshrsi3
13658 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13659 emit_insn ((mode == DImode
13660 ? gen_andsi3
13661 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13662 emit_move_insn (low[0], high[0]);
13663 emit_insn ((mode == DImode
13664 ? gen_xorsi3
13665 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13666 }
13667
13668 emit_insn ((mode == DImode
13669 ? gen_ashlsi3
13670 : gen_ashldi3) (low[0], low[0], operands[2]));
13671 emit_insn ((mode == DImode
13672 ? gen_ashlsi3
13673 : gen_ashldi3) (high[0], high[0], operands[2]));
13674 return;
13675 }
13676
13677 if (operands[1] == constm1_rtx)
13678 {
13679 /* For -1 << N, we can avoid the shld instruction, because we
13680 know that we're shifting 0...31/63 ones into a -1. */
13681 emit_move_insn (low[0], constm1_rtx);
13682 if (optimize_size)
13683 emit_move_insn (high[0], low[0]);
13684 else
13685 emit_move_insn (high[0], constm1_rtx);
13686 }
13687 else
13688 {
13689 if (!rtx_equal_p (operands[0], operands[1]))
13690 emit_move_insn (operands[0], operands[1]);
13691
13692 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13693 emit_insn ((mode == DImode
13694 ? gen_x86_shld_1
13695 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13696 }
13697
13698 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13699
13700 if (TARGET_CMOVE && scratch)
13701 {
13702 ix86_expand_clear (scratch);
13703 emit_insn ((mode == DImode
13704 ? gen_x86_shift_adj_1
13705 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13706 }
13707 else
13708 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13709 }
13710
13711 void
13712 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13713 {
13714 rtx low[2], high[2];
13715 int count;
13716 const int single_width = mode == DImode ? 32 : 64;
13717
13718 if (CONST_INT_P (operands[2]))
13719 {
13720 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13721 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13722
13723 if (count == single_width * 2 - 1)
13724 {
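/* Shifting right by the full double-word width minus one just
   replicates the sign bit into both halves. */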
13725 emit_move_insn (high[0], high[1]);
13726 emit_insn ((mode == DImode
13727 ? gen_ashrsi3
13728 : gen_ashrdi3) (high[0], high[0],
13729 GEN_INT (single_width - 1)));
13730 emit_move_insn (low[0], high[0]);
13731
13732 }
13733 else if (count >= single_width)
13734 {
13735 emit_move_insn (low[0], high[1]);
13736 emit_move_insn (high[0], low[0]);
13737 emit_insn ((mode == DImode
13738 ? gen_ashrsi3
13739 : gen_ashrdi3) (high[0], high[0],
13740 GEN_INT (single_width - 1)));
13741 if (count > single_width)
13742 emit_insn ((mode == DImode
13743 ? gen_ashrsi3
13744 : gen_ashrdi3) (low[0], low[0],
13745 GEN_INT (count - single_width)));
13746 }
13747 else
13748 {
13749 if (!rtx_equal_p (operands[0], operands[1]))
13750 emit_move_insn (operands[0], operands[1]);
13751 emit_insn ((mode == DImode
13752 ? gen_x86_shrd_1
13753 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13754 emit_insn ((mode == DImode
13755 ? gen_ashrsi3
13756 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13757 }
13758 }
13759 else
13760 {
13761 if (!rtx_equal_p (operands[0], operands[1]))
13762 emit_move_insn (operands[0], operands[1]);
13763
13764 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13765
13766 emit_insn ((mode == DImode
13767 ? gen_x86_shrd_1
13768 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13769 emit_insn ((mode == DImode
13770 ? gen_ashrsi3
13771 : gen_ashrdi3) (high[0], high[0], operands[2]));
13772
13773 if (TARGET_CMOVE && scratch)
13774 {
13775 emit_move_insn (scratch, high[0]);
13776 emit_insn ((mode == DImode
13777 ? gen_ashrsi3
13778 : gen_ashrdi3) (scratch, scratch,
13779 GEN_INT (single_width - 1)));
13780 emit_insn ((mode == DImode
13781 ? gen_x86_shift_adj_1
13782 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13783 scratch));
13784 }
13785 else
13786 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13787 }
13788 }
13789
13790 void
13791 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13792 {
13793 rtx low[2], high[2];
13794 int count;
13795 const int single_width = mode == DImode ? 32 : 64;
13796
13797 if (CONST_INT_P (operands[2]))
13798 {
13799 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13800 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13801
13802 if (count >= single_width)
13803 {
13804 emit_move_insn (low[0], high[1]);
13805 ix86_expand_clear (high[0]);
13806
13807 if (count > single_width)
13808 emit_insn ((mode == DImode
13809 ? gen_lshrsi3
13810 : gen_lshrdi3) (low[0], low[0],
13811 GEN_INT (count - single_width)));
13812 }
13813 else
13814 {
13815 if (!rtx_equal_p (operands[0], operands[1]))
13816 emit_move_insn (operands[0], operands[1]);
13817 emit_insn ((mode == DImode
13818 ? gen_x86_shrd_1
13819 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13820 emit_insn ((mode == DImode
13821 ? gen_lshrsi3
13822 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13823 }
13824 }
13825 else
13826 {
13827 if (!rtx_equal_p (operands[0], operands[1]))
13828 emit_move_insn (operands[0], operands[1]);
13829
13830 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13831
13832 emit_insn ((mode == DImode
13833 ? gen_x86_shrd_1
13834 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13835 emit_insn ((mode == DImode
13836 ? gen_lshrsi3
13837 : gen_lshrdi3) (high[0], high[0], operands[2]));
13838
13839 /* Heh. By reversing the arguments, we can reuse this pattern. */
13840 if (TARGET_CMOVE && scratch)
13841 {
13842 ix86_expand_clear (scratch);
13843 emit_insn ((mode == DImode
13844 ? gen_x86_shift_adj_1
13845 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13846 scratch));
13847 }
13848 else
13849 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13850 }
13851 }
13852
13853 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13854 static void
13855 predict_jump (int prob)
13856 {
13857 rtx insn = get_last_insn ();
13858 gcc_assert (JUMP_P (insn));
13859 REG_NOTES (insn)
13860 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13861 GEN_INT (prob),
13862 REG_NOTES (insn));
13863 }
13864
13865 /* Helper function for the string operations below. Test whether VARIABLE
13866 is aligned to VALUE bytes. If it is, jump to the label. */
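/* In other words, the code the caller emits between this call and the
   matching emit_label runs only when the tested bit of VARIABLE is set. */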
13867 static rtx
13868 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13869 {
13870 rtx label = gen_label_rtx ();
13871 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13872 if (GET_MODE (variable) == DImode)
13873 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13874 else
13875 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13876 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13877 1, label);
13878 if (epilogue)
13879 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13880 else
13881 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13882 return label;
13883 }
13884
13885 /* Decrease COUNTREG by VALUE. */
13886 static void
13887 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13888 {
13889 if (GET_MODE (countreg) == DImode)
13890 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13891 else
13892 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13893 }
13894
13895 /* Zero extend a possibly SImode EXP to a Pmode register. */
13896 rtx
13897 ix86_zero_extend_to_Pmode (rtx exp)
13898 {
13899 rtx r;
13900 if (GET_MODE (exp) == VOIDmode)
13901 return force_reg (Pmode, exp);
13902 if (GET_MODE (exp) == Pmode)
13903 return copy_to_mode_reg (Pmode, exp);
13904 r = gen_reg_rtx (Pmode);
13905 emit_insn (gen_zero_extendsidi2 (r, exp));
13906 return r;
13907 }
13908
13909 /* Divide COUNTREG by SCALE. */
13910 static rtx
13911 scale_counter (rtx countreg, int scale)
13912 {
13913 rtx sc;
13914 rtx piece_size_mask;
13915
13916 if (scale == 1)
13917 return countreg;
13918 if (CONST_INT_P (countreg))
13919 return GEN_INT (INTVAL (countreg) / scale);
13920 gcc_assert (REG_P (countreg));
13921
13922 piece_size_mask = GEN_INT (scale - 1);
13923 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13924 GEN_INT (exact_log2 (scale)),
13925 NULL, 1, OPTAB_DIRECT);
13926 return sc;
13927 }
13928
13929 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13930 DImode for constant loop counts. */
13931
13932 static enum machine_mode
13933 counter_mode (rtx count_exp)
13934 {
13935 if (GET_MODE (count_exp) != VOIDmode)
13936 return GET_MODE (count_exp);
13937 if (GET_CODE (count_exp) != CONST_INT)
13938 return Pmode;
13939 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13940 return DImode;
13941 return SImode;
13942 }
13943
13944 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13945 to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
13946 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
13947 output the equivalent loop to set memory to VALUE (assumed to be in MODE).
13948
13949 The size is rounded down to a whole number of chunks moved at once.
13950 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
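/* Roughly, the emitted control flow is (a sketch, not literal RTL):

     size = count & -(chunk size * unroll);
     for (iter = 0; iter < size; iter += chunk size * unroll)
       move or set one unrolled group of chunks;
     destptr += iter;  and, when copying, srcptr += iter;  */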
13951
13952
13953 static void
13954 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13955 rtx destptr, rtx srcptr, rtx value,
13956 rtx count, enum machine_mode mode, int unroll,
13957 int expected_size)
13958 {
13959 rtx out_label, top_label, iter, tmp;
13960 enum machine_mode iter_mode = counter_mode (count);
13961 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13962 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13963 rtx size;
13964 rtx x_addr;
13965 rtx y_addr;
13966 int i;
13967
13968 top_label = gen_label_rtx ();
13969 out_label = gen_label_rtx ();
13970 iter = gen_reg_rtx (iter_mode);
13971
13972 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13973 NULL, 1, OPTAB_DIRECT);
13974 /* Those two should combine. */
13975 if (piece_size == const1_rtx)
13976 {
13977 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13978 true, out_label);
13979 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13980 }
13981 emit_move_insn (iter, const0_rtx);
13982
13983 emit_label (top_label);
13984
13985 tmp = convert_modes (Pmode, iter_mode, iter, true);
13986 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13987 destmem = change_address (destmem, mode, x_addr);
13988
13989 if (srcmem)
13990 {
13991 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13992 srcmem = change_address (srcmem, mode, y_addr);
13993
13994 /* When unrolling for chips that reorder memory reads and writes,
13995 we can save registers by using a single temporary.
13996 Also, using 4 temporaries is overkill in 32-bit mode. */
13997 if (!TARGET_64BIT && 0)
13998 {
13999 for (i = 0; i < unroll; i++)
14000 {
14001 if (i)
14002 {
14003 destmem =
14004 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14005 srcmem =
14006 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14007 }
14008 emit_move_insn (destmem, srcmem);
14009 }
14010 }
14011 else
14012 {
14013 rtx tmpreg[4];
14014 gcc_assert (unroll <= 4);
14015 for (i = 0; i < unroll; i++)
14016 {
14017 tmpreg[i] = gen_reg_rtx (mode);
14018 if (i)
14019 {
14020 srcmem =
14021 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14022 }
14023 emit_move_insn (tmpreg[i], srcmem);
14024 }
14025 for (i = 0; i < unroll; i++)
14026 {
14027 if (i)
14028 {
14029 destmem =
14030 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14031 }
14032 emit_move_insn (destmem, tmpreg[i]);
14033 }
14034 }
14035 }
14036 else
14037 for (i = 0; i < unroll; i++)
14038 {
14039 if (i)
14040 destmem =
14041 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14042 emit_move_insn (destmem, value);
14043 }
14044
14045 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14046 true, OPTAB_LIB_WIDEN);
14047 if (tmp != iter)
14048 emit_move_insn (iter, tmp);
14049
14050 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14051 true, top_label);
14052 if (expected_size != -1)
14053 {
14054 expected_size /= GET_MODE_SIZE (mode) * unroll;
14055 if (expected_size == 0)
14056 predict_jump (0);
14057 else if (expected_size > REG_BR_PROB_BASE)
14058 predict_jump (REG_BR_PROB_BASE - 1);
14059 else
14060 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14061 }
14062 else
14063 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14064 iter = ix86_zero_extend_to_Pmode (iter);
14065 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14066 true, OPTAB_LIB_WIDEN);
14067 if (tmp != destptr)
14068 emit_move_insn (destptr, tmp);
14069 if (srcptr)
14070 {
14071 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14072 true, OPTAB_LIB_WIDEN);
14073 if (tmp != srcptr)
14074 emit_move_insn (srcptr, tmp);
14075 }
14076 emit_label (out_label);
14077 }
14078
14079 /* Output "rep; mov" instruction.
14080 Arguments have the same meaning as for the previous function. */
14081 static void
14082 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14083 rtx destptr, rtx srcptr,
14084 rtx count,
14085 enum machine_mode mode)
14086 {
14087 rtx destexp;
14088 rtx srcexp;
14089 rtx countreg;
14090
14091 /* If the size is known, it is shorter to use rep movs. */
14092 if (mode == QImode && CONST_INT_P (count)
14093 && !(INTVAL (count) & 3))
14094 mode = SImode;
14095
14096 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14097 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14098 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14099 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14100 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14101 if (mode != QImode)
14102 {
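/* DESTEXP and SRCEXP are the final pointer values: DESTPTR and SRCPTR
   advanced by COUNTREG scaled by the chunk size. */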
14103 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14104 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14105 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14106 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14107 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14108 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14109 }
14110 else
14111 {
14112 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14113 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14114 }
14115 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14116 destexp, srcexp));
14117 }
14118
14119 /* Output "rep; stos" instruction.
14120 Arguments have the same meaning as for the previous function. */
14121 static void
14122 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14123 rtx count,
14124 enum machine_mode mode)
14125 {
14126 rtx destexp;
14127 rtx countreg;
14128
14129 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14130 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14131 value = force_reg (mode, gen_lowpart (mode, value));
14132 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14133 if (mode != QImode)
14134 {
14135 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14136 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14137 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14138 }
14139 else
14140 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14141 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14142 }
14143
14144 static void
14145 emit_strmov (rtx destmem, rtx srcmem,
14146 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14147 {
14148 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14149 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14150 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14151 }
14152
14153 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
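/* For example (assuming a 64-bit target), with MAX_SIZE == 16 and a constant
   count whose low bits are 0xb, this emits one 8-byte, one 2-byte and one
   1-byte move. */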
14154 static void
14155 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14156 rtx destptr, rtx srcptr, rtx count, int max_size)
14157 {
14158 rtx src, dest;
14159 if (CONST_INT_P (count))
14160 {
14161 HOST_WIDE_INT countval = INTVAL (count);
14162 int offset = 0;
14163
14164 if ((countval & 0x10) && max_size > 16)
14165 {
14166 if (TARGET_64BIT)
14167 {
14168 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14169 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14170 }
14171 else
14172 gcc_unreachable ();
14173 offset += 16;
14174 }
14175 if ((countval & 0x08) && max_size > 8)
14176 {
14177 if (TARGET_64BIT)
14178 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14179 else
14180 {
14181 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14182 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14183 }
14184 offset += 8;
14185 }
14186 if ((countval & 0x04) && max_size > 4)
14187 {
14188 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14189 offset += 4;
14190 }
14191 if ((countval & 0x02) && max_size > 2)
14192 {
14193 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14194 offset += 2;
14195 }
14196 if ((countval & 0x01) && max_size > 1)
14197 {
14198 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14199 offset += 1;
14200 }
14201 return;
14202 }
14203 if (max_size > 8)
14204 {
14205 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14206 count, 1, OPTAB_DIRECT);
14207 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14208 count, QImode, 1, 4);
14209 return;
14210 }
14211
14212 /* When there are stringops, we can cheaply increase dest and src pointers.
14213 Otherwise we save code size by maintaining offset (zero is readily
14214 available from preceding rep operation) and using x86 addressing modes.
14215 */
14216 if (TARGET_SINGLE_STRINGOP)
14217 {
14218 if (max_size > 4)
14219 {
14220 rtx label = ix86_expand_aligntest (count, 4, true);
14221 src = change_address (srcmem, SImode, srcptr);
14222 dest = change_address (destmem, SImode, destptr);
14223 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14224 emit_label (label);
14225 LABEL_NUSES (label) = 1;
14226 }
14227 if (max_size > 2)
14228 {
14229 rtx label = ix86_expand_aligntest (count, 2, true);
14230 src = change_address (srcmem, HImode, srcptr);
14231 dest = change_address (destmem, HImode, destptr);
14232 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14233 emit_label (label);
14234 LABEL_NUSES (label) = 1;
14235 }
14236 if (max_size > 1)
14237 {
14238 rtx label = ix86_expand_aligntest (count, 1, true);
14239 src = change_address (srcmem, QImode, srcptr);
14240 dest = change_address (destmem, QImode, destptr);
14241 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14242 emit_label (label);
14243 LABEL_NUSES (label) = 1;
14244 }
14245 }
14246 else
14247 {
14248 rtx offset = force_reg (Pmode, const0_rtx);
14249 rtx tmp;
14250
14251 if (max_size > 4)
14252 {
14253 rtx label = ix86_expand_aligntest (count, 4, true);
14254 src = change_address (srcmem, SImode, srcptr);
14255 dest = change_address (destmem, SImode, destptr);
14256 emit_move_insn (dest, src);
14257 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14258 true, OPTAB_LIB_WIDEN);
14259 if (tmp != offset)
14260 emit_move_insn (offset, tmp);
14261 emit_label (label);
14262 LABEL_NUSES (label) = 1;
14263 }
14264 if (max_size > 2)
14265 {
14266 rtx label = ix86_expand_aligntest (count, 2, true);
14267 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14268 src = change_address (srcmem, HImode, tmp);
14269 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14270 dest = change_address (destmem, HImode, tmp);
14271 emit_move_insn (dest, src);
14272 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14273 true, OPTAB_LIB_WIDEN);
14274 if (tmp != offset)
14275 emit_move_insn (offset, tmp);
14276 emit_label (label);
14277 LABEL_NUSES (label) = 1;
14278 }
14279 if (max_size > 1)
14280 {
14281 rtx label = ix86_expand_aligntest (count, 1, true);
14282 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14283 src = change_address (srcmem, QImode, tmp);
14284 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14285 dest = change_address (destmem, QImode, tmp);
14286 emit_move_insn (dest, src);
14287 emit_label (label);
14288 LABEL_NUSES (label) = 1;
14289 }
14290 }
14291 }
14292
14293 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14294 static void
14295 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14296 rtx count, int max_size)
14297 {
14298 count =
14299 expand_simple_binop (counter_mode (count), AND, count,
14300 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14301 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14302 gen_lowpart (QImode, value), count, QImode,
14303 1, max_size / 2);
14304 }
14305
14306 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14307 static void
14308 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14309 {
14310 rtx dest;
14311
14312 if (CONST_INT_P (count))
14313 {
14314 HOST_WIDE_INT countval = INTVAL (count);
14315 int offset = 0;
14316
14317 if ((countval & 0x10) && max_size > 16)
14318 {
14319 if (TARGET_64BIT)
14320 {
14321 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14322 emit_insn (gen_strset (destptr, dest, value));
14323 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14324 emit_insn (gen_strset (destptr, dest, value));
14325 }
14326 else
14327 gcc_unreachable ();
14328 offset += 16;
14329 }
14330 if ((countval & 0x08) && max_size > 8)
14331 {
14332 if (TARGET_64BIT)
14333 {
14334 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14335 emit_insn (gen_strset (destptr, dest, value));
14336 }
14337 else
14338 {
14339 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14340 emit_insn (gen_strset (destptr, dest, value));
14341 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14342 emit_insn (gen_strset (destptr, dest, value));
14343 }
14344 offset += 8;
14345 }
14346 if ((countval & 0x04) && max_size > 4)
14347 {
14348 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14349 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14350 offset += 4;
14351 }
14352 if ((countval & 0x02) && max_size > 2)
14353 {
14354 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14355 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14356 offset += 2;
14357 }
14358 if ((countval & 0x01) && max_size > 1)
14359 {
14360 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14361 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14362 offset += 1;
14363 }
14364 return;
14365 }
14366 if (max_size > 32)
14367 {
14368 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14369 return;
14370 }
14371 if (max_size > 16)
14372 {
14373 rtx label = ix86_expand_aligntest (count, 16, true);
14374 if (TARGET_64BIT)
14375 {
14376 dest = change_address (destmem, DImode, destptr);
14377 emit_insn (gen_strset (destptr, dest, value));
14378 emit_insn (gen_strset (destptr, dest, value));
14379 }
14380 else
14381 {
14382 dest = change_address (destmem, SImode, destptr);
14383 emit_insn (gen_strset (destptr, dest, value));
14384 emit_insn (gen_strset (destptr, dest, value));
14385 emit_insn (gen_strset (destptr, dest, value));
14386 emit_insn (gen_strset (destptr, dest, value));
14387 }
14388 emit_label (label);
14389 LABEL_NUSES (label) = 1;
14390 }
14391 if (max_size > 8)
14392 {
14393 rtx label = ix86_expand_aligntest (count, 8, true);
14394 if (TARGET_64BIT)
14395 {
14396 dest = change_address (destmem, DImode, destptr);
14397 emit_insn (gen_strset (destptr, dest, value));
14398 }
14399 else
14400 {
14401 dest = change_address (destmem, SImode, destptr);
14402 emit_insn (gen_strset (destptr, dest, value));
14403 emit_insn (gen_strset (destptr, dest, value));
14404 }
14405 emit_label (label);
14406 LABEL_NUSES (label) = 1;
14407 }
14408 if (max_size > 4)
14409 {
14410 rtx label = ix86_expand_aligntest (count, 4, true);
14411 dest = change_address (destmem, SImode, destptr);
14412 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14413 emit_label (label);
14414 LABEL_NUSES (label) = 1;
14415 }
14416 if (max_size > 2)
14417 {
14418 rtx label = ix86_expand_aligntest (count, 2, true);
14419 dest = change_address (destmem, HImode, destptr);
14420 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14421 emit_label (label);
14422 LABEL_NUSES (label) = 1;
14423 }
14424 if (max_size > 1)
14425 {
14426 rtx label = ix86_expand_aligntest (count, 1, true);
14427 dest = change_address (destmem, QImode, destptr);
14428 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14429 emit_label (label);
14430 LABEL_NUSES (label) = 1;
14431 }
14432 }
14433
14434 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14435 to DESIRED_ALIGNMENT. */
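/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits up to three
   conditional single-element copies (1, 2 and 4 bytes), each guarded by a
   test of the corresponding low bit of the destination address. */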
14436 static void
14437 expand_movmem_prologue (rtx destmem, rtx srcmem,
14438 rtx destptr, rtx srcptr, rtx count,
14439 int align, int desired_alignment)
14440 {
14441 if (align <= 1 && desired_alignment > 1)
14442 {
14443 rtx label = ix86_expand_aligntest (destptr, 1, false);
14444 srcmem = change_address (srcmem, QImode, srcptr);
14445 destmem = change_address (destmem, QImode, destptr);
14446 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14447 ix86_adjust_counter (count, 1);
14448 emit_label (label);
14449 LABEL_NUSES (label) = 1;
14450 }
14451 if (align <= 2 && desired_alignment > 2)
14452 {
14453 rtx label = ix86_expand_aligntest (destptr, 2, false);
14454 srcmem = change_address (srcmem, HImode, srcptr);
14455 destmem = change_address (destmem, HImode, destptr);
14456 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14457 ix86_adjust_counter (count, 2);
14458 emit_label (label);
14459 LABEL_NUSES (label) = 1;
14460 }
14461 if (align <= 4 && desired_alignment > 4)
14462 {
14463 rtx label = ix86_expand_aligntest (destptr, 4, false);
14464 srcmem = change_address (srcmem, SImode, srcptr);
14465 destmem = change_address (destmem, SImode, destptr);
14466 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14467 ix86_adjust_counter (count, 4);
14468 emit_label (label);
14469 LABEL_NUSES (label) = 1;
14470 }
14471 gcc_assert (desired_alignment <= 8);
14472 }
14473
14474 /* Set enough of DEST to align DEST, known to be aligned by ALIGN,
14475 to DESIRED_ALIGNMENT. */
14476 static void
14477 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14478 int align, int desired_alignment)
14479 {
14480 if (align <= 1 && desired_alignment > 1)
14481 {
14482 rtx label = ix86_expand_aligntest (destptr, 1, false);
14483 destmem = change_address (destmem, QImode, destptr);
14484 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14485 ix86_adjust_counter (count, 1);
14486 emit_label (label);
14487 LABEL_NUSES (label) = 1;
14488 }
14489 if (align <= 2 && desired_alignment > 2)
14490 {
14491 rtx label = ix86_expand_aligntest (destptr, 2, false);
14492 destmem = change_address (destmem, HImode, destptr);
14493 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14494 ix86_adjust_counter (count, 2);
14495 emit_label (label);
14496 LABEL_NUSES (label) = 1;
14497 }
14498 if (align <= 4 && desired_alignment > 4)
14499 {
14500 rtx label = ix86_expand_aligntest (destptr, 4, false);
14501 destmem = change_address (destmem, SImode, destptr);
14502 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14503 ix86_adjust_counter (count, 4);
14504 emit_label (label);
14505 LABEL_NUSES (label) = 1;
14506 }
14507 gcc_assert (desired_alignment <= 8);
14508 }
14509
14510 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
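/* For instance (with purely hypothetical cost-table entries): if the
   relevant stringop_algs size table were {{256, loop}, {8192,
   rep_prefix_4_byte}, {-1, libcall}}, an EXPECTED_SIZE of 1000 would
   select rep_prefix_4_byte. */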
14511 static enum stringop_alg
14512 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14513 int *dynamic_check)
14514 {
14515 const struct stringop_algs * algs;
14516
14517 *dynamic_check = -1;
14518 if (memset)
14519 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14520 else
14521 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14522 if (stringop_alg != no_stringop)
14523 return stringop_alg;
14524 /* rep; movq or rep; movl is the smallest variant. */
14525 else if (optimize_size)
14526 {
14527 if (!count || (count & 3))
14528 return rep_prefix_1_byte;
14529 else
14530 return rep_prefix_4_byte;
14531 }
14532 /* Very tiny blocks are best handled via the loop; REP is expensive to
14533 set up. */
14534 else if (expected_size != -1 && expected_size < 4)
14535 return loop_1_byte;
14536 else if (expected_size != -1)
14537 {
14538 unsigned int i;
14539 enum stringop_alg alg = libcall;
14540 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14541 {
14542 gcc_assert (algs->size[i].max);
14543 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14544 {
14545 if (algs->size[i].alg != libcall)
14546 alg = algs->size[i].alg;
14547 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14548 last non-libcall inline algorithm. */
14549 if (TARGET_INLINE_ALL_STRINGOPS)
14550 {
14551 /* When the current size is best copied by a libcall, but we
14552 are still forced to inline, run the heuristic below that
14553 will pick code for medium-sized blocks. */
14554 if (alg != libcall)
14555 return alg;
14556 break;
14557 }
14558 else
14559 return algs->size[i].alg;
14560 }
14561 }
14562 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14563 }
14564 /* When asked to inline the call anyway, try to pick a meaningful choice.
14565 We look for the maximal size of a block that is faster to copy by hand
14566 and take blocks of at most that size, guessing that the average size
14567 will be roughly half of the block.
14568
14569 If this turns out to be bad, we might simply specify the preferred
14570 choice in ix86_costs. */
14571 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14572 && algs->unknown_size == libcall)
14573 {
14574 int max = -1;
14575 enum stringop_alg alg;
14576 int i;
14577
14578 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14579 if (algs->size[i].alg != libcall && algs->size[i].alg)
14580 max = algs->size[i].max;
14581 if (max == -1)
14582 max = 4096;
14583 alg = decide_alg (count, max / 2, memset, dynamic_check);
14584 gcc_assert (*dynamic_check == -1);
14585 gcc_assert (alg != libcall);
14586 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14587 *dynamic_check = max;
14588 return alg;
14589 }
14590 return algs->unknown_size;
14591 }
14592
14593 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14594 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14595 static int
14596 decide_alignment (int align,
14597 enum stringop_alg alg,
14598 int expected_size)
14599 {
14600 int desired_align = 0;
14601 switch (alg)
14602 {
14603 case no_stringop:
14604 gcc_unreachable ();
14605 case loop:
14606 case unrolled_loop:
14607 desired_align = GET_MODE_SIZE (Pmode);
14608 break;
14609 case rep_prefix_8_byte:
14610 desired_align = 8;
14611 break;
14612 case rep_prefix_4_byte:
14613 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14614 copying a whole cacheline at once. */
14615 if (TARGET_PENTIUMPRO)
14616 desired_align = 8;
14617 else
14618 desired_align = 4;
14619 break;
14620 case rep_prefix_1_byte:
14621 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14622 copying a whole cacheline at once. */
14623 if (TARGET_PENTIUMPRO)
14624 desired_align = 8;
14625 else
14626 desired_align = 1;
14627 break;
14628 case loop_1_byte:
14629 desired_align = 1;
14630 break;
14631 case libcall:
14632 return 0;
14633 }
14634
14635 if (optimize_size)
14636 desired_align = 1;
14637 if (desired_align < align)
14638 desired_align = align;
14639 if (expected_size != -1 && expected_size < 4)
14640 desired_align = align;
14641 return desired_align;
14642 }
14643
14644 /* Return the smallest power of 2 greater than VAL. */
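/* E.g. 3 -> 4 and 4 -> 8; the result is always strictly greater than VAL. */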
14645 static int
14646 smallest_pow2_greater_than (int val)
14647 {
14648 int ret = 1;
14649 while (ret <= val)
14650 ret <<= 1;
14651 return ret;
14652 }
14653
14654 /* Expand string move (memcpy) operation. Use i386 string operations when
14655 profitable. expand_clrmem contains similar code. The code depends upon
14656 architecture, block size and alignment, but always has the same
14657 overall structure:
14658
14659 1) Prologue guard: Conditional that jumps up to epilogues for small
14660 blocks that can be handled by the epilogue alone. This is faster but
14661 also needed for correctness, since the prologue assumes the block is
14662 larger than the desired alignment.
14663
14664 Optional dynamic check for size and libcall for large
14665 blocks is emitted here too, with -minline-stringops-dynamically.
14666
14667 2) Prologue: copy the first few bytes in order to get the destination
14668 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14669 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14670 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14671
14672 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14673 with the specified algorithm.
14674
14675 4) Epilogue: code copying the tail of the block that is too small to be
14676 handled by the main body (or up to the size guarded by the prologue guard). */
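/* In outline (a rough sketch of the generated control flow, not the exact
   RTL):

     if (count < epilogue_size_needed) goto epilogue;        (step 1)
     align the destination, decreasing count accordingly;    (step 2)
     copy count / size_needed chunks with the chosen method; (step 3)
   epilogue:
     copy the remaining count % size_needed bytes.           (step 4)  */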
14677
14678 int
14679 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14680 rtx expected_align_exp, rtx expected_size_exp)
14681 {
14682 rtx destreg;
14683 rtx srcreg;
14684 rtx label = NULL;
14685 rtx tmp;
14686 rtx jump_around_label = NULL;
14687 HOST_WIDE_INT align = 1;
14688 unsigned HOST_WIDE_INT count = 0;
14689 HOST_WIDE_INT expected_size = -1;
14690 int size_needed = 0, epilogue_size_needed;
14691 int desired_align = 0;
14692 enum stringop_alg alg;
14693 int dynamic_check;
14694
14695 if (CONST_INT_P (align_exp))
14696 align = INTVAL (align_exp);
14697 /* i386 can do misaligned access at a reasonably increased cost. */
14698 if (CONST_INT_P (expected_align_exp)
14699 && INTVAL (expected_align_exp) > align)
14700 align = INTVAL (expected_align_exp);
14701 if (CONST_INT_P (count_exp))
14702 count = expected_size = INTVAL (count_exp);
14703 if (CONST_INT_P (expected_size_exp) && count == 0)
14704 expected_size = INTVAL (expected_size_exp);
14705
14706 /* Step 0: Decide on preferred algorithm, desired alignment and
14707 size of chunks to be copied by main loop. */
14708
14709 alg = decide_alg (count, expected_size, false, &dynamic_check);
14710 desired_align = decide_alignment (align, alg, expected_size);
14711
14712 if (!TARGET_ALIGN_STRINGOPS)
14713 align = desired_align;
14714
14715 if (alg == libcall)
14716 return 0;
14717 gcc_assert (alg != no_stringop);
14718 if (!count)
14719 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14720 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14721 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14722 switch (alg)
14723 {
14724 case libcall:
14725 case no_stringop:
14726 gcc_unreachable ();
14727 case loop:
14728 size_needed = GET_MODE_SIZE (Pmode);
14729 break;
14730 case unrolled_loop:
14731 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14732 break;
14733 case rep_prefix_8_byte:
14734 size_needed = 8;
14735 break;
14736 case rep_prefix_4_byte:
14737 size_needed = 4;
14738 break;
14739 case rep_prefix_1_byte:
14740 case loop_1_byte:
14741 size_needed = 1;
14742 break;
14743 }
14744
14745 epilogue_size_needed = size_needed;
14746
14747 /* Step 1: Prologue guard. */
14748
14749 /* Alignment code needs count to be in register. */
14750 if (CONST_INT_P (count_exp) && desired_align > align)
14751 {
14752 enum machine_mode mode = SImode;
14753 if (TARGET_64BIT && (count & ~0xffffffff))
14754 mode = DImode;
14755 count_exp = force_reg (mode, count_exp);
14756 }
14757 gcc_assert (desired_align >= 1 && align >= 1);
14758
14759 /* Ensure that alignment prologue won't copy past end of block. */
14760 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14761 {
14762 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14763 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14764 Make sure it is power of 2. */
14765 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14766
14767 label = gen_label_rtx ();
14768 emit_cmp_and_jump_insns (count_exp,
14769 GEN_INT (epilogue_size_needed),
14770 LTU, 0, counter_mode (count_exp), 1, label);
14771 if (GET_CODE (count_exp) == CONST_INT)
14772 ;
14773 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14774 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14775 else
14776 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14777 }
14778 /* Emit code to decide on runtime whether library call or inline should be
14779 used. */
14780 if (dynamic_check != -1)
14781 {
14782 rtx hot_label = gen_label_rtx ();
14783 jump_around_label = gen_label_rtx ();
14784 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14785 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14786 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14787 emit_block_move_via_libcall (dst, src, count_exp, false);
14788 emit_jump (jump_around_label);
14789 emit_label (hot_label);
14790 }
14791
14792 /* Step 2: Alignment prologue. */
14793
14794 if (desired_align > align)
14795 {
14796 /* Except for the first move in the epilogue, we no longer know
14797 the constant offset in aliasing info. It does not seem worth
14798 the pain to maintain it for the first move, so throw away
14799 the info early. */
14800 src = change_address (src, BLKmode, srcreg);
14801 dst = change_address (dst, BLKmode, destreg);
14802 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14803 desired_align);
14804 }
14805 if (label && size_needed == 1)
14806 {
14807 emit_label (label);
14808 LABEL_NUSES (label) = 1;
14809 label = NULL;
14810 }
14811
14812 /* Step 3: Main loop. */
14813
14814 switch (alg)
14815 {
14816 case libcall:
14817 case no_stringop:
14818 gcc_unreachable ();
14819 case loop_1_byte:
14820 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14821 count_exp, QImode, 1, expected_size);
14822 break;
14823 case loop:
14824 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14825 count_exp, Pmode, 1, expected_size);
14826 break;
14827 case unrolled_loop:
14828 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14829 registers for 4 temporaries anyway. */
14830 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14831 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14832 expected_size);
14833 break;
14834 case rep_prefix_8_byte:
14835 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14836 DImode);
14837 break;
14838 case rep_prefix_4_byte:
14839 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14840 SImode);
14841 break;
14842 case rep_prefix_1_byte:
14843 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14844 QImode);
14845 break;
14846 }
14847 /* Properly adjust the offsets of src and dest memory for aliasing. */
14848 if (CONST_INT_P (count_exp))
14849 {
14850 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14851 (count / size_needed) * size_needed);
14852 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14853 (count / size_needed) * size_needed);
14854 }
14855 else
14856 {
14857 src = change_address (src, BLKmode, srcreg);
14858 dst = change_address (dst, BLKmode, destreg);
14859 }
14860
14861 /* Step 4: Epilogue to copy the remaining bytes. */
14862
14863 if (label)
14864 {
14865 /* When the main loop is done, COUNT_EXP might hold original count,
14866 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14867 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14868 bytes. Compensate if needed. */
14869
14870 if (size_needed < epilogue_size_needed)
14871 {
14872 tmp =
14873 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14874 GEN_INT (size_needed - 1), count_exp, 1,
14875 OPTAB_DIRECT);
14876 if (tmp != count_exp)
14877 emit_move_insn (count_exp, tmp);
14878 }
14879 emit_label (label);
14880 LABEL_NUSES (label) = 1;
14881 }
14882
14883 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14884 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14885 epilogue_size_needed);
14886 if (jump_around_label)
14887 emit_label (jump_around_label);
14888 return 1;
14889 }
14890
14891 /* Helper function for memset. For QImode value 0xXY produce
14892 0xXYXYXYXY of the width specified by MODE. This is essentially
14893 a * 0x01010101, but we can do slightly better than
14894 synth_mult by unwinding the sequence by hand on CPUs with
14895 a slow multiply. */
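/* For example, VAL == 0xAB yields 0xABABABAB in SImode and
   0xABABABABABABABAB in DImode; the expansion below does this either with
   a single multiply or with a short shift/or sequence, whichever the cost
   tables indicate is cheaper.  */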
14896 static rtx
14897 promote_duplicated_reg (enum machine_mode mode, rtx val)
14898 {
14899 enum machine_mode valmode = GET_MODE (val);
14900 rtx tmp;
14901 int nops = mode == DImode ? 3 : 2;
14902
14903 gcc_assert (mode == SImode || mode == DImode);
14904 if (val == const0_rtx)
14905 return copy_to_mode_reg (mode, const0_rtx);
14906 if (CONST_INT_P (val))
14907 {
14908 HOST_WIDE_INT v = INTVAL (val) & 255;
14909
14910 v |= v << 8;
14911 v |= v << 16;
14912 if (mode == DImode)
14913 v |= (v << 16) << 16;
14914 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14915 }
14916
14917 if (valmode == VOIDmode)
14918 valmode = QImode;
14919 if (valmode != QImode)
14920 val = gen_lowpart (QImode, val);
14921 if (mode == QImode)
14922 return val;
14923 if (!TARGET_PARTIAL_REG_STALL)
14924 nops--;
14925 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14926 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14927 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14928 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14929 {
14930 rtx reg = convert_modes (mode, QImode, val, true);
14931 tmp = promote_duplicated_reg (mode, const1_rtx);
14932 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14933 OPTAB_DIRECT);
14934 }
14935 else
14936 {
14937 rtx reg = convert_modes (mode, QImode, val, true);
14938
14939 if (!TARGET_PARTIAL_REG_STALL)
14940 if (mode == SImode)
14941 emit_insn (gen_movsi_insv_1 (reg, reg));
14942 else
14943 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14944 else
14945 {
14946 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14947 NULL, 1, OPTAB_DIRECT);
14948 reg =
14949 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14950 }
14951 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14952 NULL, 1, OPTAB_DIRECT);
14953 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14954 if (mode == SImode)
14955 return reg;
14956 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14957 NULL, 1, OPTAB_DIRECT);
14958 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14959 return reg;
14960 }
14961 }
14962
14963 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14964 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14965 alignment from ALIGN to DESIRED_ALIGN. */
14966 static rtx
14967 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14968 {
14969 rtx promoted_val;
14970
14971 if (TARGET_64BIT
14972 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14973 promoted_val = promote_duplicated_reg (DImode, val);
14974 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14975 promoted_val = promote_duplicated_reg (SImode, val);
14976 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14977 promoted_val = promote_duplicated_reg (HImode, val);
14978 else
14979 promoted_val = val;
14980
14981 return promoted_val;
14982 }
14983
14984 /* Expand string set operation (memset). Use i386 string operations when
14985 profitable. See the ix86_expand_movmem comment for an explanation of the individual
14986 steps performed. */
14987 int
14988 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14989 rtx expected_align_exp, rtx expected_size_exp)
14990 {
14991 rtx destreg;
14992 rtx label = NULL;
14993 rtx tmp;
14994 rtx jump_around_label = NULL;
14995 HOST_WIDE_INT align = 1;
14996 unsigned HOST_WIDE_INT count = 0;
14997 HOST_WIDE_INT expected_size = -1;
14998 int size_needed = 0, epilogue_size_needed;
14999 int desired_align = 0;
15000 enum stringop_alg alg;
15001 rtx promoted_val = NULL;
15002 bool force_loopy_epilogue = false;
15003 int dynamic_check;
15004
15005 if (CONST_INT_P (align_exp))
15006 align = INTVAL (align_exp);
15007 /* i386 can do misaligned access at a reasonably increased cost. */
15008 if (CONST_INT_P (expected_align_exp)
15009 && INTVAL (expected_align_exp) > align)
15010 align = INTVAL (expected_align_exp);
15011 if (CONST_INT_P (count_exp))
15012 count = expected_size = INTVAL (count_exp);
15013 if (CONST_INT_P (expected_size_exp) && count == 0)
15014 expected_size = INTVAL (expected_size_exp);
15015
15016 /* Step 0: Decide on preferred algorithm, desired alignment and
15017 size of chunks to be copied by main loop. */
15018
15019 alg = decide_alg (count, expected_size, true, &dynamic_check);
15020 desired_align = decide_alignment (align, alg, expected_size);
15021
15022 if (!TARGET_ALIGN_STRINGOPS)
15023 align = desired_align;
15024
15025 if (alg == libcall)
15026 return 0;
15027 gcc_assert (alg != no_stringop);
15028 if (!count)
15029 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15030 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15031 switch (alg)
15032 {
15033 case libcall:
15034 case no_stringop:
15035 gcc_unreachable ();
15036 case loop:
15037 size_needed = GET_MODE_SIZE (Pmode);
15038 break;
15039 case unrolled_loop:
15040 size_needed = GET_MODE_SIZE (Pmode) * 4;
15041 break;
15042 case rep_prefix_8_byte:
15043 size_needed = 8;
15044 break;
15045 case rep_prefix_4_byte:
15046 size_needed = 4;
15047 break;
15048 case rep_prefix_1_byte:
15049 case loop_1_byte:
15050 size_needed = 1;
15051 break;
15052 }
15053 epilogue_size_needed = size_needed;
15054
15055 /* Step 1: Prologue guard. */
15056
15057 /* Alignment code needs count to be in register. */
15058 if (CONST_INT_P (count_exp) && desired_align > align)
15059 {
15060 enum machine_mode mode = SImode;
15061 if (TARGET_64BIT && (count & ~0xffffffff))
15062 mode = DImode;
15063 count_exp = force_reg (mode, count_exp);
15064 }
15065 /* Do the cheap promotion to allow better CSE across the
15066 main loop and epilogue (i.e. one load of the big constant in
15067 front of all the code). */
15068 if (CONST_INT_P (val_exp))
15069 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15070 desired_align, align);
15071 /* Ensure that alignment prologue won't copy past end of block. */
15072 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15073 {
15074 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15075 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15076 Make sure it is power of 2. */
15077 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15078
15079 /* To improve performance of small blocks, we jump around the VAL
15080 promoting code. This means that if the promoted VAL is not constant,
15081 we might not use it in the epilogue and have to use the byte
15082 loop variant. */
15083 if (epilogue_size_needed > 2 && !promoted_val)
15084 force_loopy_epilogue = true;
15085 label = gen_label_rtx ();
15086 emit_cmp_and_jump_insns (count_exp,
15087 GEN_INT (epilogue_size_needed),
15088 LTU, 0, counter_mode (count_exp), 1, label);
15089 if (GET_CODE (count_exp) == CONST_INT)
15090 ;
15091 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15092 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15093 else
15094 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15095 }
15096 if (dynamic_check != -1)
15097 {
15098 rtx hot_label = gen_label_rtx ();
15099 jump_around_label = gen_label_rtx ();
15100 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15101 LEU, 0, counter_mode (count_exp), 1, hot_label);
15102 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15103 set_storage_via_libcall (dst, count_exp, val_exp, false);
15104 emit_jump (jump_around_label);
15105 emit_label (hot_label);
15106 }
15107
15108 /* Step 2: Alignment prologue. */
15109
15110 /* Do the expensive promotion once we have branched off the small blocks. */
15111 if (!promoted_val)
15112 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15113 desired_align, align);
15114 gcc_assert (desired_align >= 1 && align >= 1);
15115
15116 if (desired_align > align)
15117 {
15118 /* Except for the first move in the epilogue, we no longer know
15119 the constant offset in aliasing info. It does not seem worth
15120 the pain to maintain it for the first move, so throw away
15121 the info early. */
15122 dst = change_address (dst, BLKmode, destreg);
15123 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15124 desired_align);
15125 }
15126 if (label && size_needed == 1)
15127 {
15128 emit_label (label);
15129 LABEL_NUSES (label) = 1;
15130 label = NULL;
15131 }
15132
15133 /* Step 3: Main loop. */
15134
15135 switch (alg)
15136 {
15137 case libcall:
15138 case no_stringop:
15139 gcc_unreachable ();
15140 case loop_1_byte:
15141 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15142 count_exp, QImode, 1, expected_size);
15143 break;
15144 case loop:
15145 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15146 count_exp, Pmode, 1, expected_size);
15147 break;
15148 case unrolled_loop:
15149 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15150 count_exp, Pmode, 4, expected_size);
15151 break;
15152 case rep_prefix_8_byte:
15153 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15154 DImode);
15155 break;
15156 case rep_prefix_4_byte:
15157 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15158 SImode);
15159 break;
15160 case rep_prefix_1_byte:
15161 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15162 QImode);
15163 break;
15164 }
15165 /* Properly adjust the offset of the dest memory for aliasing. */
15166 if (CONST_INT_P (count_exp))
15167 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15168 (count / size_needed) * size_needed);
15169 else
15170 dst = change_address (dst, BLKmode, destreg);
15171
15172 /* Step 4: Epilogue to copy the remaining bytes. */
15173
15174 if (label)
15175 {
15176 /* When the main loop is done, COUNT_EXP might hold original count,
15177 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15178 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15179 bytes. Compensate if needed. */
15180
15181 if (size_needed < desired_align - align)
15182 {
15183 tmp =
15184 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15185 GEN_INT (size_needed - 1), count_exp, 1,
15186 OPTAB_DIRECT);
15187 size_needed = desired_align - align + 1;
15188 if (tmp != count_exp)
15189 emit_move_insn (count_exp, tmp);
15190 }
15191 emit_label (label);
15192 LABEL_NUSES (label) = 1;
15193 }
15194 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15195 {
15196 if (force_loopy_epilogue)
15197 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15198 size_needed);
15199 else
15200 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15201 size_needed);
15202 }
15203 if (jump_around_label)
15204 emit_label (jump_around_label);
15205 return 1;
15206 }
15207
15208 /* Expand the appropriate insns for doing strlen if not just doing
15209 repnz; scasb
15210
15211 out = result, initialized with the start address
15212 align_rtx = alignment of the address.
15213 scratch = scratch register, initialized with the start address when
15214 not aligned, otherwise undefined
15215
15216 This is just the body. It needs the initializations mentioned above and
15217 some address computing at the end. These things are done in i386.md. */
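/* Roughly, the code emitted below does (a sketch, not the exact insns):

     while (OUT is not 4-byte aligned)
       { if (*OUT == 0) goto done; OUT++; }
     do
       { w = *(unsigned int *) OUT; OUT += 4; }
     while (((w - 0x01010101) & ~w & 0x80808080) == 0);
     adjust OUT backwards to point at the zero byte;
   done:  */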
15218
15219 static void
15220 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15221 {
15222 int align;
15223 rtx tmp;
15224 rtx align_2_label = NULL_RTX;
15225 rtx align_3_label = NULL_RTX;
15226 rtx align_4_label = gen_label_rtx ();
15227 rtx end_0_label = gen_label_rtx ();
15228 rtx mem;
15229 rtx tmpreg = gen_reg_rtx (SImode);
15230 rtx scratch = gen_reg_rtx (SImode);
15231 rtx cmp;
15232
15233 align = 0;
15234 if (CONST_INT_P (align_rtx))
15235 align = INTVAL (align_rtx);
15236
15237 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15238
15239 /* Is there a known alignment and is it less than 4? */
15240 if (align < 4)
15241 {
15242 rtx scratch1 = gen_reg_rtx (Pmode);
15243 emit_move_insn (scratch1, out);
15244 /* Is there a known alignment and is it not 2? */
15245 if (align != 2)
15246 {
15247 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15248 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15249
15250 /* Leave just the two lower bits. */
15251 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15252 NULL_RTX, 0, OPTAB_WIDEN);
15253
15254 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15255 Pmode, 1, align_4_label);
15256 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15257 Pmode, 1, align_2_label);
15258 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15259 Pmode, 1, align_3_label);
15260 }
15261 else
15262 {
15263 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15264 check whether it is aligned to a 4-byte boundary. */
15265
15266 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15267 NULL_RTX, 0, OPTAB_WIDEN);
15268
15269 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15270 Pmode, 1, align_4_label);
15271 }
15272
15273 mem = change_address (src, QImode, out);
15274
15275 /* Now compare the bytes. */
15276
15277 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15278 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15279 QImode, 1, end_0_label);
15280
15281 /* Increment the address. */
15282 if (TARGET_64BIT)
15283 emit_insn (gen_adddi3 (out, out, const1_rtx));
15284 else
15285 emit_insn (gen_addsi3 (out, out, const1_rtx));
15286
15287 /* Not needed with an alignment of 2. */
15288 if (align != 2)
15289 {
15290 emit_label (align_2_label);
15291
15292 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15293 end_0_label);
15294
15295 if (TARGET_64BIT)
15296 emit_insn (gen_adddi3 (out, out, const1_rtx));
15297 else
15298 emit_insn (gen_addsi3 (out, out, const1_rtx));
15299
15300 emit_label (align_3_label);
15301 }
15302
15303 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15304 end_0_label);
15305
15306 if (TARGET_64BIT)
15307 emit_insn (gen_adddi3 (out, out, const1_rtx));
15308 else
15309 emit_insn (gen_addsi3 (out, out, const1_rtx));
15310 }
15311
15312 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15313 align this loop; that only makes the code bigger and does not help
15314 to speed it up. */
15315 emit_label (align_4_label);
15316
15317 mem = change_address (src, SImode, out);
15318 emit_move_insn (scratch, mem);
15319 if (TARGET_64BIT)
15320 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15321 else
15322 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15323
15324 /* This formula yields a nonzero result iff one of the bytes is zero.
15325 This saves three branches inside the loop and many cycles. */
15326
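  /* A concrete example of the test built below: for scratch == 0x12340078
     the byte at bits 15:8 is zero, and
       (0x12340078 - 0x01010101) & ~0x12340078 & 0x80808080 == 0x00008000,
     so exactly the high bit of that byte survives.  */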
15327 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15328 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15329 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15330 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15331 gen_int_mode (0x80808080, SImode)));
15332 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15333 align_4_label);
15334
15335 if (TARGET_CMOVE)
15336 {
15337 rtx reg = gen_reg_rtx (SImode);
15338 rtx reg2 = gen_reg_rtx (Pmode);
15339 emit_move_insn (reg, tmpreg);
15340 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15341
15342 /* If zero is not in the first two bytes, move two bytes forward. */
15343 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15344 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15345 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15346 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15347 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15348 reg,
15349 tmpreg)));
15350 /* Emit lea manually to avoid clobbering of flags. */
15351 emit_insn (gen_rtx_SET (SImode, reg2,
15352 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15353
15354 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15355 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15356 emit_insn (gen_rtx_SET (VOIDmode, out,
15357 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15358 reg2,
15359 out)));
15360
15361 }
15362 else
15363 {
15364 rtx end_2_label = gen_label_rtx ();
15365 /* Is zero in the first two bytes? */
15366
15367 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15368 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15369 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15370 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15371 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15372 pc_rtx);
15373 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15374 JUMP_LABEL (tmp) = end_2_label;
15375
15376 /* Not in the first two. Move two bytes forward. */
15377 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15378 if (TARGET_64BIT)
15379 emit_insn (gen_adddi3 (out, out, const2_rtx));
15380 else
15381 emit_insn (gen_addsi3 (out, out, const2_rtx));
15382
15383 emit_label (end_2_label);
15384
15385 }
15386
15387 /* Avoid branch in fixing the byte. */
15388 tmpreg = gen_lowpart (QImode, tmpreg);
15389 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15390 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15391 if (TARGET_64BIT)
15392 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15393 else
15394 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15395
15396 emit_label (end_0_label);
15397 }
15398
15399 /* Expand strlen. */
15400
15401 int
15402 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15403 {
15404 rtx addr, scratch1, scratch2, scratch3, scratch4;
15405
15406 /* The generic case of the strlen expander is long. Avoid its
15407 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15408
15409 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15410 && !TARGET_INLINE_ALL_STRINGOPS
15411 && !optimize_size
15412 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15413 return 0;
15414
15415 addr = force_reg (Pmode, XEXP (src, 0));
15416 scratch1 = gen_reg_rtx (Pmode);
15417
15418 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15419 && !optimize_size)
15420 {
15421 /* Well it seems that some optimizer does not combine a call like
15422 foo(strlen(bar), strlen(bar));
15423 when the move and the subtraction are done here. It does calculate
15424 the length just once when these instructions are done inside of
15425 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15426 often used and I use one fewer register for the lifetime of
15427 output_strlen_unroll() this is better. */
15428
15429 emit_move_insn (out, addr);
15430
15431 ix86_expand_strlensi_unroll_1 (out, src, align);
15432
15433 /* strlensi_unroll_1 returns the address of the zero at the end of
15434 the string, like memchr(), so compute the length by subtracting
15435 the start address. */
15436 if (TARGET_64BIT)
15437 emit_insn (gen_subdi3 (out, out, addr));
15438 else
15439 emit_insn (gen_subsi3 (out, out, addr));
15440 }
15441 else
15442 {
15443 rtx unspec;
15444 scratch2 = gen_reg_rtx (Pmode);
15445 scratch3 = gen_reg_rtx (Pmode);
15446 scratch4 = force_reg (Pmode, constm1_rtx);
15447
15448 emit_move_insn (scratch3, addr);
15449 eoschar = force_reg (QImode, eoschar);
15450
15451 src = replace_equiv_address_nv (src, scratch3);
15452
15453 /* If .md starts supporting :P, this can be done in .md. */
15454 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15455 scratch4), UNSPEC_SCAS);
15456 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15457 if (TARGET_64BIT)
15458 {
15459 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15460 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15461 }
15462 else
15463 {
15464 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15465 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15466 }
15467 }
15468 return 1;
15469 }
15470
15471 /* For a given symbol (function), construct code to compute the address of its
15472 PLT entry in the large x86-64 PIC model. */
15473 rtx
15474 construct_plt_address (rtx symbol)
15475 {
15476 rtx tmp = gen_reg_rtx (Pmode);
15477 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15478
15479 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15480 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15481
15482 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15483 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15484 return tmp;
15485 }
15486
15487 void
15488 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15489 rtx callarg2 ATTRIBUTE_UNUSED,
15490 rtx pop, int sibcall)
15491 {
15492 rtx use = NULL, call;
15493
15494 if (pop == const0_rtx)
15495 pop = NULL;
15496 gcc_assert (!TARGET_64BIT || !pop);
15497
15498 if (TARGET_MACHO && !TARGET_64BIT)
15499 {
15500 #if TARGET_MACHO
15501 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15502 fnaddr = machopic_indirect_call_target (fnaddr);
15503 #endif
15504 }
15505 else
15506 {
15507 /* Static functions and indirect calls don't need the pic register. */
15508 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15509 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15510 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15511 use_reg (&use, pic_offset_table_rtx);
15512 }
15513
15514 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15515 {
15516 rtx al = gen_rtx_REG (QImode, 0);
15517 emit_move_insn (al, callarg2);
15518 use_reg (&use, al);
15519 }
15520
15521 if (ix86_cmodel == CM_LARGE_PIC
15522 && GET_CODE (fnaddr) == MEM
15523 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15524 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15525 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15526 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15527 {
15528 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15529 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15530 }
15531 if (sibcall && TARGET_64BIT
15532 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15533 {
15534 rtx addr;
15535 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15536 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15537 emit_move_insn (fnaddr, addr);
15538 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15539 }
15540
15541 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15542 if (retval)
15543 call = gen_rtx_SET (VOIDmode, retval, call);
15544 if (pop)
15545 {
15546 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15547 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15548 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15549 }
15550
15551 call = emit_call_insn (call);
15552 if (use)
15553 CALL_INSN_FUNCTION_USAGE (call) = use;
15554 }
15555
15556 \f
15557 /* Clear stack slot assignments remembered from previous functions.
15558 This is called from INIT_EXPANDERS once before RTL is emitted for each
15559 function. */
15560
15561 static struct machine_function *
15562 ix86_init_machine_status (void)
15563 {
15564 struct machine_function *f;
15565
15566 f = GGC_CNEW (struct machine_function);
15567 f->use_fast_prologue_epilogue_nregs = -1;
15568 f->tls_descriptor_call_expanded_p = 0;
15569
15570 return f;
15571 }
15572
15573 /* Return a MEM corresponding to a stack slot with mode MODE.
15574 Allocate a new slot if necessary.
15575
15576 The RTL for a function can have several slots available: N is
15577 which slot to use. */
15578
15579 rtx
15580 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15581 {
15582 struct stack_local_entry *s;
15583
15584 gcc_assert (n < MAX_386_STACK_LOCALS);
15585
15586 for (s = ix86_stack_locals; s; s = s->next)
15587 if (s->mode == mode && s->n == n)
15588 return copy_rtx (s->rtl);
15589
15590 s = (struct stack_local_entry *)
15591 ggc_alloc (sizeof (struct stack_local_entry));
15592 s->n = n;
15593 s->mode = mode;
15594 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15595
15596 s->next = ix86_stack_locals;
15597 ix86_stack_locals = s;
15598 return s->rtl;
15599 }
15600
15601 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15602
15603 static GTY(()) rtx ix86_tls_symbol;
15604 rtx
15605 ix86_tls_get_addr (void)
15606 {
15607
15608 if (!ix86_tls_symbol)
15609 {
15610 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15611 (TARGET_ANY_GNU_TLS
15612 && !TARGET_64BIT)
15613 ? "___tls_get_addr"
15614 : "__tls_get_addr");
15615 }
15616
15617 return ix86_tls_symbol;
15618 }
15619
15620 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15621
15622 static GTY(()) rtx ix86_tls_module_base_symbol;
15623 rtx
15624 ix86_tls_module_base (void)
15625 {
15626
15627 if (!ix86_tls_module_base_symbol)
15628 {
15629 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15630 "_TLS_MODULE_BASE_");
15631 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15632 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15633 }
15634
15635 return ix86_tls_module_base_symbol;
15636 }
15637 \f
15638 /* Calculate the length of the memory address in the instruction
15639 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15640
15641 int
15642 memory_address_length (rtx addr)
15643 {
15644 struct ix86_address parts;
15645 rtx base, index, disp;
15646 int len;
15647 int ok;
15648
15649 if (GET_CODE (addr) == PRE_DEC
15650 || GET_CODE (addr) == POST_INC
15651 || GET_CODE (addr) == PRE_MODIFY
15652 || GET_CODE (addr) == POST_MODIFY)
15653 return 0;
15654
15655 ok = ix86_decompose_address (addr, &parts);
15656 gcc_assert (ok);
15657
15658 if (parts.base && GET_CODE (parts.base) == SUBREG)
15659 parts.base = SUBREG_REG (parts.base);
15660 if (parts.index && GET_CODE (parts.index) == SUBREG)
15661 parts.index = SUBREG_REG (parts.index);
15662
15663 base = parts.base;
15664 index = parts.index;
15665 disp = parts.disp;
15666 len = 0;
15667
15668 /* Rule of thumb:
15669 - esp as the base always wants an index,
15670 - ebp as the base always wants a displacement. */
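  /* A few concrete encodings as a sanity check (the lengths are only the
     extra bytes returned here, not the modrm byte itself):
       (%eax)        -> 0 (plain modrm)
       (%esp)        -> 1 (needs a SIB byte)
       8(%ebp)       -> 1 (disp8)
       foo(,%eax,4)  -> 5 (SIB byte plus disp32).  */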
15671
15672 /* Register Indirect. */
15673 if (base && !index && !disp)
15674 {
15675 /* esp (for its index) and ebp (for its displacement) need
15676 the two-byte modrm form. */
15677 if (addr == stack_pointer_rtx
15678 || addr == arg_pointer_rtx
15679 || addr == frame_pointer_rtx
15680 || addr == hard_frame_pointer_rtx)
15681 len = 1;
15682 }
15683
15684 /* Direct Addressing. */
15685 else if (disp && !base && !index)
15686 len = 4;
15687
15688 else
15689 {
15690 /* Find the length of the displacement constant. */
15691 if (disp)
15692 {
15693 if (base && satisfies_constraint_K (disp))
15694 len = 1;
15695 else
15696 len = 4;
15697 }
15698 /* ebp always wants a displacement. */
15699 else if (base == hard_frame_pointer_rtx)
15700 len = 1;
15701
15702 /* An index requires the two-byte modrm form.... */
15703 if (index
15704 /* ...like esp, which always wants an index. */
15705 || base == stack_pointer_rtx
15706 || base == arg_pointer_rtx
15707 || base == frame_pointer_rtx)
15708 len += 1;
15709 }
15710
15711 return len;
15712 }
15713
15714 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15715 is set, expect that the insn has an 8-bit immediate alternative. */
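/* For instance (an illustrative example, not taken from the md file):
   "addl $3, %eax" can use the sign-extended 8-bit immediate form, so with
   SHORTFORM it contributes 1 byte, while "addl $100000, %eax" needs the
   full 4-byte immediate.  */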
15716 int
15717 ix86_attr_length_immediate_default (rtx insn, int shortform)
15718 {
15719 int len = 0;
15720 int i;
15721 extract_insn_cached (insn);
15722 for (i = recog_data.n_operands - 1; i >= 0; --i)
15723 if (CONSTANT_P (recog_data.operand[i]))
15724 {
15725 gcc_assert (!len);
15726 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15727 len = 1;
15728 else
15729 {
15730 switch (get_attr_mode (insn))
15731 {
15732 case MODE_QI:
15733 len+=1;
15734 break;
15735 case MODE_HI:
15736 len+=2;
15737 break;
15738 case MODE_SI:
15739 len+=4;
15740 break;
15741 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15742 case MODE_DI:
15743 len+=4;
15744 break;
15745 default:
15746 fatal_insn ("unknown insn mode", insn);
15747 }
15748 }
15749 }
15750 return len;
15751 }
15752 /* Compute default value for "length_address" attribute. */
15753 int
15754 ix86_attr_length_address_default (rtx insn)
15755 {
15756 int i;
15757
15758 if (get_attr_type (insn) == TYPE_LEA)
15759 {
15760 rtx set = PATTERN (insn);
15761
15762 if (GET_CODE (set) == PARALLEL)
15763 set = XVECEXP (set, 0, 0);
15764
15765 gcc_assert (GET_CODE (set) == SET);
15766
15767 return memory_address_length (SET_SRC (set));
15768 }
15769
15770 extract_insn_cached (insn);
15771 for (i = recog_data.n_operands - 1; i >= 0; --i)
15772 if (MEM_P (recog_data.operand[i]))
15773 {
15774 return memory_address_length (XEXP (recog_data.operand[i], 0));
15776 }
15777 return 0;
15778 }
15779 \f
15780 /* Return the maximum number of instructions a cpu can issue. */
15781
15782 static int
15783 ix86_issue_rate (void)
15784 {
15785 switch (ix86_tune)
15786 {
15787 case PROCESSOR_PENTIUM:
15788 case PROCESSOR_K6:
15789 return 2;
15790
15791 case PROCESSOR_PENTIUMPRO:
15792 case PROCESSOR_PENTIUM4:
15793 case PROCESSOR_ATHLON:
15794 case PROCESSOR_K8:
15795 case PROCESSOR_AMDFAM10:
15796 case PROCESSOR_NOCONA:
15797 case PROCESSOR_GENERIC32:
15798 case PROCESSOR_GENERIC64:
15799 return 3;
15800
15801 case PROCESSOR_CORE2:
15802 return 4;
15803
15804 default:
15805 return 1;
15806 }
15807 }
15808
15809 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15810 by DEP_INSN and nothing else set by DEP_INSN. */
15811
15812 static int
15813 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15814 {
15815 rtx set, set2;
15816
15817 /* Simplify the test for uninteresting insns. */
15818 if (insn_type != TYPE_SETCC
15819 && insn_type != TYPE_ICMOV
15820 && insn_type != TYPE_FCMOV
15821 && insn_type != TYPE_IBR)
15822 return 0;
15823
15824 if ((set = single_set (dep_insn)) != 0)
15825 {
15826 set = SET_DEST (set);
15827 set2 = NULL_RTX;
15828 }
15829 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15830 && XVECLEN (PATTERN (dep_insn), 0) == 2
15831 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15832 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15833 {
15834 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15835 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15836 }
15837 else
15838 return 0;
15839
15840 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15841 return 0;
15842
15843 /* This test is true if the dependent insn reads the flags but
15844 not any other potentially set register. */
15845 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15846 return 0;
15847
15848 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15849 return 0;
15850
15851 return 1;
15852 }
15853
15854 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15855 address with operands set by DEP_INSN. */
15856
15857 static int
15858 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15859 {
15860 rtx addr;
15861
15862 if (insn_type == TYPE_LEA
15863 && TARGET_PENTIUM)
15864 {
15865 addr = PATTERN (insn);
15866
15867 if (GET_CODE (addr) == PARALLEL)
15868 addr = XVECEXP (addr, 0, 0);
15869
15870 gcc_assert (GET_CODE (addr) == SET);
15871
15872 addr = SET_SRC (addr);
15873 }
15874 else
15875 {
15876 int i;
15877 extract_insn_cached (insn);
15878 for (i = recog_data.n_operands - 1; i >= 0; --i)
15879 if (MEM_P (recog_data.operand[i]))
15880 {
15881 addr = XEXP (recog_data.operand[i], 0);
15882 goto found;
15883 }
15884 return 0;
15885 found:;
15886 }
15887
15888 return modified_in_p (addr, dep_insn);
15889 }
15890
15891 static int
15892 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15893 {
15894 enum attr_type insn_type, dep_insn_type;
15895 enum attr_memory memory;
15896 rtx set, set2;
15897 int dep_insn_code_number;
15898
15899 /* Anti and output dependencies have zero cost on all CPUs. */
15900 if (REG_NOTE_KIND (link) != 0)
15901 return 0;
15902
15903 dep_insn_code_number = recog_memoized (dep_insn);
15904
15905 /* If we can't recognize the insns, we can't really do anything. */
15906 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15907 return cost;
15908
15909 insn_type = get_attr_type (insn);
15910 dep_insn_type = get_attr_type (dep_insn);
15911
15912 switch (ix86_tune)
15913 {
15914 case PROCESSOR_PENTIUM:
15915 /* Address Generation Interlock adds a cycle of latency. */
15916 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15917 cost += 1;
15918
15919 /* ??? Compares pair with jump/setcc. */
15920 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15921 cost = 0;
15922
15923 /* Floating point stores require the value to be ready one cycle earlier. */
15924 if (insn_type == TYPE_FMOV
15925 && get_attr_memory (insn) == MEMORY_STORE
15926 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15927 cost += 1;
15928 break;
15929
15930 case PROCESSOR_PENTIUMPRO:
15931 memory = get_attr_memory (insn);
15932
15933 /* INT->FP conversion is expensive. */
15934 if (get_attr_fp_int_src (dep_insn))
15935 cost += 5;
15936
15937 /* There is one cycle extra latency between an FP op and a store. */
15938 if (insn_type == TYPE_FMOV
15939 && (set = single_set (dep_insn)) != NULL_RTX
15940 && (set2 = single_set (insn)) != NULL_RTX
15941 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15942 && MEM_P (SET_DEST (set2)))
15943 cost += 1;
15944
15945 /* Show ability of reorder buffer to hide latency of load by executing
15946 in parallel with previous instruction in case
15947 previous instruction is not needed to compute the address. */
15948 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15949 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15950 {
15951 /* Claim moves to take one cycle, as the core can issue one load
15952 at a time and the next load can start a cycle later. */
15953 if (dep_insn_type == TYPE_IMOV
15954 || dep_insn_type == TYPE_FMOV)
15955 cost = 1;
15956 else if (cost > 1)
15957 cost--;
15958 }
15959 break;
15960
15961 case PROCESSOR_K6:
15962 memory = get_attr_memory (insn);
15963
15964 /* The esp dependency is resolved before the instruction is really
15965 finished. */
15966 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15967 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15968 return 1;
15969
15970 /* INT->FP conversion is expensive. */
15971 if (get_attr_fp_int_src (dep_insn))
15972 cost += 5;
15973
15974 /* Show ability of reorder buffer to hide latency of load by executing
15975 in parallel with previous instruction in case
15976 previous instruction is not needed to compute the address. */
15977 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15978 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15979 {
15980 /* Claim moves to take one cycle, as the core can issue one load
15981 at a time and the next load can start a cycle later. */
15982 if (dep_insn_type == TYPE_IMOV
15983 || dep_insn_type == TYPE_FMOV)
15984 cost = 1;
15985 else if (cost > 2)
15986 cost -= 2;
15987 else
15988 cost = 1;
15989 }
15990 break;
15991
15992 case PROCESSOR_ATHLON:
15993 case PROCESSOR_K8:
15994 case PROCESSOR_AMDFAM10:
15995 case PROCESSOR_GENERIC32:
15996 case PROCESSOR_GENERIC64:
15997 memory = get_attr_memory (insn);
15998
15999 /* Show ability of reorder buffer to hide latency of load by executing
16000 in parallel with previous instruction in case
16001 previous instruction is not needed to compute the address. */
16002 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16003 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16004 {
16005 enum attr_unit unit = get_attr_unit (insn);
16006 int loadcost = 3;
16007
16008 /* Because of the difference between the length of integer and
16009 floating unit pipeline preparation stages, the memory operands
16010 for floating point are cheaper.
16011
16012 ??? For Athlon the difference is most probably 2. */
16013 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16014 loadcost = 3;
16015 else
16016 loadcost = TARGET_ATHLON ? 2 : 0;
16017
16018 if (cost >= loadcost)
16019 cost -= loadcost;
16020 else
16021 cost = 0;
16022 }
16023
16024 default:
16025 break;
16026 }
16027
16028 return cost;
16029 }
16030
16031 /* How many alternative schedules to try. This should be as wide as the
16032 scheduling freedom in the DFA, but no wider. Making this value too
16033 large results in extra work for the scheduler. */
16034
16035 static int
16036 ia32_multipass_dfa_lookahead (void)
16037 {
16038 if (ix86_tune == PROCESSOR_PENTIUM)
16039 return 2;
16040
16041 if (ix86_tune == PROCESSOR_PENTIUMPRO
16042 || ix86_tune == PROCESSOR_K6)
16043 return 1;
16044
16045 else
16046 return 0;
16047 }
16048
16049 \f
16050 /* Compute the alignment given to a constant that is being placed in memory.
16051 EXP is the constant and ALIGN is the alignment that the object would
16052 ordinarily have.
16053 The value of this function is used instead of that alignment to align
16054 the object. */
16055
16056 int
16057 ix86_constant_alignment (tree exp, int align)
16058 {
16059 if (TREE_CODE (exp) == REAL_CST)
16060 {
16061 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16062 return 64;
16063 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16064 return 128;
16065 }
16066 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16067 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16068 return BITS_PER_WORD;
16069
16070 return align;
16071 }
16072
16073 /* Compute the alignment for a static variable.
16074 TYPE is the data type, and ALIGN is the alignment that
16075 the object would ordinarily have. The value of this function is used
16076 instead of that alignment to align the object. */
16077
16078 int
16079 ix86_data_alignment (tree type, int align)
16080 {
16081 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16082
16083 if (AGGREGATE_TYPE_P (type)
16084 && TYPE_SIZE (type)
16085 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16086 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16087 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16088 && align < max_align)
16089 align = max_align;
16090
16091 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16092 to a 16-byte boundary. */
16093 if (TARGET_64BIT)
16094 {
16095 if (AGGREGATE_TYPE_P (type)
16096 && TYPE_SIZE (type)
16097 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16098 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16099 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16100 return 128;
16101 }
16102
16103 if (TREE_CODE (type) == ARRAY_TYPE)
16104 {
16105 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16106 return 64;
16107 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16108 return 128;
16109 }
16110 else if (TREE_CODE (type) == COMPLEX_TYPE)
16111 {
16112
16113 if (TYPE_MODE (type) == DCmode && align < 64)
16114 return 64;
16115 if (TYPE_MODE (type) == XCmode && align < 128)
16116 return 128;
16117 }
16118 else if ((TREE_CODE (type) == RECORD_TYPE
16119 || TREE_CODE (type) == UNION_TYPE
16120 || TREE_CODE (type) == QUAL_UNION_TYPE)
16121 && TYPE_FIELDS (type))
16122 {
16123 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16124 return 64;
16125 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16126 return 128;
16127 }
16128 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16129 || TREE_CODE (type) == INTEGER_TYPE)
16130 {
16131 if (TYPE_MODE (type) == DFmode && align < 64)
16132 return 64;
16133 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16134 return 128;
16135 }
16136
16137 return align;
16138 }
16139
16140 /* Compute the alignment for a local variable.
16141 TYPE is the data type, and ALIGN is the alignment that
16142 the object would ordinarily have. The value of this macro is used
16143 instead of that alignment to align the object. */
16144
16145 int
16146 ix86_local_alignment (tree type, int align)
16147 {
16148 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16149 to a 16-byte boundary. */
16150 if (TARGET_64BIT)
16151 {
16152 if (AGGREGATE_TYPE_P (type)
16153 && TYPE_SIZE (type)
16154 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16155 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16156 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16157 return 128;
16158 }
16159 if (TREE_CODE (type) == ARRAY_TYPE)
16160 {
16161 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16162 return 64;
16163 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16164 return 128;
16165 }
16166 else if (TREE_CODE (type) == COMPLEX_TYPE)
16167 {
16168 if (TYPE_MODE (type) == DCmode && align < 64)
16169 return 64;
16170 if (TYPE_MODE (type) == XCmode && align < 128)
16171 return 128;
16172 }
16173 else if ((TREE_CODE (type) == RECORD_TYPE
16174 || TREE_CODE (type) == UNION_TYPE
16175 || TREE_CODE (type) == QUAL_UNION_TYPE)
16176 && TYPE_FIELDS (type))
16177 {
16178 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16179 return 64;
16180 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16181 return 128;
16182 }
16183 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16184 || TREE_CODE (type) == INTEGER_TYPE)
16185 {
16186
16187 if (TYPE_MODE (type) == DFmode && align < 64)
16188 return 64;
16189 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16190 return 128;
16191 }
16192 return align;
16193 }
16194 \f
16195 /* Emit RTL insns to initialize the variable parts of a trampoline.
16196 FNADDR is an RTX for the address of the function's pure code.
16197 CXT is an RTX for the static chain value for the function. */
16198 void
16199 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16200 {
16201 if (!TARGET_64BIT)
16202 {
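      /* The 32-bit trampoline is 10 bytes; the bytes emitted below encode
	 (roughly) "movl $CXT, %ecx" (0xb9 followed by the 32-bit CXT) and
	 "jmp FNADDR" (0xe9 followed by a 32-bit displacement taken relative
	 to the end of the jmp).  */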
16203 /* Compute offset from the end of the jmp to the target function. */
16204 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16205 plus_constant (tramp, 10),
16206 NULL_RTX, 1, OPTAB_DIRECT);
16207 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16208 gen_int_mode (0xb9, QImode));
16209 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16210 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16211 gen_int_mode (0xe9, QImode));
16212 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16213 }
16214 else
16215 {
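      /* The 64-bit trampoline (roughly; see the byte values emitted below)
	 loads FNADDR into %r11 with either "movl" (bytes 0x41 0xbb) or
	 "movabs" (0x49 0xbb), loads CXT into %r10 with "movabs" (0x49 0xba),
	 and finishes with "jmp *%r11" (0x49 0xff 0xe3).  */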
16216 int offset = 0;
16217 /* Try to load address using shorter movl instead of movabs.
16218 We may want to support movq for kernel mode, but kernel does not use
16219 trampolines at the moment. */
16220 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16221 {
16222 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16223 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16224 gen_int_mode (0xbb41, HImode));
16225 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16226 gen_lowpart (SImode, fnaddr));
16227 offset += 6;
16228 }
16229 else
16230 {
16231 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16232 gen_int_mode (0xbb49, HImode));
16233 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16234 fnaddr);
16235 offset += 10;
16236 }
16237 /* Load static chain using movabs to r10. */
16238 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16239 gen_int_mode (0xba49, HImode));
16240 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16241 cxt);
16242 offset += 10;
16243 /* Jump to r11. */
16244 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16245 gen_int_mode (0xff49, HImode));
16246 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16247 gen_int_mode (0xe3, QImode));
16248 offset += 3;
16249 gcc_assert (offset <= TRAMPOLINE_SIZE);
16250 }
16251
16252 #ifdef ENABLE_EXECUTE_STACK
16253 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16254 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16255 #endif
16256 }
16257 \f
16258 /* Codes for all the SSE/MMX builtins. */
16259 enum ix86_builtins
16260 {
16261 IX86_BUILTIN_ADDPS,
16262 IX86_BUILTIN_ADDSS,
16263 IX86_BUILTIN_DIVPS,
16264 IX86_BUILTIN_DIVSS,
16265 IX86_BUILTIN_MULPS,
16266 IX86_BUILTIN_MULSS,
16267 IX86_BUILTIN_SUBPS,
16268 IX86_BUILTIN_SUBSS,
16269
16270 IX86_BUILTIN_CMPEQPS,
16271 IX86_BUILTIN_CMPLTPS,
16272 IX86_BUILTIN_CMPLEPS,
16273 IX86_BUILTIN_CMPGTPS,
16274 IX86_BUILTIN_CMPGEPS,
16275 IX86_BUILTIN_CMPNEQPS,
16276 IX86_BUILTIN_CMPNLTPS,
16277 IX86_BUILTIN_CMPNLEPS,
16278 IX86_BUILTIN_CMPNGTPS,
16279 IX86_BUILTIN_CMPNGEPS,
16280 IX86_BUILTIN_CMPORDPS,
16281 IX86_BUILTIN_CMPUNORDPS,
16282 IX86_BUILTIN_CMPEQSS,
16283 IX86_BUILTIN_CMPLTSS,
16284 IX86_BUILTIN_CMPLESS,
16285 IX86_BUILTIN_CMPNEQSS,
16286 IX86_BUILTIN_CMPNLTSS,
16287 IX86_BUILTIN_CMPNLESS,
16288 IX86_BUILTIN_CMPNGTSS,
16289 IX86_BUILTIN_CMPNGESS,
16290 IX86_BUILTIN_CMPORDSS,
16291 IX86_BUILTIN_CMPUNORDSS,
16292
16293 IX86_BUILTIN_COMIEQSS,
16294 IX86_BUILTIN_COMILTSS,
16295 IX86_BUILTIN_COMILESS,
16296 IX86_BUILTIN_COMIGTSS,
16297 IX86_BUILTIN_COMIGESS,
16298 IX86_BUILTIN_COMINEQSS,
16299 IX86_BUILTIN_UCOMIEQSS,
16300 IX86_BUILTIN_UCOMILTSS,
16301 IX86_BUILTIN_UCOMILESS,
16302 IX86_BUILTIN_UCOMIGTSS,
16303 IX86_BUILTIN_UCOMIGESS,
16304 IX86_BUILTIN_UCOMINEQSS,
16305
16306 IX86_BUILTIN_CVTPI2PS,
16307 IX86_BUILTIN_CVTPS2PI,
16308 IX86_BUILTIN_CVTSI2SS,
16309 IX86_BUILTIN_CVTSI642SS,
16310 IX86_BUILTIN_CVTSS2SI,
16311 IX86_BUILTIN_CVTSS2SI64,
16312 IX86_BUILTIN_CVTTPS2PI,
16313 IX86_BUILTIN_CVTTSS2SI,
16314 IX86_BUILTIN_CVTTSS2SI64,
16315
16316 IX86_BUILTIN_MAXPS,
16317 IX86_BUILTIN_MAXSS,
16318 IX86_BUILTIN_MINPS,
16319 IX86_BUILTIN_MINSS,
16320
16321 IX86_BUILTIN_LOADUPS,
16322 IX86_BUILTIN_STOREUPS,
16323 IX86_BUILTIN_MOVSS,
16324
16325 IX86_BUILTIN_MOVHLPS,
16326 IX86_BUILTIN_MOVLHPS,
16327 IX86_BUILTIN_LOADHPS,
16328 IX86_BUILTIN_LOADLPS,
16329 IX86_BUILTIN_STOREHPS,
16330 IX86_BUILTIN_STORELPS,
16331
16332 IX86_BUILTIN_MASKMOVQ,
16333 IX86_BUILTIN_MOVMSKPS,
16334 IX86_BUILTIN_PMOVMSKB,
16335
16336 IX86_BUILTIN_MOVNTPS,
16337 IX86_BUILTIN_MOVNTQ,
16338
16339 IX86_BUILTIN_LOADDQU,
16340 IX86_BUILTIN_STOREDQU,
16341
16342 IX86_BUILTIN_PACKSSWB,
16343 IX86_BUILTIN_PACKSSDW,
16344 IX86_BUILTIN_PACKUSWB,
16345
16346 IX86_BUILTIN_PADDB,
16347 IX86_BUILTIN_PADDW,
16348 IX86_BUILTIN_PADDD,
16349 IX86_BUILTIN_PADDQ,
16350 IX86_BUILTIN_PADDSB,
16351 IX86_BUILTIN_PADDSW,
16352 IX86_BUILTIN_PADDUSB,
16353 IX86_BUILTIN_PADDUSW,
16354 IX86_BUILTIN_PSUBB,
16355 IX86_BUILTIN_PSUBW,
16356 IX86_BUILTIN_PSUBD,
16357 IX86_BUILTIN_PSUBQ,
16358 IX86_BUILTIN_PSUBSB,
16359 IX86_BUILTIN_PSUBSW,
16360 IX86_BUILTIN_PSUBUSB,
16361 IX86_BUILTIN_PSUBUSW,
16362
16363 IX86_BUILTIN_PAND,
16364 IX86_BUILTIN_PANDN,
16365 IX86_BUILTIN_POR,
16366 IX86_BUILTIN_PXOR,
16367
16368 IX86_BUILTIN_PAVGB,
16369 IX86_BUILTIN_PAVGW,
16370
16371 IX86_BUILTIN_PCMPEQB,
16372 IX86_BUILTIN_PCMPEQW,
16373 IX86_BUILTIN_PCMPEQD,
16374 IX86_BUILTIN_PCMPGTB,
16375 IX86_BUILTIN_PCMPGTW,
16376 IX86_BUILTIN_PCMPGTD,
16377
16378 IX86_BUILTIN_PMADDWD,
16379
16380 IX86_BUILTIN_PMAXSW,
16381 IX86_BUILTIN_PMAXUB,
16382 IX86_BUILTIN_PMINSW,
16383 IX86_BUILTIN_PMINUB,
16384
16385 IX86_BUILTIN_PMULHUW,
16386 IX86_BUILTIN_PMULHW,
16387 IX86_BUILTIN_PMULLW,
16388
16389 IX86_BUILTIN_PSADBW,
16390 IX86_BUILTIN_PSHUFW,
16391
16392 IX86_BUILTIN_PSLLW,
16393 IX86_BUILTIN_PSLLD,
16394 IX86_BUILTIN_PSLLQ,
16395 IX86_BUILTIN_PSRAW,
16396 IX86_BUILTIN_PSRAD,
16397 IX86_BUILTIN_PSRLW,
16398 IX86_BUILTIN_PSRLD,
16399 IX86_BUILTIN_PSRLQ,
16400 IX86_BUILTIN_PSLLWI,
16401 IX86_BUILTIN_PSLLDI,
16402 IX86_BUILTIN_PSLLQI,
16403 IX86_BUILTIN_PSRAWI,
16404 IX86_BUILTIN_PSRADI,
16405 IX86_BUILTIN_PSRLWI,
16406 IX86_BUILTIN_PSRLDI,
16407 IX86_BUILTIN_PSRLQI,
16408
16409 IX86_BUILTIN_PUNPCKHBW,
16410 IX86_BUILTIN_PUNPCKHWD,
16411 IX86_BUILTIN_PUNPCKHDQ,
16412 IX86_BUILTIN_PUNPCKLBW,
16413 IX86_BUILTIN_PUNPCKLWD,
16414 IX86_BUILTIN_PUNPCKLDQ,
16415
16416 IX86_BUILTIN_SHUFPS,
16417
16418 IX86_BUILTIN_RCPPS,
16419 IX86_BUILTIN_RCPSS,
16420 IX86_BUILTIN_RSQRTPS,
16421 IX86_BUILTIN_RSQRTSS,
16422 IX86_BUILTIN_SQRTPS,
16423 IX86_BUILTIN_SQRTSS,
16424
16425 IX86_BUILTIN_UNPCKHPS,
16426 IX86_BUILTIN_UNPCKLPS,
16427
16428 IX86_BUILTIN_ANDPS,
16429 IX86_BUILTIN_ANDNPS,
16430 IX86_BUILTIN_ORPS,
16431 IX86_BUILTIN_XORPS,
16432
16433 IX86_BUILTIN_EMMS,
16434 IX86_BUILTIN_LDMXCSR,
16435 IX86_BUILTIN_STMXCSR,
16436 IX86_BUILTIN_SFENCE,
16437
16438 /* 3DNow! Original */
16439 IX86_BUILTIN_FEMMS,
16440 IX86_BUILTIN_PAVGUSB,
16441 IX86_BUILTIN_PF2ID,
16442 IX86_BUILTIN_PFACC,
16443 IX86_BUILTIN_PFADD,
16444 IX86_BUILTIN_PFCMPEQ,
16445 IX86_BUILTIN_PFCMPGE,
16446 IX86_BUILTIN_PFCMPGT,
16447 IX86_BUILTIN_PFMAX,
16448 IX86_BUILTIN_PFMIN,
16449 IX86_BUILTIN_PFMUL,
16450 IX86_BUILTIN_PFRCP,
16451 IX86_BUILTIN_PFRCPIT1,
16452 IX86_BUILTIN_PFRCPIT2,
16453 IX86_BUILTIN_PFRSQIT1,
16454 IX86_BUILTIN_PFRSQRT,
16455 IX86_BUILTIN_PFSUB,
16456 IX86_BUILTIN_PFSUBR,
16457 IX86_BUILTIN_PI2FD,
16458 IX86_BUILTIN_PMULHRW,
16459
16460 /* 3DNow! Athlon Extensions */
16461 IX86_BUILTIN_PF2IW,
16462 IX86_BUILTIN_PFNACC,
16463 IX86_BUILTIN_PFPNACC,
16464 IX86_BUILTIN_PI2FW,
16465 IX86_BUILTIN_PSWAPDSI,
16466 IX86_BUILTIN_PSWAPDSF,
16467
16468 /* SSE2 */
16469 IX86_BUILTIN_ADDPD,
16470 IX86_BUILTIN_ADDSD,
16471 IX86_BUILTIN_DIVPD,
16472 IX86_BUILTIN_DIVSD,
16473 IX86_BUILTIN_MULPD,
16474 IX86_BUILTIN_MULSD,
16475 IX86_BUILTIN_SUBPD,
16476 IX86_BUILTIN_SUBSD,
16477
16478 IX86_BUILTIN_CMPEQPD,
16479 IX86_BUILTIN_CMPLTPD,
16480 IX86_BUILTIN_CMPLEPD,
16481 IX86_BUILTIN_CMPGTPD,
16482 IX86_BUILTIN_CMPGEPD,
16483 IX86_BUILTIN_CMPNEQPD,
16484 IX86_BUILTIN_CMPNLTPD,
16485 IX86_BUILTIN_CMPNLEPD,
16486 IX86_BUILTIN_CMPNGTPD,
16487 IX86_BUILTIN_CMPNGEPD,
16488 IX86_BUILTIN_CMPORDPD,
16489 IX86_BUILTIN_CMPUNORDPD,
16490 IX86_BUILTIN_CMPEQSD,
16491 IX86_BUILTIN_CMPLTSD,
16492 IX86_BUILTIN_CMPLESD,
16493 IX86_BUILTIN_CMPNEQSD,
16494 IX86_BUILTIN_CMPNLTSD,
16495 IX86_BUILTIN_CMPNLESD,
16496 IX86_BUILTIN_CMPORDSD,
16497 IX86_BUILTIN_CMPUNORDSD,
16498
16499 IX86_BUILTIN_COMIEQSD,
16500 IX86_BUILTIN_COMILTSD,
16501 IX86_BUILTIN_COMILESD,
16502 IX86_BUILTIN_COMIGTSD,
16503 IX86_BUILTIN_COMIGESD,
16504 IX86_BUILTIN_COMINEQSD,
16505 IX86_BUILTIN_UCOMIEQSD,
16506 IX86_BUILTIN_UCOMILTSD,
16507 IX86_BUILTIN_UCOMILESD,
16508 IX86_BUILTIN_UCOMIGTSD,
16509 IX86_BUILTIN_UCOMIGESD,
16510 IX86_BUILTIN_UCOMINEQSD,
16511
16512 IX86_BUILTIN_MAXPD,
16513 IX86_BUILTIN_MAXSD,
16514 IX86_BUILTIN_MINPD,
16515 IX86_BUILTIN_MINSD,
16516
16517 IX86_BUILTIN_ANDPD,
16518 IX86_BUILTIN_ANDNPD,
16519 IX86_BUILTIN_ORPD,
16520 IX86_BUILTIN_XORPD,
16521
16522 IX86_BUILTIN_SQRTPD,
16523 IX86_BUILTIN_SQRTSD,
16524
16525 IX86_BUILTIN_UNPCKHPD,
16526 IX86_BUILTIN_UNPCKLPD,
16527
16528 IX86_BUILTIN_SHUFPD,
16529
16530 IX86_BUILTIN_LOADUPD,
16531 IX86_BUILTIN_STOREUPD,
16532 IX86_BUILTIN_MOVSD,
16533
16534 IX86_BUILTIN_LOADHPD,
16535 IX86_BUILTIN_LOADLPD,
16536
16537 IX86_BUILTIN_CVTDQ2PD,
16538 IX86_BUILTIN_CVTDQ2PS,
16539
16540 IX86_BUILTIN_CVTPD2DQ,
16541 IX86_BUILTIN_CVTPD2PI,
16542 IX86_BUILTIN_CVTPD2PS,
16543 IX86_BUILTIN_CVTTPD2DQ,
16544 IX86_BUILTIN_CVTTPD2PI,
16545
16546 IX86_BUILTIN_CVTPI2PD,
16547 IX86_BUILTIN_CVTSI2SD,
16548 IX86_BUILTIN_CVTSI642SD,
16549
16550 IX86_BUILTIN_CVTSD2SI,
16551 IX86_BUILTIN_CVTSD2SI64,
16552 IX86_BUILTIN_CVTSD2SS,
16553 IX86_BUILTIN_CVTSS2SD,
16554 IX86_BUILTIN_CVTTSD2SI,
16555 IX86_BUILTIN_CVTTSD2SI64,
16556
16557 IX86_BUILTIN_CVTPS2DQ,
16558 IX86_BUILTIN_CVTPS2PD,
16559 IX86_BUILTIN_CVTTPS2DQ,
16560
16561 IX86_BUILTIN_MOVNTI,
16562 IX86_BUILTIN_MOVNTPD,
16563 IX86_BUILTIN_MOVNTDQ,
16564
16565 /* SSE2 MMX */
16566 IX86_BUILTIN_MASKMOVDQU,
16567 IX86_BUILTIN_MOVMSKPD,
16568 IX86_BUILTIN_PMOVMSKB128,
16569
16570 IX86_BUILTIN_PACKSSWB128,
16571 IX86_BUILTIN_PACKSSDW128,
16572 IX86_BUILTIN_PACKUSWB128,
16573
16574 IX86_BUILTIN_PADDB128,
16575 IX86_BUILTIN_PADDW128,
16576 IX86_BUILTIN_PADDD128,
16577 IX86_BUILTIN_PADDQ128,
16578 IX86_BUILTIN_PADDSB128,
16579 IX86_BUILTIN_PADDSW128,
16580 IX86_BUILTIN_PADDUSB128,
16581 IX86_BUILTIN_PADDUSW128,
16582 IX86_BUILTIN_PSUBB128,
16583 IX86_BUILTIN_PSUBW128,
16584 IX86_BUILTIN_PSUBD128,
16585 IX86_BUILTIN_PSUBQ128,
16586 IX86_BUILTIN_PSUBSB128,
16587 IX86_BUILTIN_PSUBSW128,
16588 IX86_BUILTIN_PSUBUSB128,
16589 IX86_BUILTIN_PSUBUSW128,
16590
16591 IX86_BUILTIN_PAND128,
16592 IX86_BUILTIN_PANDN128,
16593 IX86_BUILTIN_POR128,
16594 IX86_BUILTIN_PXOR128,
16595
16596 IX86_BUILTIN_PAVGB128,
16597 IX86_BUILTIN_PAVGW128,
16598
16599 IX86_BUILTIN_PCMPEQB128,
16600 IX86_BUILTIN_PCMPEQW128,
16601 IX86_BUILTIN_PCMPEQD128,
16602 IX86_BUILTIN_PCMPGTB128,
16603 IX86_BUILTIN_PCMPGTW128,
16604 IX86_BUILTIN_PCMPGTD128,
16605
16606 IX86_BUILTIN_PMADDWD128,
16607
16608 IX86_BUILTIN_PMAXSW128,
16609 IX86_BUILTIN_PMAXUB128,
16610 IX86_BUILTIN_PMINSW128,
16611 IX86_BUILTIN_PMINUB128,
16612
16613 IX86_BUILTIN_PMULUDQ,
16614 IX86_BUILTIN_PMULUDQ128,
16615 IX86_BUILTIN_PMULHUW128,
16616 IX86_BUILTIN_PMULHW128,
16617 IX86_BUILTIN_PMULLW128,
16618
16619 IX86_BUILTIN_PSADBW128,
16620 IX86_BUILTIN_PSHUFHW,
16621 IX86_BUILTIN_PSHUFLW,
16622 IX86_BUILTIN_PSHUFD,
16623
16624 IX86_BUILTIN_PSLLDQI128,
16625 IX86_BUILTIN_PSLLWI128,
16626 IX86_BUILTIN_PSLLDI128,
16627 IX86_BUILTIN_PSLLQI128,
16628 IX86_BUILTIN_PSRAWI128,
16629 IX86_BUILTIN_PSRADI128,
16630 IX86_BUILTIN_PSRLDQI128,
16631 IX86_BUILTIN_PSRLWI128,
16632 IX86_BUILTIN_PSRLDI128,
16633 IX86_BUILTIN_PSRLQI128,
16634
16635 IX86_BUILTIN_PSLLDQ128,
16636 IX86_BUILTIN_PSLLW128,
16637 IX86_BUILTIN_PSLLD128,
16638 IX86_BUILTIN_PSLLQ128,
16639 IX86_BUILTIN_PSRAW128,
16640 IX86_BUILTIN_PSRAD128,
16641 IX86_BUILTIN_PSRLW128,
16642 IX86_BUILTIN_PSRLD128,
16643 IX86_BUILTIN_PSRLQ128,
16644
16645 IX86_BUILTIN_PUNPCKHBW128,
16646 IX86_BUILTIN_PUNPCKHWD128,
16647 IX86_BUILTIN_PUNPCKHDQ128,
16648 IX86_BUILTIN_PUNPCKHQDQ128,
16649 IX86_BUILTIN_PUNPCKLBW128,
16650 IX86_BUILTIN_PUNPCKLWD128,
16651 IX86_BUILTIN_PUNPCKLDQ128,
16652 IX86_BUILTIN_PUNPCKLQDQ128,
16653
16654 IX86_BUILTIN_CLFLUSH,
16655 IX86_BUILTIN_MFENCE,
16656 IX86_BUILTIN_LFENCE,
16657
16658 /* SSE3 (Prescott New Instructions).  */
16659 IX86_BUILTIN_ADDSUBPS,
16660 IX86_BUILTIN_HADDPS,
16661 IX86_BUILTIN_HSUBPS,
16662 IX86_BUILTIN_MOVSHDUP,
16663 IX86_BUILTIN_MOVSLDUP,
16664 IX86_BUILTIN_ADDSUBPD,
16665 IX86_BUILTIN_HADDPD,
16666 IX86_BUILTIN_HSUBPD,
16667 IX86_BUILTIN_LDDQU,
16668
16669 IX86_BUILTIN_MONITOR,
16670 IX86_BUILTIN_MWAIT,
16671
16672 /* SSSE3. */
16673 IX86_BUILTIN_PHADDW,
16674 IX86_BUILTIN_PHADDD,
16675 IX86_BUILTIN_PHADDSW,
16676 IX86_BUILTIN_PHSUBW,
16677 IX86_BUILTIN_PHSUBD,
16678 IX86_BUILTIN_PHSUBSW,
16679 IX86_BUILTIN_PMADDUBSW,
16680 IX86_BUILTIN_PMULHRSW,
16681 IX86_BUILTIN_PSHUFB,
16682 IX86_BUILTIN_PSIGNB,
16683 IX86_BUILTIN_PSIGNW,
16684 IX86_BUILTIN_PSIGND,
16685 IX86_BUILTIN_PALIGNR,
16686 IX86_BUILTIN_PABSB,
16687 IX86_BUILTIN_PABSW,
16688 IX86_BUILTIN_PABSD,
16689
16690 IX86_BUILTIN_PHADDW128,
16691 IX86_BUILTIN_PHADDD128,
16692 IX86_BUILTIN_PHADDSW128,
16693 IX86_BUILTIN_PHSUBW128,
16694 IX86_BUILTIN_PHSUBD128,
16695 IX86_BUILTIN_PHSUBSW128,
16696 IX86_BUILTIN_PMADDUBSW128,
16697 IX86_BUILTIN_PMULHRSW128,
16698 IX86_BUILTIN_PSHUFB128,
16699 IX86_BUILTIN_PSIGNB128,
16700 IX86_BUILTIN_PSIGNW128,
16701 IX86_BUILTIN_PSIGND128,
16702 IX86_BUILTIN_PALIGNR128,
16703 IX86_BUILTIN_PABSB128,
16704 IX86_BUILTIN_PABSW128,
16705 IX86_BUILTIN_PABSD128,
16706
16707 /* AMDFAM10 - SSE4A New Instructions. */
16708 IX86_BUILTIN_MOVNTSD,
16709 IX86_BUILTIN_MOVNTSS,
16710 IX86_BUILTIN_EXTRQI,
16711 IX86_BUILTIN_EXTRQ,
16712 IX86_BUILTIN_INSERTQI,
16713 IX86_BUILTIN_INSERTQ,
16714
16715 /* SSE4.1. */
16716 IX86_BUILTIN_BLENDPD,
16717 IX86_BUILTIN_BLENDPS,
16718 IX86_BUILTIN_BLENDVPD,
16719 IX86_BUILTIN_BLENDVPS,
16720 IX86_BUILTIN_PBLENDVB128,
16721 IX86_BUILTIN_PBLENDW128,
16722
16723 IX86_BUILTIN_DPPD,
16724 IX86_BUILTIN_DPPS,
16725
16726 IX86_BUILTIN_INSERTPS128,
16727
16728 IX86_BUILTIN_MOVNTDQA,
16729 IX86_BUILTIN_MPSADBW128,
16730 IX86_BUILTIN_PACKUSDW128,
16731 IX86_BUILTIN_PCMPEQQ,
16732 IX86_BUILTIN_PHMINPOSUW128,
16733
16734 IX86_BUILTIN_PMAXSB128,
16735 IX86_BUILTIN_PMAXSD128,
16736 IX86_BUILTIN_PMAXUD128,
16737 IX86_BUILTIN_PMAXUW128,
16738
16739 IX86_BUILTIN_PMINSB128,
16740 IX86_BUILTIN_PMINSD128,
16741 IX86_BUILTIN_PMINUD128,
16742 IX86_BUILTIN_PMINUW128,
16743
16744 IX86_BUILTIN_PMOVSXBW128,
16745 IX86_BUILTIN_PMOVSXBD128,
16746 IX86_BUILTIN_PMOVSXBQ128,
16747 IX86_BUILTIN_PMOVSXWD128,
16748 IX86_BUILTIN_PMOVSXWQ128,
16749 IX86_BUILTIN_PMOVSXDQ128,
16750
16751 IX86_BUILTIN_PMOVZXBW128,
16752 IX86_BUILTIN_PMOVZXBD128,
16753 IX86_BUILTIN_PMOVZXBQ128,
16754 IX86_BUILTIN_PMOVZXWD128,
16755 IX86_BUILTIN_PMOVZXWQ128,
16756 IX86_BUILTIN_PMOVZXDQ128,
16757
16758 IX86_BUILTIN_PMULDQ128,
16759 IX86_BUILTIN_PMULLD128,
16760
16761 IX86_BUILTIN_ROUNDPD,
16762 IX86_BUILTIN_ROUNDPS,
16763 IX86_BUILTIN_ROUNDSD,
16764 IX86_BUILTIN_ROUNDSS,
16765
16766 IX86_BUILTIN_PTESTZ,
16767 IX86_BUILTIN_PTESTC,
16768 IX86_BUILTIN_PTESTNZC,
16769
16770 IX86_BUILTIN_VEC_INIT_V2SI,
16771 IX86_BUILTIN_VEC_INIT_V4HI,
16772 IX86_BUILTIN_VEC_INIT_V8QI,
16773 IX86_BUILTIN_VEC_EXT_V2DF,
16774 IX86_BUILTIN_VEC_EXT_V2DI,
16775 IX86_BUILTIN_VEC_EXT_V4SF,
16776 IX86_BUILTIN_VEC_EXT_V4SI,
16777 IX86_BUILTIN_VEC_EXT_V8HI,
16778 IX86_BUILTIN_VEC_EXT_V2SI,
16779 IX86_BUILTIN_VEC_EXT_V4HI,
16780 IX86_BUILTIN_VEC_EXT_V16QI,
16781 IX86_BUILTIN_VEC_SET_V2DI,
16782 IX86_BUILTIN_VEC_SET_V4SF,
16783 IX86_BUILTIN_VEC_SET_V4SI,
16784 IX86_BUILTIN_VEC_SET_V8HI,
16785 IX86_BUILTIN_VEC_SET_V4HI,
16786 IX86_BUILTIN_VEC_SET_V16QI,
16787
16788 /* SSE4.2. */
16789 IX86_BUILTIN_CRC32QI,
16790 IX86_BUILTIN_CRC32HI,
16791 IX86_BUILTIN_CRC32SI,
16792 IX86_BUILTIN_CRC32DI,
16793
16794 IX86_BUILTIN_PCMPESTRI128,
16795 IX86_BUILTIN_PCMPESTRM128,
16796 IX86_BUILTIN_PCMPESTRA128,
16797 IX86_BUILTIN_PCMPESTRC128,
16798 IX86_BUILTIN_PCMPESTRO128,
16799 IX86_BUILTIN_PCMPESTRS128,
16800 IX86_BUILTIN_PCMPESTRZ128,
16801 IX86_BUILTIN_PCMPISTRI128,
16802 IX86_BUILTIN_PCMPISTRM128,
16803 IX86_BUILTIN_PCMPISTRA128,
16804 IX86_BUILTIN_PCMPISTRC128,
16805 IX86_BUILTIN_PCMPISTRO128,
16806 IX86_BUILTIN_PCMPISTRS128,
16807 IX86_BUILTIN_PCMPISTRZ128,
16808
16809 IX86_BUILTIN_PCMPGTQ,
16810
16811 IX86_BUILTIN_MAX
16812 };
16813
16814 /* Table for the ix86 builtin decls. */
16815 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16816
16817 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so only
16818 if MASK matches the ISA options enabled in ix86_isa_flags (and, when MASK
16819 includes OPTION_MASK_ISA_64BIT, only on 64-bit targets).  Stores the function
16820 decl in the ix86_builtins array.  Returns it, or NULL_TREE if the builtin was not added.  */
16821
16822 static inline tree
16823 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16824 {
16825 tree decl = NULL_TREE;
16826
16827 if (mask & ix86_isa_flags
16828 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16829 {
16830 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16831 NULL, NULL_TREE);
16832 ix86_builtins[(int) code] = decl;
16833 }
16834
16835 return decl;
16836 }
16837
16838 /* Like def_builtin, but also marks the function decl "const". */
16839
16840 static inline tree
16841 def_builtin_const (int mask, const char *name, tree type,
16842 enum ix86_builtins code)
16843 {
16844 tree decl = def_builtin (mask, name, type, code);
16845 if (decl)
16846 TREE_READONLY (decl) = 1;
16847 return decl;
16848 }
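
/* A sketch of how the helpers above are used (the call sites live in
   ix86_init_mmx_sse_builtins below): a builtin is registered only when its
   ISA mask is enabled, so a call of this shape

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   would add the SSE sqrtps intrinsic and mark its decl TREE_READONLY.  */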
16849
16850 /* Bits for builtin_description.flag. */
16851
16852 /* Set when we don't support the comparison natively, and should
16853 swap the comparison operands in order to support it.  */
16854 #define BUILTIN_DESC_SWAP_OPERANDS 1
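
/* For example, __builtin_ia32_cmpgtps is listed in bdesc_2arg below with
   comparison code LT and this flag set: "a > b" is emitted as the LT
   pattern applied to the swapped operands, i.e. "b < a".  */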
16855
16856 struct builtin_description
16857 {
16858 const unsigned int mask;
16859 const enum insn_code icode;
16860 const char *const name;
16861 const enum ix86_builtins code;
16862 const enum rtx_code comparison;
16863 const int flag;
16864 };
16865
16866 static const struct builtin_description bdesc_comi[] =
16867 {
16868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16892 };
16893
16894 static const struct builtin_description bdesc_ptest[] =
16895 {
16896 /* SSE4.1 */
16897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16900 };
16901
16902 static const struct builtin_description bdesc_pcmpestr[] =
16903 {
16904 /* SSE4.2 */
16905 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16906 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16907 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16908 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16909 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16910 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16911 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
16912 };
16913
16914 static const struct builtin_description bdesc_pcmpistr[] =
16915 {
16916 /* SSE4.2 */
16917 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16918 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16919 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16920 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16921 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16922 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16923 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
16924 };
16925
16926 static const struct builtin_description bdesc_crc32[] =
16927 {
16928 /* SSE4.2 */
16929 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16930 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16931 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16932 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16933 };
16934
16935 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0.  */
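/* The third operand is either an 8-bit immediate (e.g. the blend mask of
   __builtin_ia32_pblendw128 or the selector of __builtin_ia32_insertps128)
   or the implicit xmm0 register used by the variable blends such as
   __builtin_ia32_blendvps.  */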
16936 static const struct builtin_description bdesc_sse_3arg[] =
16937 {
16938 /* SSE4.1 */
16939 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16940 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16941 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16942 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16943 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16944 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16945 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16946 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16947 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16948 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16949 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16950 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16951 };
16952
16953 static const struct builtin_description bdesc_2arg[] =
16954 {
16955 /* SSE */
16956 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16957 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
16958 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
16959 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
16960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
16961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
16962 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
16963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
16964
16965 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16966 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
16969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
16970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16987
16988 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
16989 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
16990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
16991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
16992
16993 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
16994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
16995 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
16996 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
16997
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17003
17004 /* MMX */
17005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17008 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17012 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17013
17014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17022
17023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17025 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17026
17027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17030 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17031
17032 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17034
17035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17041
17042 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17046
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17053
17054 /* Special. */
17055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17058
17059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17062
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17069
17070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17076
17077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17081
17082 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17084
17085 /* SSE2 */
17086 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17088 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17089 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17094
17095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17115
17116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17117 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17120
17121 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17125
17126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17129
17130 /* SSE2 MMX */
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17136 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17139
17140 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17141 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17142 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17143 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17144 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17145 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17146 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17147 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17148
17149 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17151
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17156
17157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17159
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17166
17167 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17168 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17171
17172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17180
17181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17184
17185 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17187
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17190
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17194
17195 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17197 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17198
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17200 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17201
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17203
17204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17208
17209 /* SSE3 */
17210 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17211 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17215 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17216
17217 /* SSSE3 */
17218 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17222 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17225 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17226 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17228 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17229 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17231 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17232 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17233 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17235 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17236 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17237 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17239 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17242
17243 /* SSE4.1 */
17244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17246 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17254 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17256
17257 /* SSE4.2 */
17258 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17259 };
17260
17261 static const struct builtin_description bdesc_1arg[] =
17262 {
17263 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17265
17266 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17269
17270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17276
17277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17279
17280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17281
17282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17284
17285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17290
17291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17292
17293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17295 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17297
17298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17301
17302 /* SSE3 */
17303 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17304 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17305
17306 /* SSSE3 */
17307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17313
17314 /* SSE4.1 */
17315 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17319 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17320 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17323 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17328
17329 /* Fake 1-argument builtins that take a constant smaller than 8 bits as the 2nd arg.  */
17330 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17331 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17332 };
17333
17334 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
17335 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
17336 are defined.  */
17337 static void
17338 ix86_init_mmx_sse_builtins (void)
17339 {
17340 const struct builtin_description * d;
17341 size_t i;
17342
17343 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17344 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17345 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17346 tree V2DI_type_node
17347 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17348 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17349 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17350 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17351 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17352 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17353 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17354
17355 tree pchar_type_node = build_pointer_type (char_type_node);
17356 tree pcchar_type_node = build_pointer_type (
17357 build_type_variant (char_type_node, 1, 0));
17358 tree pfloat_type_node = build_pointer_type (float_type_node);
17359 tree pcfloat_type_node = build_pointer_type (
17360 build_type_variant (float_type_node, 1, 0));
17361 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17362 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17363 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17364
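/* The <ret>_ftype_<args> trees built below follow a simple naming
   convention: e.g. int_ftype_v4sf_v4sf is the type of a function taking
   two V4SF vectors and returning int.  */
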
17365 /* Comparisons. */
17366 tree int_ftype_v4sf_v4sf
17367 = build_function_type_list (integer_type_node,
17368 V4SF_type_node, V4SF_type_node, NULL_TREE);
17369 tree v4si_ftype_v4sf_v4sf
17370 = build_function_type_list (V4SI_type_node,
17371 V4SF_type_node, V4SF_type_node, NULL_TREE);
17372 /* MMX/SSE/integer conversions. */
17373 tree int_ftype_v4sf
17374 = build_function_type_list (integer_type_node,
17375 V4SF_type_node, NULL_TREE);
17376 tree int64_ftype_v4sf
17377 = build_function_type_list (long_long_integer_type_node,
17378 V4SF_type_node, NULL_TREE);
17379 tree int_ftype_v8qi
17380 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17381 tree v4sf_ftype_v4sf_int
17382 = build_function_type_list (V4SF_type_node,
17383 V4SF_type_node, integer_type_node, NULL_TREE);
17384 tree v4sf_ftype_v4sf_int64
17385 = build_function_type_list (V4SF_type_node,
17386 V4SF_type_node, long_long_integer_type_node,
17387 NULL_TREE);
17388 tree v4sf_ftype_v4sf_v2si
17389 = build_function_type_list (V4SF_type_node,
17390 V4SF_type_node, V2SI_type_node, NULL_TREE);
17391
17392 /* Miscellaneous. */
17393 tree v8qi_ftype_v4hi_v4hi
17394 = build_function_type_list (V8QI_type_node,
17395 V4HI_type_node, V4HI_type_node, NULL_TREE);
17396 tree v4hi_ftype_v2si_v2si
17397 = build_function_type_list (V4HI_type_node,
17398 V2SI_type_node, V2SI_type_node, NULL_TREE);
17399 tree v4sf_ftype_v4sf_v4sf_int
17400 = build_function_type_list (V4SF_type_node,
17401 V4SF_type_node, V4SF_type_node,
17402 integer_type_node, NULL_TREE);
17403 tree v2si_ftype_v4hi_v4hi
17404 = build_function_type_list (V2SI_type_node,
17405 V4HI_type_node, V4HI_type_node, NULL_TREE);
17406 tree v4hi_ftype_v4hi_int
17407 = build_function_type_list (V4HI_type_node,
17408 V4HI_type_node, integer_type_node, NULL_TREE);
17409 tree v4hi_ftype_v4hi_di
17410 = build_function_type_list (V4HI_type_node,
17411 V4HI_type_node, long_long_unsigned_type_node,
17412 NULL_TREE);
17413 tree v2si_ftype_v2si_di
17414 = build_function_type_list (V2SI_type_node,
17415 V2SI_type_node, long_long_unsigned_type_node,
17416 NULL_TREE);
17417 tree void_ftype_void
17418 = build_function_type (void_type_node, void_list_node);
17419 tree void_ftype_unsigned
17420 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17421 tree void_ftype_unsigned_unsigned
17422 = build_function_type_list (void_type_node, unsigned_type_node,
17423 unsigned_type_node, NULL_TREE);
17424 tree void_ftype_pcvoid_unsigned_unsigned
17425 = build_function_type_list (void_type_node, const_ptr_type_node,
17426 unsigned_type_node, unsigned_type_node,
17427 NULL_TREE);
17428 tree unsigned_ftype_void
17429 = build_function_type (unsigned_type_node, void_list_node);
17430 tree v2si_ftype_v4sf
17431 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17432 /* Loads/stores. */
17433 tree void_ftype_v8qi_v8qi_pchar
17434 = build_function_type_list (void_type_node,
17435 V8QI_type_node, V8QI_type_node,
17436 pchar_type_node, NULL_TREE);
17437 tree v4sf_ftype_pcfloat
17438 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17439 /* @@@ the type is bogus */
17440 tree v4sf_ftype_v4sf_pv2si
17441 = build_function_type_list (V4SF_type_node,
17442 V4SF_type_node, pv2si_type_node, NULL_TREE);
17443 tree void_ftype_pv2si_v4sf
17444 = build_function_type_list (void_type_node,
17445 pv2si_type_node, V4SF_type_node, NULL_TREE);
17446 tree void_ftype_pfloat_v4sf
17447 = build_function_type_list (void_type_node,
17448 pfloat_type_node, V4SF_type_node, NULL_TREE);
17449 tree void_ftype_pdi_di
17450 = build_function_type_list (void_type_node,
17451 pdi_type_node, long_long_unsigned_type_node,
17452 NULL_TREE);
17453 tree void_ftype_pv2di_v2di
17454 = build_function_type_list (void_type_node,
17455 pv2di_type_node, V2DI_type_node, NULL_TREE);
17456 /* Normal vector unops. */
17457 tree v4sf_ftype_v4sf
17458 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17459 tree v16qi_ftype_v16qi
17460 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17461 tree v8hi_ftype_v8hi
17462 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17463 tree v4si_ftype_v4si
17464 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17465 tree v8qi_ftype_v8qi
17466 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17467 tree v4hi_ftype_v4hi
17468 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17469
17470 /* Normal vector binops. */
17471 tree v4sf_ftype_v4sf_v4sf
17472 = build_function_type_list (V4SF_type_node,
17473 V4SF_type_node, V4SF_type_node, NULL_TREE);
17474 tree v8qi_ftype_v8qi_v8qi
17475 = build_function_type_list (V8QI_type_node,
17476 V8QI_type_node, V8QI_type_node, NULL_TREE);
17477 tree v4hi_ftype_v4hi_v4hi
17478 = build_function_type_list (V4HI_type_node,
17479 V4HI_type_node, V4HI_type_node, NULL_TREE);
17480 tree v2si_ftype_v2si_v2si
17481 = build_function_type_list (V2SI_type_node,
17482 V2SI_type_node, V2SI_type_node, NULL_TREE);
17483 tree di_ftype_di_di
17484 = build_function_type_list (long_long_unsigned_type_node,
17485 long_long_unsigned_type_node,
17486 long_long_unsigned_type_node, NULL_TREE);
17487
17488 tree di_ftype_di_di_int
17489 = build_function_type_list (long_long_unsigned_type_node,
17490 long_long_unsigned_type_node,
17491 long_long_unsigned_type_node,
17492 integer_type_node, NULL_TREE);
17493
17494 tree v2si_ftype_v2sf
17495 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17496 tree v2sf_ftype_v2si
17497 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17498 tree v2si_ftype_v2si
17499 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17500 tree v2sf_ftype_v2sf
17501 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17502 tree v2sf_ftype_v2sf_v2sf
17503 = build_function_type_list (V2SF_type_node,
17504 V2SF_type_node, V2SF_type_node, NULL_TREE);
17505 tree v2si_ftype_v2sf_v2sf
17506 = build_function_type_list (V2SI_type_node,
17507 V2SF_type_node, V2SF_type_node, NULL_TREE);
17508 tree pint_type_node = build_pointer_type (integer_type_node);
17509 tree pdouble_type_node = build_pointer_type (double_type_node);
17510 tree pcdouble_type_node = build_pointer_type (
17511 build_type_variant (double_type_node, 1, 0));
17512 tree int_ftype_v2df_v2df
17513 = build_function_type_list (integer_type_node,
17514 V2DF_type_node, V2DF_type_node, NULL_TREE);
17515
17516 tree void_ftype_pcvoid
17517 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17518 tree v4sf_ftype_v4si
17519 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17520 tree v4si_ftype_v4sf
17521 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17522 tree v2df_ftype_v4si
17523 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17524 tree v4si_ftype_v2df
17525 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17526 tree v2si_ftype_v2df
17527 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17528 tree v4sf_ftype_v2df
17529 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17530 tree v2df_ftype_v2si
17531 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17532 tree v2df_ftype_v4sf
17533 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17534 tree int_ftype_v2df
17535 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17536 tree int64_ftype_v2df
17537 = build_function_type_list (long_long_integer_type_node,
17538 V2DF_type_node, NULL_TREE);
17539 tree v2df_ftype_v2df_int
17540 = build_function_type_list (V2DF_type_node,
17541 V2DF_type_node, integer_type_node, NULL_TREE);
17542 tree v2df_ftype_v2df_int64
17543 = build_function_type_list (V2DF_type_node,
17544 V2DF_type_node, long_long_integer_type_node,
17545 NULL_TREE);
17546 tree v4sf_ftype_v4sf_v2df
17547 = build_function_type_list (V4SF_type_node,
17548 V4SF_type_node, V2DF_type_node, NULL_TREE);
17549 tree v2df_ftype_v2df_v4sf
17550 = build_function_type_list (V2DF_type_node,
17551 V2DF_type_node, V4SF_type_node, NULL_TREE);
17552 tree v2df_ftype_v2df_v2df_int
17553 = build_function_type_list (V2DF_type_node,
17554 V2DF_type_node, V2DF_type_node,
17555 integer_type_node,
17556 NULL_TREE);
17557 tree v2df_ftype_v2df_pcdouble
17558 = build_function_type_list (V2DF_type_node,
17559 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17560 tree void_ftype_pdouble_v2df
17561 = build_function_type_list (void_type_node,
17562 pdouble_type_node, V2DF_type_node, NULL_TREE);
17563 tree void_ftype_pint_int
17564 = build_function_type_list (void_type_node,
17565 pint_type_node, integer_type_node, NULL_TREE);
17566 tree void_ftype_v16qi_v16qi_pchar
17567 = build_function_type_list (void_type_node,
17568 V16QI_type_node, V16QI_type_node,
17569 pchar_type_node, NULL_TREE);
17570 tree v2df_ftype_pcdouble
17571 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17572 tree v2df_ftype_v2df_v2df
17573 = build_function_type_list (V2DF_type_node,
17574 V2DF_type_node, V2DF_type_node, NULL_TREE);
17575 tree v16qi_ftype_v16qi_v16qi
17576 = build_function_type_list (V16QI_type_node,
17577 V16QI_type_node, V16QI_type_node, NULL_TREE);
17578 tree v8hi_ftype_v8hi_v8hi
17579 = build_function_type_list (V8HI_type_node,
17580 V8HI_type_node, V8HI_type_node, NULL_TREE);
17581 tree v4si_ftype_v4si_v4si
17582 = build_function_type_list (V4SI_type_node,
17583 V4SI_type_node, V4SI_type_node, NULL_TREE);
17584 tree v2di_ftype_v2di_v2di
17585 = build_function_type_list (V2DI_type_node,
17586 V2DI_type_node, V2DI_type_node, NULL_TREE);
17587 tree v2di_ftype_v2df_v2df
17588 = build_function_type_list (V2DI_type_node,
17589 V2DF_type_node, V2DF_type_node, NULL_TREE);
17590 tree v2df_ftype_v2df
17591 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17592 tree v2di_ftype_v2di_int
17593 = build_function_type_list (V2DI_type_node,
17594 V2DI_type_node, integer_type_node, NULL_TREE);
17595 tree v2di_ftype_v2di_v2di_int
17596 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17597 V2DI_type_node, integer_type_node, NULL_TREE);
17598 tree v4si_ftype_v4si_int
17599 = build_function_type_list (V4SI_type_node,
17600 V4SI_type_node, integer_type_node, NULL_TREE);
17601 tree v8hi_ftype_v8hi_int
17602 = build_function_type_list (V8HI_type_node,
17603 V8HI_type_node, integer_type_node, NULL_TREE);
17604 tree v4si_ftype_v8hi_v8hi
17605 = build_function_type_list (V4SI_type_node,
17606 V8HI_type_node, V8HI_type_node, NULL_TREE);
17607 tree di_ftype_v8qi_v8qi
17608 = build_function_type_list (long_long_unsigned_type_node,
17609 V8QI_type_node, V8QI_type_node, NULL_TREE);
17610 tree di_ftype_v2si_v2si
17611 = build_function_type_list (long_long_unsigned_type_node,
17612 V2SI_type_node, V2SI_type_node, NULL_TREE);
17613 tree v2di_ftype_v16qi_v16qi
17614 = build_function_type_list (V2DI_type_node,
17615 V16QI_type_node, V16QI_type_node, NULL_TREE);
17616 tree v2di_ftype_v4si_v4si
17617 = build_function_type_list (V2DI_type_node,
17618 V4SI_type_node, V4SI_type_node, NULL_TREE);
17619 tree int_ftype_v16qi
17620 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17621 tree v16qi_ftype_pcchar
17622 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17623 tree void_ftype_pchar_v16qi
17624 = build_function_type_list (void_type_node,
17625 pchar_type_node, V16QI_type_node, NULL_TREE);
17626
17627 tree v2di_ftype_v2di_unsigned_unsigned
17628 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17629 unsigned_type_node, unsigned_type_node,
17630 NULL_TREE);
17631 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17632 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17633 unsigned_type_node, unsigned_type_node,
17634 NULL_TREE);
17635 tree v2di_ftype_v2di_v16qi
17636 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17637 NULL_TREE);
17638 tree v2df_ftype_v2df_v2df_v2df
17639 = build_function_type_list (V2DF_type_node,
17640 V2DF_type_node, V2DF_type_node,
17641 V2DF_type_node, NULL_TREE);
17642 tree v4sf_ftype_v4sf_v4sf_v4sf
17643 = build_function_type_list (V4SF_type_node,
17644 V4SF_type_node, V4SF_type_node,
17645 V4SF_type_node, NULL_TREE);
17646 tree v8hi_ftype_v16qi
17647 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17648 NULL_TREE);
17649 tree v4si_ftype_v16qi
17650 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17651 NULL_TREE);
17652 tree v2di_ftype_v16qi
17653 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17654 NULL_TREE);
17655 tree v4si_ftype_v8hi
17656 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17657 NULL_TREE);
17658 tree v2di_ftype_v8hi
17659 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17660 NULL_TREE);
17661 tree v2di_ftype_v4si
17662 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17663 NULL_TREE);
17664 tree v2di_ftype_pv2di
17665 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17666 NULL_TREE);
17667 tree v16qi_ftype_v16qi_v16qi_int
17668 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17669 V16QI_type_node, integer_type_node,
17670 NULL_TREE);
17671 tree v16qi_ftype_v16qi_v16qi_v16qi
17672 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17673 V16QI_type_node, V16QI_type_node,
17674 NULL_TREE);
17675 tree v8hi_ftype_v8hi_v8hi_int
17676 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17677 V8HI_type_node, integer_type_node,
17678 NULL_TREE);
17679 tree v4si_ftype_v4si_v4si_int
17680 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17681 V4SI_type_node, integer_type_node,
17682 NULL_TREE);
17683 tree int_ftype_v2di_v2di
17684 = build_function_type_list (integer_type_node,
17685 V2DI_type_node, V2DI_type_node,
17686 NULL_TREE);
17687 tree int_ftype_v16qi_int_v16qi_int_int
17688 = build_function_type_list (integer_type_node,
17689 V16QI_type_node,
17690 integer_type_node,
17691 V16QI_type_node,
17692 integer_type_node,
17693 integer_type_node,
17694 NULL_TREE);
17695 tree v16qi_ftype_v16qi_int_v16qi_int_int
17696 = build_function_type_list (V16QI_type_node,
17697 V16QI_type_node,
17698 integer_type_node,
17699 V16QI_type_node,
17700 integer_type_node,
17701 integer_type_node,
17702 NULL_TREE);
17703 tree int_ftype_v16qi_v16qi_int
17704 = build_function_type_list (integer_type_node,
17705 V16QI_type_node,
17706 V16QI_type_node,
17707 integer_type_node,
17708 NULL_TREE);
17709
17710 tree float80_type;
17711 tree float128_type;
17712 tree ftype;
17713
17714 /* The __float80 type. */
17715 if (TYPE_MODE (long_double_type_node) == XFmode)
17716 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17717 "__float80");
17718 else
17719 {
17720 /* The __float80 type. */
17721 float80_type = make_node (REAL_TYPE);
17722 TYPE_PRECISION (float80_type) = 80;
17723 layout_type (float80_type);
17724 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17725 }
17726
17727 if (TARGET_64BIT)
17728 {
17729 float128_type = make_node (REAL_TYPE);
17730 TYPE_PRECISION (float128_type) = 128;
17731 layout_type (float128_type);
17732 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17733 }
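
  /* Illustrative sketch, not part of the original source: once registered
     here, these names are usable directly as type specifiers in user code,
     for instance

         __float80 ext = 1.0L;       (80-bit x87 extended precision)
         __float128 quad;            (only registered when TARGET_64BIT)

     The keywords are exactly the strings passed to register_builtin_type
     above; the variable names are invented for the example.  */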
17734
17735 /* Add all SSE builtins that are more or less simple operations on
17736 three operands. */
17737 for (i = 0, d = bdesc_sse_3arg;
17738 i < ARRAY_SIZE (bdesc_sse_3arg);
17739 i++, d++)
17740 {
17741 /* Use one of the operands; the target can have a different mode for
17742 mask-generating compares. */
17743 enum machine_mode mode;
17744 tree type;
17745
17746 if (d->name == 0)
17747 continue;
17748 mode = insn_data[d->icode].operand[1].mode;
17749
17750 switch (mode)
17751 {
17752 case V16QImode:
17753 type = v16qi_ftype_v16qi_v16qi_int;
17754 break;
17755 case V8HImode:
17756 type = v8hi_ftype_v8hi_v8hi_int;
17757 break;
17758 case V4SImode:
17759 type = v4si_ftype_v4si_v4si_int;
17760 break;
17761 case V2DImode:
17762 type = v2di_ftype_v2di_v2di_int;
17763 break;
17764 case V2DFmode:
17765 type = v2df_ftype_v2df_v2df_int;
17766 break;
17767 case V4SFmode:
17768 type = v4sf_ftype_v4sf_v4sf_int;
17769 break;
17770 default:
17771 gcc_unreachable ();
17772 }
17773
17774 /* Override for variable blends. */
17775 switch (d->icode)
17776 {
17777 case CODE_FOR_sse4_1_blendvpd:
17778 type = v2df_ftype_v2df_v2df_v2df;
17779 break;
17780 case CODE_FOR_sse4_1_blendvps:
17781 type = v4sf_ftype_v4sf_v4sf_v4sf;
17782 break;
17783 case CODE_FOR_sse4_1_pblendvb:
17784 type = v16qi_ftype_v16qi_v16qi_v16qi;
17785 break;
17786 default:
17787 break;
17788 }
17789
17790 def_builtin (d->mask, d->name, type, d->code);
17791 }
17792
17793 /* Add all builtins that are more or less simple operations on two
17794 operands. */
17795 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17796 {
17797 /* Use one of the operands; the target can have a different mode for
17798 mask-generating compares. */
17799 enum machine_mode mode;
17800 tree type;
17801
17802 if (d->name == 0)
17803 continue;
17804 mode = insn_data[d->icode].operand[1].mode;
17805
17806 switch (mode)
17807 {
17808 case V16QImode:
17809 type = v16qi_ftype_v16qi_v16qi;
17810 break;
17811 case V8HImode:
17812 type = v8hi_ftype_v8hi_v8hi;
17813 break;
17814 case V4SImode:
17815 type = v4si_ftype_v4si_v4si;
17816 break;
17817 case V2DImode:
17818 type = v2di_ftype_v2di_v2di;
17819 break;
17820 case V2DFmode:
17821 type = v2df_ftype_v2df_v2df;
17822 break;
17823 case V4SFmode:
17824 type = v4sf_ftype_v4sf_v4sf;
17825 break;
17826 case V8QImode:
17827 type = v8qi_ftype_v8qi_v8qi;
17828 break;
17829 case V4HImode:
17830 type = v4hi_ftype_v4hi_v4hi;
17831 break;
17832 case V2SImode:
17833 type = v2si_ftype_v2si_v2si;
17834 break;
17835 case DImode:
17836 type = di_ftype_di_di;
17837 break;
17838
17839 default:
17840 gcc_unreachable ();
17841 }
17842
17843 /* Override for comparisons. */
17844 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17845 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17846 type = v4si_ftype_v4sf_v4sf;
17847
17848 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17849 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17850 type = v2di_ftype_v2df_v2df;
17851
17852 def_builtin (d->mask, d->name, type, d->code);
17853 }
17854
17855 /* Add all builtins that are more or less simple operations on one operand. */
17856 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17857 {
17858 enum machine_mode mode;
17859 tree type;
17860
17861 if (d->name == 0)
17862 continue;
17863 mode = insn_data[d->icode].operand[1].mode;
17864
17865 switch (mode)
17866 {
17867 case V16QImode:
17868 type = v16qi_ftype_v16qi;
17869 break;
17870 case V8HImode:
17871 type = v8hi_ftype_v8hi;
17872 break;
17873 case V4SImode:
17874 type = v4si_ftype_v4si;
17875 break;
17876 case V2DFmode:
17877 type = v2df_ftype_v2df;
17878 break;
17879 case V4SFmode:
17880 type = v4sf_ftype_v4sf;
17881 break;
17882 case V8QImode:
17883 type = v8qi_ftype_v8qi;
17884 break;
17885 case V4HImode:
17886 type = v4hi_ftype_v4hi;
17887 break;
17888 case V2SImode:
17889 type = v2si_ftype_v2si;
17890 break;
17891
17892 default:
17893 gcc_unreachable ();
17894 }
17895
17896 def_builtin (d->mask, d->name, type, d->code);
17897 }
17898
17899 /* pcmpestr[im] insns. */
17900 for (i = 0, d = bdesc_pcmpestr;
17901 i < ARRAY_SIZE (bdesc_pcmpestr);
17902 i++, d++)
17903 {
17904 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17905 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17906 else
17907 ftype = int_ftype_v16qi_int_v16qi_int_int;
17908 def_builtin (d->mask, d->name, ftype, d->code);
17909 }
17910
17911 /* pcmpistr[im] insns. */
17912 for (i = 0, d = bdesc_pcmpistr;
17913 i < ARRAY_SIZE (bdesc_pcmpistr);
17914 i++, d++)
17915 {
17916 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17917 ftype = v16qi_ftype_v16qi_v16qi_int;
17918 else
17919 ftype = int_ftype_v16qi_v16qi_int;
17920 def_builtin (d->mask, d->name, ftype, d->code);
17921 }
17922
17923 /* Add the remaining MMX insns with somewhat more complicated types. */
17924 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17925 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17926 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17927 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17928
17929 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17930 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17931 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17932
17933 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17934 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17935
17936 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17937 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17938
17939 /* comi/ucomi insns. */
17940 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17941 if (d->mask == OPTION_MASK_ISA_SSE2)
17942 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17943 else
17944 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17945
17946 /* ptest insns. */
17947 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17948 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17949
17950 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17951 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17952 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17953
17954 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17955 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17956 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17957 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17958 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17959 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17960 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17961 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17962 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17963 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17964 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17965
17966 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17967
17968 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17969 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17970
17971 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17972 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17973 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17974 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17975
17976 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17977 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17978 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17979 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17980
17981 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17982
17983 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17984
17985 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17986 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17987 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17988 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17989 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17990 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17991
17992 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17993
17994 /* Original 3DNow! */
17995 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17996 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17997 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17998 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17999 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18000 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18001 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18002 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18003 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18004 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18005 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18006 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18007 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18008 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18009 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18010 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18011 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18012 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18013 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18014 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18015
18016 /* 3DNow! extension as used in the Athlon CPU. */
18017 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18018 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18019 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18020 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18021 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18022 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18023
18024 /* SSE2 */
18025 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18026
18027 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18028 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18029
18030 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18031 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18032
18033 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18034 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18035 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18036 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18037 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18038
18039 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18040 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18041 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18042 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18043
18044 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18045 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18046
18047 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18048
18049 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18050 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18051
18052 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18053 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18054 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18055 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18057
18058 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18059
18060 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18061 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18062 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18063 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18064
18065 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18066 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18067 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18068
18069 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18070 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18071 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18072 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18073
18074 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18075 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18076 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18077
18078 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18079 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18080
18081 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18082 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18083
18084 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18085 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18086 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18087 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18088 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18089 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18090 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18091
18092 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18093 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18094 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18095 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18096 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18097 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18098 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18099
18100 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18101 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18102 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18103 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18104
18105 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18106
18107 /* Prescott New Instructions. */
18108 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18109 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18110 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18111
18112 /* SSSE3. */
18113 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18114 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18115
18116 /* SSE4.1. */
18117 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18118 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18119 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18120 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18121 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18122 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18123 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18124 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18125 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18126 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18127 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18128 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18129 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18130 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18131 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18132 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18133 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18134 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
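
  /* Hedged usage sketch, not from the original source: the trailing int of
     the round builtins is the SSE4.1 rounding-control immediate, so a direct
     call could look like

         __v2df r = __builtin_ia32_roundpd (x, 0x01);    (round toward -inf)

     where x and r are invented V2DF names; user code normally reaches these
     builtins through the wrappers in smmintrin.h instead.  */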
18135
18136 /* SSE4.2. */
18137 ftype = build_function_type_list (unsigned_type_node,
18138 unsigned_type_node,
18139 unsigned_char_type_node,
18140 NULL_TREE);
18141 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18142 ftype = build_function_type_list (unsigned_type_node,
18143 unsigned_type_node,
18144 short_unsigned_type_node,
18145 NULL_TREE);
18146 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18147 ftype = build_function_type_list (unsigned_type_node,
18148 unsigned_type_node,
18149 unsigned_type_node,
18150 NULL_TREE);
18151 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18152 ftype = build_function_type_list (long_long_unsigned_type_node,
18153 long_long_unsigned_type_node,
18154 long_long_unsigned_type_node,
18155 NULL_TREE);
18156 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
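
  /* Hedged usage sketch, not part of the original source: the crc32 builtins
     accumulate into an unsigned value one element at a time, matching the
     function types built just above, e.g.

         unsigned int
         crc32_bytes (unsigned int crc, const unsigned char *p, int n)
         {
           while (n-- > 0)
             crc = __builtin_ia32_crc32qi (crc, *p++);
           return crc;
         }

     The helper name and loop are invented; only the builtin name and its
     (unsigned, unsigned char) signature come from the definitions above.  */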
18157
18158 /* AMDFAM10 SSE4A new built-ins. */
18159 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18160 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18161 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18162 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18163 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18164 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18165
18166 /* Access to the vec_init patterns. */
18167 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18168 integer_type_node, NULL_TREE);
18169 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18170
18171 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18172 short_integer_type_node,
18173 short_integer_type_node,
18174 short_integer_type_node, NULL_TREE);
18175 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18176
18177 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18178 char_type_node, char_type_node,
18179 char_type_node, char_type_node,
18180 char_type_node, char_type_node,
18181 char_type_node, NULL_TREE);
18182 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18183
18184 /* Access to the vec_extract patterns. */
18185 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18186 integer_type_node, NULL_TREE);
18187 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18188
18189 ftype = build_function_type_list (long_long_integer_type_node,
18190 V2DI_type_node, integer_type_node,
18191 NULL_TREE);
18192 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18193
18194 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18195 integer_type_node, NULL_TREE);
18196 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18197
18198 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18199 integer_type_node, NULL_TREE);
18200 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18201
18202 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18203 integer_type_node, NULL_TREE);
18204 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18205
18206 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18207 integer_type_node, NULL_TREE);
18208 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18209
18210 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18211 integer_type_node, NULL_TREE);
18212 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18213
18214 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18215 integer_type_node, NULL_TREE);
18216 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
18217
18218 /* Access to the vec_set patterns. */
18219 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18220 intDI_type_node,
18221 integer_type_node, NULL_TREE);
18222 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18223
18224 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18225 float_type_node,
18226 integer_type_node, NULL_TREE);
18227 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18228
18229 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18230 intSI_type_node,
18231 integer_type_node, NULL_TREE);
18232 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18233
18234 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18235 intHI_type_node,
18236 integer_type_node, NULL_TREE);
18237 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18238
18239 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18240 intHI_type_node,
18241 integer_type_node, NULL_TREE);
18242 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18243
18244 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18245 intQI_type_node,
18246 integer_type_node, NULL_TREE);
18247 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
18248 }
18249
18250 static void
18251 ix86_init_builtins (void)
18252 {
18253 if (TARGET_MMX)
18254 ix86_init_mmx_sse_builtins ();
18255 }
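
/* Illustrative note, not part of the original source: the builtins
   registered above are normally reached through the intrinsic headers
   rather than called directly.  A typical wrapper, sketched after the one
   in xmmintrin.h, looks roughly like

       static __inline __m128
       _mm_add_ss (__m128 __A, __m128 __B)
       {
         return (__m128) __builtin_ia32_addss ((__v4sf) __A, (__v4sf) __B);
       }

   so each def_builtin call is what gives such wrappers something to
   expand to.  */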
18256
18257 /* Errors in the source file can cause expand_expr to return const0_rtx
18258 where we expect a vector. To avoid crashing, use one of the vector
18259 clear instructions. */
18260 static rtx
18261 safe_vector_operand (rtx x, enum machine_mode mode)
18262 {
18263 if (x == const0_rtx)
18264 x = CONST0_RTX (mode);
18265 return x;
18266 }
18267
18268 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18269    4 operands.  The third argument must be a constant that fits in 8
18270    bits, or xmm0.  */
18271
18272 static rtx
18273 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18274 rtx target)
18275 {
18276 rtx pat;
18277 tree arg0 = CALL_EXPR_ARG (exp, 0);
18278 tree arg1 = CALL_EXPR_ARG (exp, 1);
18279 tree arg2 = CALL_EXPR_ARG (exp, 2);
18280 rtx op0 = expand_normal (arg0);
18281 rtx op1 = expand_normal (arg1);
18282 rtx op2 = expand_normal (arg2);
18283 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18284 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18285 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18286 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18287
18288 if (VECTOR_MODE_P (mode1))
18289 op0 = safe_vector_operand (op0, mode1);
18290 if (VECTOR_MODE_P (mode2))
18291 op1 = safe_vector_operand (op1, mode2);
18292 if (VECTOR_MODE_P (mode3))
18293 op2 = safe_vector_operand (op2, mode3);
18294
18295 if (optimize
18296 || target == 0
18297 || GET_MODE (target) != tmode
18298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18299 target = gen_reg_rtx (tmode);
18300
18301 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18302 op0 = copy_to_mode_reg (mode1, op0);
18303 if ((optimize && !register_operand (op1, mode2))
18304 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18305 op1 = copy_to_mode_reg (mode2, op1);
18306
18307 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18308 switch (icode)
18309 {
18310 case CODE_FOR_sse4_1_blendvpd:
18311 case CODE_FOR_sse4_1_blendvps:
18312 case CODE_FOR_sse4_1_pblendvb:
18313 op2 = copy_to_mode_reg (mode3, op2);
18314 break;
18315
18316 case CODE_FOR_sse4_1_roundsd:
18317 case CODE_FOR_sse4_1_roundss:
18318 error ("the third argument must be a 4-bit immediate");
18319 return const0_rtx;
18320
18321 default:
18322 error ("the third argument must be an 8-bit immediate");
18323 return const0_rtx;
18324 }
18325
18326 pat = GEN_FCN (icode) (target, op0, op1, op2);
18327 if (! pat)
18328 return 0;
18329 emit_insn (pat);
18330 return target;
18331 }
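
/* Hedged sketch, not from the original source: this routine handles both
   shapes registered through bdesc_sse_3arg, the immediate-selector form and
   the variable-blend form special-cased above, e.g.

       __v2df a = __builtin_ia32_blendpd (x, y, 0x1);    (imm8 selector)
       __v2df b = __builtin_ia32_blendvpd (x, y, mask);  (xmm0 selector)

   The variable names are invented; the builtin names correspond to the
   SSE4.1 blend patterns named in the switch above.  */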
18332
18333 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18334
18335 static rtx
18336 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18337 {
18338 rtx pat;
18339 tree arg0 = CALL_EXPR_ARG (exp, 0);
18340 tree arg1 = CALL_EXPR_ARG (exp, 1);
18341 rtx op0 = expand_normal (arg0);
18342 rtx op1 = expand_normal (arg1);
18343 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18344 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18345 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18346
18347 if (optimize
18348 || !target
18349 || GET_MODE (target) != tmode
18350 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18351 target = gen_reg_rtx (tmode);
18352
18353 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18354 op0 = copy_to_mode_reg (mode0, op0);
18355 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18356 {
18357 op1 = copy_to_reg (op1);
18358 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18359 }
18360
18361 pat = GEN_FCN (icode) (target, op0, op1);
18362 if (! pat)
18363 return 0;
18364 emit_insn (pat);
18365 return target;
18366 }
18367
18368 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18369
18370 static rtx
18371 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18372 {
18373 rtx pat, xops[3];
18374 tree arg0 = CALL_EXPR_ARG (exp, 0);
18375 tree arg1 = CALL_EXPR_ARG (exp, 1);
18376 rtx op0 = expand_normal (arg0);
18377 rtx op1 = expand_normal (arg1);
18378 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18379 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18380 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18381
18382 if (VECTOR_MODE_P (mode0))
18383 op0 = safe_vector_operand (op0, mode0);
18384 if (VECTOR_MODE_P (mode1))
18385 op1 = safe_vector_operand (op1, mode1);
18386
18387 if (optimize || !target
18388 || GET_MODE (target) != tmode
18389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18390 target = gen_reg_rtx (tmode);
18391
18392 if (GET_MODE (op1) == SImode && mode1 == TImode)
18393 {
18394 rtx x = gen_reg_rtx (V4SImode);
18395 emit_insn (gen_sse2_loadd (x, op1));
18396 op1 = gen_lowpart (TImode, x);
18397 }
18398
18399 /* The insn must want input operands in the same modes as the
18400 result. */
18401 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18402 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18403
18404 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18405 op0 = copy_to_mode_reg (mode0, op0);
18406 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18407 op1 = copy_to_mode_reg (mode1, op1);
18408
18409 /* ??? Using ix86_fixup_binary_operands is problematic when
18410 we've got mismatched modes. Fake it. */
18411
18412 xops[0] = target;
18413 xops[1] = op0;
18414 xops[2] = op1;
18415
18416 if (tmode == mode0 && tmode == mode1)
18417 {
18418 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18419 op0 = xops[1];
18420 op1 = xops[2];
18421 }
18422 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18423 {
18424 op0 = force_reg (mode0, op0);
18425 op1 = force_reg (mode1, op1);
18426 target = gen_reg_rtx (tmode);
18427 }
18428
18429 pat = GEN_FCN (icode) (target, op0, op1);
18430 if (! pat)
18431 return 0;
18432 emit_insn (pat);
18433 return target;
18434 }
18435
18436 /* Subroutine of ix86_expand_builtin to take care of stores. */
18437
18438 static rtx
18439 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18440 {
18441 rtx pat;
18442 tree arg0 = CALL_EXPR_ARG (exp, 0);
18443 tree arg1 = CALL_EXPR_ARG (exp, 1);
18444 rtx op0 = expand_normal (arg0);
18445 rtx op1 = expand_normal (arg1);
18446 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18447 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18448
18449 if (VECTOR_MODE_P (mode1))
18450 op1 = safe_vector_operand (op1, mode1);
18451
18452 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18453 op1 = copy_to_mode_reg (mode1, op1);
18454
18455 pat = GEN_FCN (icode) (op0, op1);
18456 if (pat)
18457 emit_insn (pat);
18458 return 0;
18459 }
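
/* Hedged sketch, not part of the original source: the store builtins
   expanded here take a pointer plus a value and return nothing, as in

       __builtin_ia32_storeups (out, v);    (V4SF store through float *out)

   where out and v are invented names; the pointer argument is forced into
   Pmode and wrapped in a MEM above before the insn is emitted.  */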
18460
18461 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18462
18463 static rtx
18464 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18465 rtx target, int do_load)
18466 {
18467 rtx pat;
18468 tree arg0 = CALL_EXPR_ARG (exp, 0);
18469 rtx op0 = expand_normal (arg0);
18470 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18471 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18472
18473 if (optimize || !target
18474 || GET_MODE (target) != tmode
18475 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18476 target = gen_reg_rtx (tmode);
18477 if (do_load)
18478 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18479 else
18480 {
18481 if (VECTOR_MODE_P (mode0))
18482 op0 = safe_vector_operand (op0, mode0);
18483
18484 if ((optimize && !register_operand (op0, mode0))
18485 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18486 op0 = copy_to_mode_reg (mode0, op0);
18487 }
18488
18489 switch (icode)
18490 {
18491 case CODE_FOR_sse4_1_roundpd:
18492 case CODE_FOR_sse4_1_roundps:
18493 {
18494 tree arg1 = CALL_EXPR_ARG (exp, 1);
18495 rtx op1 = expand_normal (arg1);
18496 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18497
18498 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18499 {
18500 error ("the second argument must be a 4-bit immediate");
18501 return const0_rtx;
18502 }
18503 pat = GEN_FCN (icode) (target, op0, op1);
18504 }
18505 break;
18506 default:
18507 pat = GEN_FCN (icode) (target, op0);
18508 break;
18509 }
18510
18511 if (! pat)
18512 return 0;
18513 emit_insn (pat);
18514 return target;
18515 }
18516
18517 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18518 sqrtss, rsqrtss, rcpss. */
18519
18520 static rtx
18521 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18522 {
18523 rtx pat;
18524 tree arg0 = CALL_EXPR_ARG (exp, 0);
18525 rtx op1, op0 = expand_normal (arg0);
18526 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18527 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18528
18529 if (optimize || !target
18530 || GET_MODE (target) != tmode
18531 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18532 target = gen_reg_rtx (tmode);
18533
18534 if (VECTOR_MODE_P (mode0))
18535 op0 = safe_vector_operand (op0, mode0);
18536
18537 if ((optimize && !register_operand (op0, mode0))
18538 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18539 op0 = copy_to_mode_reg (mode0, op0);
18540
18541 op1 = op0;
18542 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18543 op1 = copy_to_mode_reg (mode0, op1);
18544
18545 pat = GEN_FCN (icode) (target, op0, op1);
18546 if (! pat)
18547 return 0;
18548 emit_insn (pat);
18549 return target;
18550 }
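
/* Hedged sketch, not part of the original source: the patterns for these
   scalar insns take the source twice, one operand supplying element zero
   and the other the pass-through upper elements, which is why op0 is
   duplicated into op1 above.  From user code the call is simply

       __v4sf r = __builtin_ia32_rsqrtss (x);    (approximate 1/sqrt of x[0])

   with x an invented V4SF variable.  */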
18551
18552 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18553
18554 static rtx
18555 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18556 rtx target)
18557 {
18558 rtx pat;
18559 tree arg0 = CALL_EXPR_ARG (exp, 0);
18560 tree arg1 = CALL_EXPR_ARG (exp, 1);
18561 rtx op0 = expand_normal (arg0);
18562 rtx op1 = expand_normal (arg1);
18563 rtx op2;
18564 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18565 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18566 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18567 enum rtx_code comparison = d->comparison;
18568
18569 if (VECTOR_MODE_P (mode0))
18570 op0 = safe_vector_operand (op0, mode0);
18571 if (VECTOR_MODE_P (mode1))
18572 op1 = safe_vector_operand (op1, mode1);
18573
18574 /* Swap operands if we have a comparison that isn't available in
18575 hardware. */
18576 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18577 {
18578 rtx tmp = gen_reg_rtx (mode1);
18579 emit_move_insn (tmp, op1);
18580 op1 = op0;
18581 op0 = tmp;
18582 }
18583
18584 if (optimize || !target
18585 || GET_MODE (target) != tmode
18586 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18587 target = gen_reg_rtx (tmode);
18588
18589 if ((optimize && !register_operand (op0, mode0))
18590 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18591 op0 = copy_to_mode_reg (mode0, op0);
18592 if ((optimize && !register_operand (op1, mode1))
18593 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18594 op1 = copy_to_mode_reg (mode1, op1);
18595
18596 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18597 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18598 if (! pat)
18599 return 0;
18600 emit_insn (pat);
18601 return target;
18602 }
18603
18604 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18605
18606 static rtx
18607 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18608 rtx target)
18609 {
18610 rtx pat;
18611 tree arg0 = CALL_EXPR_ARG (exp, 0);
18612 tree arg1 = CALL_EXPR_ARG (exp, 1);
18613 rtx op0 = expand_normal (arg0);
18614 rtx op1 = expand_normal (arg1);
18615 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18616 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18617 enum rtx_code comparison = d->comparison;
18618
18619 if (VECTOR_MODE_P (mode0))
18620 op0 = safe_vector_operand (op0, mode0);
18621 if (VECTOR_MODE_P (mode1))
18622 op1 = safe_vector_operand (op1, mode1);
18623
18624 /* Swap operands if we have a comparison that isn't available in
18625 hardware. */
18626 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18627 {
18628 rtx tmp = op1;
18629 op1 = op0;
18630 op0 = tmp;
18631 }
18632
18633 target = gen_reg_rtx (SImode);
18634 emit_move_insn (target, const0_rtx);
18635 target = gen_rtx_SUBREG (QImode, target, 0);
18636
18637 if ((optimize && !register_operand (op0, mode0))
18638 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18639 op0 = copy_to_mode_reg (mode0, op0);
18640 if ((optimize && !register_operand (op1, mode1))
18641 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18642 op1 = copy_to_mode_reg (mode1, op1);
18643
18644 pat = GEN_FCN (d->icode) (op0, op1);
18645 if (! pat)
18646 return 0;
18647 emit_insn (pat);
18648 emit_insn (gen_rtx_SET (VOIDmode,
18649 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18650 gen_rtx_fmt_ee (comparison, QImode,
18651 SET_DEST (pat),
18652 const0_rtx)));
18653
18654 return SUBREG_REG (target);
18655 }
18656
18657 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18658
18659 static rtx
18660 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18661 rtx target)
18662 {
18663 rtx pat;
18664 tree arg0 = CALL_EXPR_ARG (exp, 0);
18665 tree arg1 = CALL_EXPR_ARG (exp, 1);
18666 rtx op0 = expand_normal (arg0);
18667 rtx op1 = expand_normal (arg1);
18668 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18669 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18670 enum rtx_code comparison = d->comparison;
18671
18672 if (VECTOR_MODE_P (mode0))
18673 op0 = safe_vector_operand (op0, mode0);
18674 if (VECTOR_MODE_P (mode1))
18675 op1 = safe_vector_operand (op1, mode1);
18676
18677 target = gen_reg_rtx (SImode);
18678 emit_move_insn (target, const0_rtx);
18679 target = gen_rtx_SUBREG (QImode, target, 0);
18680
18681 if ((optimize && !register_operand (op0, mode0))
18682 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18683 op0 = copy_to_mode_reg (mode0, op0);
18684 if ((optimize && !register_operand (op1, mode1))
18685 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18686 op1 = copy_to_mode_reg (mode1, op1);
18687
18688 pat = GEN_FCN (d->icode) (op0, op1);
18689 if (! pat)
18690 return 0;
18691 emit_insn (pat);
18692 emit_insn (gen_rtx_SET (VOIDmode,
18693 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18694 gen_rtx_fmt_ee (comparison, QImode,
18695 SET_DEST (pat),
18696 const0_rtx)));
18697
18698 return SUBREG_REG (target);
18699 }
18700
18701 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
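/* An illustrative sketch (the smmintrin.h spelling below is assumed for the
   example): a call such as

     int idx (__m128i a, int la, __m128i b, int lb)
     {
       return _mm_cmpestri (a, la, b, lb, 0x0c);
     }

   arrives here with five operands: two vectors, their explicit lengths and
   an 8-bit immediate.  Depending on D->CODE we return the index register
   (PCMPESTRI), the mask register (PCMPESTRM) or, when D->FLAG is set, a
   setcc testing the requested flag in FLAGS_REG.  */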
18702
18703 static rtx
18704 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18705 tree exp, rtx target)
18706 {
18707 rtx pat;
18708 tree arg0 = CALL_EXPR_ARG (exp, 0);
18709 tree arg1 = CALL_EXPR_ARG (exp, 1);
18710 tree arg2 = CALL_EXPR_ARG (exp, 2);
18711 tree arg3 = CALL_EXPR_ARG (exp, 3);
18712 tree arg4 = CALL_EXPR_ARG (exp, 4);
18713 rtx scratch0, scratch1;
18714 rtx op0 = expand_normal (arg0);
18715 rtx op1 = expand_normal (arg1);
18716 rtx op2 = expand_normal (arg2);
18717 rtx op3 = expand_normal (arg3);
18718 rtx op4 = expand_normal (arg4);
18719 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18720
18721 tmode0 = insn_data[d->icode].operand[0].mode;
18722 tmode1 = insn_data[d->icode].operand[1].mode;
18723 modev2 = insn_data[d->icode].operand[2].mode;
18724 modei3 = insn_data[d->icode].operand[3].mode;
18725 modev4 = insn_data[d->icode].operand[4].mode;
18726 modei5 = insn_data[d->icode].operand[5].mode;
18727 modeimm = insn_data[d->icode].operand[6].mode;
18728
18729 if (VECTOR_MODE_P (modev2))
18730 op0 = safe_vector_operand (op0, modev2);
18731 if (VECTOR_MODE_P (modev4))
18732 op2 = safe_vector_operand (op2, modev4);
18733
18734 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18735 op0 = copy_to_mode_reg (modev2, op0);
18736 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18737 op1 = copy_to_mode_reg (modei3, op1);
18738 if ((optimize && !register_operand (op2, modev4))
18739 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18740 op2 = copy_to_mode_reg (modev4, op2);
18741 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18742 op3 = copy_to_mode_reg (modei5, op3);
18743
18744 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18745 {
18746 error ("the fifth argument must be an 8-bit immediate");
18747 return const0_rtx;
18748 }
18749
18750 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18751 {
18752 if (optimize || !target
18753 || GET_MODE (target) != tmode0
18754 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18755 target = gen_reg_rtx (tmode0);
18756
18757 scratch1 = gen_reg_rtx (tmode1);
18758
18759 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18760 }
18761 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18762 {
18763 if (optimize || !target
18764 || GET_MODE (target) != tmode1
18765 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18766 target = gen_reg_rtx (tmode1);
18767
18768 scratch0 = gen_reg_rtx (tmode0);
18769
18770 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18771 }
18772 else
18773 {
18774 gcc_assert (d->flag);
18775
18776 scratch0 = gen_reg_rtx (tmode0);
18777 scratch1 = gen_reg_rtx (tmode1);
18778
18779 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18780 }
18781
18782 if (! pat)
18783 return 0;
18784
18785 emit_insn (pat);
18786
18787 if (d->flag)
18788 {
18789 target = gen_reg_rtx (SImode);
18790 emit_move_insn (target, const0_rtx);
18791 target = gen_rtx_SUBREG (QImode, target, 0);
18792
18793 emit_insn
18794 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18795 gen_rtx_fmt_ee (EQ, QImode,
18796 gen_rtx_REG ((enum machine_mode) d->flag,
18797 FLAGS_REG),
18798 const0_rtx)));
18799 return SUBREG_REG (target);
18800 }
18801 else
18802 return target;
18803 }
18804
18805
18806 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18807
18808 static rtx
18809 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18810 tree exp, rtx target)
18811 {
18812 rtx pat;
18813 tree arg0 = CALL_EXPR_ARG (exp, 0);
18814 tree arg1 = CALL_EXPR_ARG (exp, 1);
18815 tree arg2 = CALL_EXPR_ARG (exp, 2);
18816 rtx scratch0, scratch1;
18817 rtx op0 = expand_normal (arg0);
18818 rtx op1 = expand_normal (arg1);
18819 rtx op2 = expand_normal (arg2);
18820 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18821
18822 tmode0 = insn_data[d->icode].operand[0].mode;
18823 tmode1 = insn_data[d->icode].operand[1].mode;
18824 modev2 = insn_data[d->icode].operand[2].mode;
18825 modev3 = insn_data[d->icode].operand[3].mode;
18826 modeimm = insn_data[d->icode].operand[4].mode;
18827
18828 if (VECTOR_MODE_P (modev2))
18829 op0 = safe_vector_operand (op0, modev2);
18830 if (VECTOR_MODE_P (modev3))
18831 op1 = safe_vector_operand (op1, modev3);
18832
18833 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18834 op0 = copy_to_mode_reg (modev2, op0);
18835 if ((optimize && !register_operand (op1, modev3))
18836 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18837 op1 = copy_to_mode_reg (modev3, op1);
18838
18839 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18840 {
18841 error ("the third argument must be an 8-bit immediate");
18842 return const0_rtx;
18843 }
18844
18845 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18846 {
18847 if (optimize || !target
18848 || GET_MODE (target) != tmode0
18849 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18850 target = gen_reg_rtx (tmode0);
18851
18852 scratch1 = gen_reg_rtx (tmode1);
18853
18854 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18855 }
18856 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18857 {
18858 if (optimize || !target
18859 || GET_MODE (target) != tmode1
18860 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18861 target = gen_reg_rtx (tmode1);
18862
18863 scratch0 = gen_reg_rtx (tmode0);
18864
18865 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
18866 }
18867 else
18868 {
18869 gcc_assert (d->flag);
18870
18871 scratch0 = gen_reg_rtx (tmode0);
18872 scratch1 = gen_reg_rtx (tmode1);
18873
18874 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
18875 }
18876
18877 if (! pat)
18878 return 0;
18879
18880 emit_insn (pat);
18881
18882 if (d->flag)
18883 {
18884 target = gen_reg_rtx (SImode);
18885 emit_move_insn (target, const0_rtx);
18886 target = gen_rtx_SUBREG (QImode, target, 0);
18887
18888 emit_insn
18889 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18890 gen_rtx_fmt_ee (EQ, QImode,
18891 gen_rtx_REG ((enum machine_mode) d->flag,
18892 FLAGS_REG),
18893 const0_rtx)));
18894 return SUBREG_REG (target);
18895 }
18896 else
18897 return target;
18898 }
18899
18900 /* Return the integer constant in ARG. Constrain it to be in the range
18901 of the subparts of VEC_TYPE; issue an error if not. */
18902
18903 static int
18904 get_element_number (tree vec_type, tree arg)
18905 {
18906 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18907
18908 if (!host_integerp (arg, 1)
18909 || (elt = tree_low_cst (arg, 1), elt > max))
18910 {
18911 error ("selector must be an integer constant in the range 0..%wi", max);
18912 return 0;
18913 }
18914
18915 return elt;
18916 }
18917
18918 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18919 ix86_expand_vector_init. We DO have language-level syntax for this, in
18920 the form of (type){ init-list }. Except that since we can't place emms
18921 instructions from inside the compiler, we can't allow the use of MMX
18922 registers unless the user explicitly asks for it. So we do *not* define
18923 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18924 we have builtins invoked by mmintrin.h that give us license to emit
18925 these sorts of instructions. */
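/* For instance (the mmintrin.h name below is assumed for the example):

     __m64 v = _mm_set_pi16 (3, 2, 1, 0);

   expands to the V4HImode vec_init builtin, arrives here with four HImode
   arguments, and is turned into a PARALLEL that ix86_expand_vector_init
   then materializes into an MMX register.  */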
18926
18927 static rtx
18928 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18929 {
18930 enum machine_mode tmode = TYPE_MODE (type);
18931 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18932 int i, n_elt = GET_MODE_NUNITS (tmode);
18933 rtvec v = rtvec_alloc (n_elt);
18934
18935 gcc_assert (VECTOR_MODE_P (tmode));
18936 gcc_assert (call_expr_nargs (exp) == n_elt);
18937
18938 for (i = 0; i < n_elt; ++i)
18939 {
18940 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18941 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18942 }
18943
18944 if (!target || !register_operand (target, tmode))
18945 target = gen_reg_rtx (tmode);
18946
18947 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18948 return target;
18949 }
18950
18951 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18952 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18953 had a language-level syntax for referencing vector elements. */
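/* For instance (the emmintrin.h name below is assumed for the example):

     int w = _mm_extract_epi16 (x, 3);

   uses the V8HImode vec_ext builtin; get_element_number checks the selector
   against the vector's subparts (0..7 here) and ix86_expand_vector_extract
   emits the actual extraction.  */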
18954
18955 static rtx
18956 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18957 {
18958 enum machine_mode tmode, mode0;
18959 tree arg0, arg1;
18960 int elt;
18961 rtx op0;
18962
18963 arg0 = CALL_EXPR_ARG (exp, 0);
18964 arg1 = CALL_EXPR_ARG (exp, 1);
18965
18966 op0 = expand_normal (arg0);
18967 elt = get_element_number (TREE_TYPE (arg0), arg1);
18968
18969 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18970 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18971 gcc_assert (VECTOR_MODE_P (mode0));
18972
18973 op0 = force_reg (mode0, op0);
18974
18975 if (optimize || !target || !register_operand (target, tmode))
18976 target = gen_reg_rtx (tmode);
18977
18978 ix86_expand_vector_extract (true, target, op0, elt);
18979
18980 return target;
18981 }
18982
18983 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18984 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18985 a language-level syntax for referencing vector elements. */
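/* For instance (the emmintrin.h name below is assumed for the example):

     y = _mm_insert_epi16 (x, 42, 5);

   uses the V8HImode vec_set builtin.  Note that the input vector is first
   copied into a fresh register, so the user's source operand stays intact;
   the copy has element 5 replaced and is returned as the result.  */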
18986
18987 static rtx
18988 ix86_expand_vec_set_builtin (tree exp)
18989 {
18990 enum machine_mode tmode, mode1;
18991 tree arg0, arg1, arg2;
18992 int elt;
18993 rtx op0, op1, target;
18994
18995 arg0 = CALL_EXPR_ARG (exp, 0);
18996 arg1 = CALL_EXPR_ARG (exp, 1);
18997 arg2 = CALL_EXPR_ARG (exp, 2);
18998
18999 tmode = TYPE_MODE (TREE_TYPE (arg0));
19000 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19001 gcc_assert (VECTOR_MODE_P (tmode));
19002
19003 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19004 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19005 elt = get_element_number (TREE_TYPE (arg0), arg2);
19006
19007 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19008 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19009
19010 op0 = force_reg (tmode, op0);
19011 op1 = force_reg (mode1, op1);
19012
19013 /* OP0 is the source of these builtin functions and shouldn't be
19014 modified. Create a copy, use it and return it as target. */
19015 target = gen_reg_rtx (tmode);
19016 emit_move_insn (target, op0);
19017 ix86_expand_vector_set (true, target, op1, elt);
19018
19019 return target;
19020 }
19021
19022 /* Expand an expression EXP that calls a built-in function,
19023 with result going to TARGET if that's convenient
19024 (and in mode MODE if that's convenient).
19025 SUBTARGET may be used as the target for computing one of EXP's operands.
19026 IGNORE is nonzero if the value is to be ignored. */
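/* The dispatch below is two-tiered: builtins that need special handling are
   listed explicitly in the switch, everything else is looked up in the
   bdesc_* tables.  As an illustrative sketch (the xmmintrin.h spelling is
   assumed for the example),

     __m128 f (__m128 a, __m128 b) { return _mm_add_ps (a, b); }

   uses __builtin_ia32_addps, which is not special-cased and is therefore
   found in bdesc_2arg and handed to ix86_expand_binop_builtin.  */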
19027
19028 static rtx
19029 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19030 enum machine_mode mode ATTRIBUTE_UNUSED,
19031 int ignore ATTRIBUTE_UNUSED)
19032 {
19033 const struct builtin_description *d;
19034 size_t i;
19035 enum insn_code icode;
19036 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19037 tree arg0, arg1, arg2, arg3;
19038 rtx op0, op1, op2, op3, pat;
19039 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19040 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19041
19042 switch (fcode)
19043 {
19044 case IX86_BUILTIN_EMMS:
19045 emit_insn (gen_mmx_emms ());
19046 return 0;
19047
19048 case IX86_BUILTIN_SFENCE:
19049 emit_insn (gen_sse_sfence ());
19050 return 0;
19051
19052 case IX86_BUILTIN_MASKMOVQ:
19053 case IX86_BUILTIN_MASKMOVDQU:
19054 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19055 ? CODE_FOR_mmx_maskmovq
19056 : CODE_FOR_sse2_maskmovdqu);
19057 /* Note the arg order is different from the operand order. */
19058 arg1 = CALL_EXPR_ARG (exp, 0);
19059 arg2 = CALL_EXPR_ARG (exp, 1);
19060 arg0 = CALL_EXPR_ARG (exp, 2);
19061 op0 = expand_normal (arg0);
19062 op1 = expand_normal (arg1);
19063 op2 = expand_normal (arg2);
19064 mode0 = insn_data[icode].operand[0].mode;
19065 mode1 = insn_data[icode].operand[1].mode;
19066 mode2 = insn_data[icode].operand[2].mode;
19067
19068 op0 = force_reg (Pmode, op0);
19069 op0 = gen_rtx_MEM (mode1, op0);
19070
19071 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19072 op0 = copy_to_mode_reg (mode0, op0);
19073 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19074 op1 = copy_to_mode_reg (mode1, op1);
19075 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19076 op2 = copy_to_mode_reg (mode2, op2);
19077 pat = GEN_FCN (icode) (op0, op1, op2);
19078 if (! pat)
19079 return 0;
19080 emit_insn (pat);
19081 return 0;
19082
19083 case IX86_BUILTIN_SQRTSS:
19084 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19085 case IX86_BUILTIN_RSQRTSS:
19086 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19087 case IX86_BUILTIN_RCPSS:
19088 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19089
19090 case IX86_BUILTIN_LOADUPS:
19091 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19092
19093 case IX86_BUILTIN_STOREUPS:
19094 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19095
19096 case IX86_BUILTIN_LOADHPS:
19097 case IX86_BUILTIN_LOADLPS:
19098 case IX86_BUILTIN_LOADHPD:
19099 case IX86_BUILTIN_LOADLPD:
19100 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19101 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19102 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19103 : CODE_FOR_sse2_loadlpd);
19104 arg0 = CALL_EXPR_ARG (exp, 0);
19105 arg1 = CALL_EXPR_ARG (exp, 1);
19106 op0 = expand_normal (arg0);
19107 op1 = expand_normal (arg1);
19108 tmode = insn_data[icode].operand[0].mode;
19109 mode0 = insn_data[icode].operand[1].mode;
19110 mode1 = insn_data[icode].operand[2].mode;
19111
19112 op0 = force_reg (mode0, op0);
19113 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19114 if (optimize || target == 0
19115 || GET_MODE (target) != tmode
19116 || !register_operand (target, tmode))
19117 target = gen_reg_rtx (tmode);
19118 pat = GEN_FCN (icode) (target, op0, op1);
19119 if (! pat)
19120 return 0;
19121 emit_insn (pat);
19122 return target;
19123
19124 case IX86_BUILTIN_STOREHPS:
19125 case IX86_BUILTIN_STORELPS:
19126 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19127 : CODE_FOR_sse_storelps);
19128 arg0 = CALL_EXPR_ARG (exp, 0);
19129 arg1 = CALL_EXPR_ARG (exp, 1);
19130 op0 = expand_normal (arg0);
19131 op1 = expand_normal (arg1);
19132 mode0 = insn_data[icode].operand[0].mode;
19133 mode1 = insn_data[icode].operand[1].mode;
19134
19135 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19136 op1 = force_reg (mode1, op1);
19137
19138 pat = GEN_FCN (icode) (op0, op1);
19139 if (! pat)
19140 return 0;
19141 emit_insn (pat);
19142 return const0_rtx;
19143
19144 case IX86_BUILTIN_MOVNTPS:
19145 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19146 case IX86_BUILTIN_MOVNTQ:
19147 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19148
19149 case IX86_BUILTIN_LDMXCSR:
19150 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19151 target = assign_386_stack_local (SImode, SLOT_TEMP);
19152 emit_move_insn (target, op0);
19153 emit_insn (gen_sse_ldmxcsr (target));
19154 return 0;
19155
19156 case IX86_BUILTIN_STMXCSR:
19157 target = assign_386_stack_local (SImode, SLOT_TEMP);
19158 emit_insn (gen_sse_stmxcsr (target));
19159 return copy_to_mode_reg (SImode, target);
19160
19161 case IX86_BUILTIN_SHUFPS:
19162 case IX86_BUILTIN_SHUFPD:
19163 icode = (fcode == IX86_BUILTIN_SHUFPS
19164 ? CODE_FOR_sse_shufps
19165 : CODE_FOR_sse2_shufpd);
19166 arg0 = CALL_EXPR_ARG (exp, 0);
19167 arg1 = CALL_EXPR_ARG (exp, 1);
19168 arg2 = CALL_EXPR_ARG (exp, 2);
19169 op0 = expand_normal (arg0);
19170 op1 = expand_normal (arg1);
19171 op2 = expand_normal (arg2);
19172 tmode = insn_data[icode].operand[0].mode;
19173 mode0 = insn_data[icode].operand[1].mode;
19174 mode1 = insn_data[icode].operand[2].mode;
19175 mode2 = insn_data[icode].operand[3].mode;
19176
19177 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19178 op0 = copy_to_mode_reg (mode0, op0);
19179 if ((optimize && !register_operand (op1, mode1))
19180 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19181 op1 = copy_to_mode_reg (mode1, op1);
19182 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19183 {
19184 /* @@@ better error message */
19185 error ("mask must be an immediate");
19186 return gen_reg_rtx (tmode);
19187 }
19188 if (optimize || target == 0
19189 || GET_MODE (target) != tmode
19190 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19191 target = gen_reg_rtx (tmode);
19192 pat = GEN_FCN (icode) (target, op0, op1, op2);
19193 if (! pat)
19194 return 0;
19195 emit_insn (pat);
19196 return target;
19197
19198 case IX86_BUILTIN_PSHUFW:
19199 case IX86_BUILTIN_PSHUFD:
19200 case IX86_BUILTIN_PSHUFHW:
19201 case IX86_BUILTIN_PSHUFLW:
19202 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19203 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19204 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19205 : CODE_FOR_mmx_pshufw);
19206 arg0 = CALL_EXPR_ARG (exp, 0);
19207 arg1 = CALL_EXPR_ARG (exp, 1);
19208 op0 = expand_normal (arg0);
19209 op1 = expand_normal (arg1);
19210 tmode = insn_data[icode].operand[0].mode;
19211 mode1 = insn_data[icode].operand[1].mode;
19212 mode2 = insn_data[icode].operand[2].mode;
19213
19214 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19215 op0 = copy_to_mode_reg (mode1, op0);
19216 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19217 {
19218 /* @@@ better error message */
19219 error ("mask must be an immediate");
19220 return const0_rtx;
19221 }
19222 if (target == 0
19223 || GET_MODE (target) != tmode
19224 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19225 target = gen_reg_rtx (tmode);
19226 pat = GEN_FCN (icode) (target, op0, op1);
19227 if (! pat)
19228 return 0;
19229 emit_insn (pat);
19230 return target;
19231
19232 case IX86_BUILTIN_PSLLWI128:
19233 icode = CODE_FOR_ashlv8hi3;
19234 goto do_pshifti;
19235 case IX86_BUILTIN_PSLLDI128:
19236 icode = CODE_FOR_ashlv4si3;
19237 goto do_pshifti;
19238 case IX86_BUILTIN_PSLLQI128:
19239 icode = CODE_FOR_ashlv2di3;
19240 goto do_pshifti;
19241 case IX86_BUILTIN_PSRAWI128:
19242 icode = CODE_FOR_ashrv8hi3;
19243 goto do_pshifti;
19244 case IX86_BUILTIN_PSRADI128:
19245 icode = CODE_FOR_ashrv4si3;
19246 goto do_pshifti;
19247 case IX86_BUILTIN_PSRLWI128:
19248 icode = CODE_FOR_lshrv8hi3;
19249 goto do_pshifti;
19250 case IX86_BUILTIN_PSRLDI128:
19251 icode = CODE_FOR_lshrv4si3;
19252 goto do_pshifti;
19253 case IX86_BUILTIN_PSRLQI128:
19254 icode = CODE_FOR_lshrv2di3;
19255 goto do_pshifti;
19256 do_pshifti:
19257 arg0 = CALL_EXPR_ARG (exp, 0);
19258 arg1 = CALL_EXPR_ARG (exp, 1);
19259 op0 = expand_normal (arg0);
19260 op1 = expand_normal (arg1);
19261
19262 if (!CONST_INT_P (op1))
19263 {
19264 error ("shift must be an immediate");
19265 return const0_rtx;
19266 }
19267 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19268 op1 = GEN_INT (255);
19269
19270 tmode = insn_data[icode].operand[0].mode;
19271 mode1 = insn_data[icode].operand[1].mode;
19272 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19273 op0 = copy_to_reg (op0);
19274
19275 target = gen_reg_rtx (tmode);
19276 pat = GEN_FCN (icode) (target, op0, op1);
19277 if (!pat)
19278 return 0;
19279 emit_insn (pat);
19280 return target;
19281
19282 case IX86_BUILTIN_PSLLW128:
19283 icode = CODE_FOR_ashlv8hi3;
19284 goto do_pshift;
19285 case IX86_BUILTIN_PSLLD128:
19286 icode = CODE_FOR_ashlv4si3;
19287 goto do_pshift;
19288 case IX86_BUILTIN_PSLLQ128:
19289 icode = CODE_FOR_ashlv2di3;
19290 goto do_pshift;
19291 case IX86_BUILTIN_PSRAW128:
19292 icode = CODE_FOR_ashrv8hi3;
19293 goto do_pshift;
19294 case IX86_BUILTIN_PSRAD128:
19295 icode = CODE_FOR_ashrv4si3;
19296 goto do_pshift;
19297 case IX86_BUILTIN_PSRLW128:
19298 icode = CODE_FOR_lshrv8hi3;
19299 goto do_pshift;
19300 case IX86_BUILTIN_PSRLD128:
19301 icode = CODE_FOR_lshrv4si3;
19302 goto do_pshift;
19303 case IX86_BUILTIN_PSRLQ128:
19304 icode = CODE_FOR_lshrv2di3;
19305 goto do_pshift;
19306 do_pshift:
19307 arg0 = CALL_EXPR_ARG (exp, 0);
19308 arg1 = CALL_EXPR_ARG (exp, 1);
19309 op0 = expand_normal (arg0);
19310 op1 = expand_normal (arg1);
19311
19312 tmode = insn_data[icode].operand[0].mode;
19313 mode1 = insn_data[icode].operand[1].mode;
19314
19315 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19316 op0 = copy_to_reg (op0);
19317
19318 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19319 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19320 op1 = copy_to_reg (op1);
19321
19322 target = gen_reg_rtx (tmode);
19323 pat = GEN_FCN (icode) (target, op0, op1);
19324 if (!pat)
19325 return 0;
19326 emit_insn (pat);
19327 return target;
19328
19329 case IX86_BUILTIN_PSLLDQI128:
19330 case IX86_BUILTIN_PSRLDQI128:
19331 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19332 : CODE_FOR_sse2_lshrti3);
19333 arg0 = CALL_EXPR_ARG (exp, 0);
19334 arg1 = CALL_EXPR_ARG (exp, 1);
19335 op0 = expand_normal (arg0);
19336 op1 = expand_normal (arg1);
19337 tmode = insn_data[icode].operand[0].mode;
19338 mode1 = insn_data[icode].operand[1].mode;
19339 mode2 = insn_data[icode].operand[2].mode;
19340
19341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19342 {
19343 op0 = copy_to_reg (op0);
19344 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19345 }
19346 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19347 {
19348 error ("shift must be an immediate");
19349 return const0_rtx;
19350 }
19351 target = gen_reg_rtx (V2DImode);
19352 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19353 op0, op1);
19354 if (! pat)
19355 return 0;
19356 emit_insn (pat);
19357 return target;
19358
19359 case IX86_BUILTIN_FEMMS:
19360 emit_insn (gen_mmx_femms ());
19361 return NULL_RTX;
19362
19363 case IX86_BUILTIN_PAVGUSB:
19364 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19365
19366 case IX86_BUILTIN_PF2ID:
19367 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19368
19369 case IX86_BUILTIN_PFACC:
19370 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19371
19372 case IX86_BUILTIN_PFADD:
19373 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19374
19375 case IX86_BUILTIN_PFCMPEQ:
19376 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19377
19378 case IX86_BUILTIN_PFCMPGE:
19379 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19380
19381 case IX86_BUILTIN_PFCMPGT:
19382 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19383
19384 case IX86_BUILTIN_PFMAX:
19385 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19386
19387 case IX86_BUILTIN_PFMIN:
19388 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19389
19390 case IX86_BUILTIN_PFMUL:
19391 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19392
19393 case IX86_BUILTIN_PFRCP:
19394 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19395
19396 case IX86_BUILTIN_PFRCPIT1:
19397 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19398
19399 case IX86_BUILTIN_PFRCPIT2:
19400 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19401
19402 case IX86_BUILTIN_PFRSQIT1:
19403 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19404
19405 case IX86_BUILTIN_PFRSQRT:
19406 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19407
19408 case IX86_BUILTIN_PFSUB:
19409 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19410
19411 case IX86_BUILTIN_PFSUBR:
19412 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19413
19414 case IX86_BUILTIN_PI2FD:
19415 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19416
19417 case IX86_BUILTIN_PMULHRW:
19418 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19419
19420 case IX86_BUILTIN_PF2IW:
19421 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19422
19423 case IX86_BUILTIN_PFNACC:
19424 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19425
19426 case IX86_BUILTIN_PFPNACC:
19427 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19428
19429 case IX86_BUILTIN_PI2FW:
19430 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19431
19432 case IX86_BUILTIN_PSWAPDSI:
19433 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19434
19435 case IX86_BUILTIN_PSWAPDSF:
19436 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19437
19438 case IX86_BUILTIN_SQRTSD:
19439 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19440 case IX86_BUILTIN_LOADUPD:
19441 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19442 case IX86_BUILTIN_STOREUPD:
19443 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19444
19445 case IX86_BUILTIN_MFENCE:
19446 emit_insn (gen_sse2_mfence ());
19447 return 0;
19448 case IX86_BUILTIN_LFENCE:
19449 emit_insn (gen_sse2_lfence ());
19450 return 0;
19451
19452 case IX86_BUILTIN_CLFLUSH:
19453 arg0 = CALL_EXPR_ARG (exp, 0);
19454 op0 = expand_normal (arg0);
19455 icode = CODE_FOR_sse2_clflush;
19456 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19457 op0 = copy_to_mode_reg (Pmode, op0);
19458
19459 emit_insn (gen_sse2_clflush (op0));
19460 return 0;
19461
19462 case IX86_BUILTIN_MOVNTPD:
19463 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19464 case IX86_BUILTIN_MOVNTDQ:
19465 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19466 case IX86_BUILTIN_MOVNTI:
19467 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19468
19469 case IX86_BUILTIN_LOADDQU:
19470 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19471 case IX86_BUILTIN_STOREDQU:
19472 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19473
19474 case IX86_BUILTIN_MONITOR:
19475 arg0 = CALL_EXPR_ARG (exp, 0);
19476 arg1 = CALL_EXPR_ARG (exp, 1);
19477 arg2 = CALL_EXPR_ARG (exp, 2);
19478 op0 = expand_normal (arg0);
19479 op1 = expand_normal (arg1);
19480 op2 = expand_normal (arg2);
19481 if (!REG_P (op0))
19482 op0 = copy_to_mode_reg (Pmode, op0);
19483 if (!REG_P (op1))
19484 op1 = copy_to_mode_reg (SImode, op1);
19485 if (!REG_P (op2))
19486 op2 = copy_to_mode_reg (SImode, op2);
19487 if (!TARGET_64BIT)
19488 emit_insn (gen_sse3_monitor (op0, op1, op2));
19489 else
19490 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19491 return 0;
19492
19493 case IX86_BUILTIN_MWAIT:
19494 arg0 = CALL_EXPR_ARG (exp, 0);
19495 arg1 = CALL_EXPR_ARG (exp, 1);
19496 op0 = expand_normal (arg0);
19497 op1 = expand_normal (arg1);
19498 if (!REG_P (op0))
19499 op0 = copy_to_mode_reg (SImode, op0);
19500 if (!REG_P (op1))
19501 op1 = copy_to_mode_reg (SImode, op1);
19502 emit_insn (gen_sse3_mwait (op0, op1));
19503 return 0;
19504
19505 case IX86_BUILTIN_LDDQU:
19506 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19507 target, 1);
19508
19509 case IX86_BUILTIN_PALIGNR:
19510 case IX86_BUILTIN_PALIGNR128:
19511 if (fcode == IX86_BUILTIN_PALIGNR)
19512 {
19513 icode = CODE_FOR_ssse3_palignrdi;
19514 mode = DImode;
19515 }
19516 else
19517 {
19518 icode = CODE_FOR_ssse3_palignrti;
19519 mode = V2DImode;
19520 }
19521 arg0 = CALL_EXPR_ARG (exp, 0);
19522 arg1 = CALL_EXPR_ARG (exp, 1);
19523 arg2 = CALL_EXPR_ARG (exp, 2);
19524 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19525 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19526 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19527 tmode = insn_data[icode].operand[0].mode;
19528 mode1 = insn_data[icode].operand[1].mode;
19529 mode2 = insn_data[icode].operand[2].mode;
19530 mode3 = insn_data[icode].operand[3].mode;
19531
19532 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19533 {
19534 op0 = copy_to_reg (op0);
19535 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19536 }
19537 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19538 {
19539 op1 = copy_to_reg (op1);
19540 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19541 }
19542 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19543 {
19544 error ("shift must be an immediate");
19545 return const0_rtx;
19546 }
19547 target = gen_reg_rtx (mode);
19548 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19549 op0, op1, op2);
19550 if (! pat)
19551 return 0;
19552 emit_insn (pat);
19553 return target;
19554
19555 case IX86_BUILTIN_MOVNTDQA:
19556 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19557 target, 1);
19558
19559 case IX86_BUILTIN_MOVNTSD:
19560 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19561
19562 case IX86_BUILTIN_MOVNTSS:
19563 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19564
19565 case IX86_BUILTIN_INSERTQ:
19566 case IX86_BUILTIN_EXTRQ:
19567 icode = (fcode == IX86_BUILTIN_EXTRQ
19568 ? CODE_FOR_sse4a_extrq
19569 : CODE_FOR_sse4a_insertq);
19570 arg0 = CALL_EXPR_ARG (exp, 0);
19571 arg1 = CALL_EXPR_ARG (exp, 1);
19572 op0 = expand_normal (arg0);
19573 op1 = expand_normal (arg1);
19574 tmode = insn_data[icode].operand[0].mode;
19575 mode1 = insn_data[icode].operand[1].mode;
19576 mode2 = insn_data[icode].operand[2].mode;
19577 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19578 op0 = copy_to_mode_reg (mode1, op0);
19579 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19580 op1 = copy_to_mode_reg (mode2, op1);
19581 if (optimize || target == 0
19582 || GET_MODE (target) != tmode
19583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19584 target = gen_reg_rtx (tmode);
19585 pat = GEN_FCN (icode) (target, op0, op1);
19586 if (! pat)
19587 return NULL_RTX;
19588 emit_insn (pat);
19589 return target;
19590
19591 case IX86_BUILTIN_EXTRQI:
19592 icode = CODE_FOR_sse4a_extrqi;
19593 arg0 = CALL_EXPR_ARG (exp, 0);
19594 arg1 = CALL_EXPR_ARG (exp, 1);
19595 arg2 = CALL_EXPR_ARG (exp, 2);
19596 op0 = expand_normal (arg0);
19597 op1 = expand_normal (arg1);
19598 op2 = expand_normal (arg2);
19599 tmode = insn_data[icode].operand[0].mode;
19600 mode1 = insn_data[icode].operand[1].mode;
19601 mode2 = insn_data[icode].operand[2].mode;
19602 mode3 = insn_data[icode].operand[3].mode;
19603 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19604 op0 = copy_to_mode_reg (mode1, op0);
19605 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19606 {
19607 error ("index mask must be an immediate");
19608 return gen_reg_rtx (tmode);
19609 }
19610 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19611 {
19612 error ("length mask must be an immediate");
19613 return gen_reg_rtx (tmode);
19614 }
19615 if (optimize || target == 0
19616 || GET_MODE (target) != tmode
19617 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19618 target = gen_reg_rtx (tmode);
19619 pat = GEN_FCN (icode) (target, op0, op1, op2);
19620 if (! pat)
19621 return NULL_RTX;
19622 emit_insn (pat);
19623 return target;
19624
19625 case IX86_BUILTIN_INSERTQI:
19626 icode = CODE_FOR_sse4a_insertqi;
19627 arg0 = CALL_EXPR_ARG (exp, 0);
19628 arg1 = CALL_EXPR_ARG (exp, 1);
19629 arg2 = CALL_EXPR_ARG (exp, 2);
19630 arg3 = CALL_EXPR_ARG (exp, 3);
19631 op0 = expand_normal (arg0);
19632 op1 = expand_normal (arg1);
19633 op2 = expand_normal (arg2);
19634 op3 = expand_normal (arg3);
19635 tmode = insn_data[icode].operand[0].mode;
19636 mode1 = insn_data[icode].operand[1].mode;
19637 mode2 = insn_data[icode].operand[2].mode;
19638 mode3 = insn_data[icode].operand[3].mode;
19639 mode4 = insn_data[icode].operand[4].mode;
19640
19641 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19642 op0 = copy_to_mode_reg (mode1, op0);
19643
19644 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19645 op1 = copy_to_mode_reg (mode2, op1);
19646
19647 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19648 {
19649 error ("index mask must be an immediate");
19650 return gen_reg_rtx (tmode);
19651 }
19652 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19653 {
19654 error ("length mask must be an immediate");
19655 return gen_reg_rtx (tmode);
19656 }
19657 if (optimize || target == 0
19658 || GET_MODE (target) != tmode
19659 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19660 target = gen_reg_rtx (tmode);
19661 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19662 if (! pat)
19663 return NULL_RTX;
19664 emit_insn (pat);
19665 return target;
19666
19667 case IX86_BUILTIN_VEC_INIT_V2SI:
19668 case IX86_BUILTIN_VEC_INIT_V4HI:
19669 case IX86_BUILTIN_VEC_INIT_V8QI:
19670 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19671
19672 case IX86_BUILTIN_VEC_EXT_V2DF:
19673 case IX86_BUILTIN_VEC_EXT_V2DI:
19674 case IX86_BUILTIN_VEC_EXT_V4SF:
19675 case IX86_BUILTIN_VEC_EXT_V4SI:
19676 case IX86_BUILTIN_VEC_EXT_V8HI:
19677 case IX86_BUILTIN_VEC_EXT_V2SI:
19678 case IX86_BUILTIN_VEC_EXT_V4HI:
19679 case IX86_BUILTIN_VEC_EXT_V16QI:
19680 return ix86_expand_vec_ext_builtin (exp, target);
19681
19682 case IX86_BUILTIN_VEC_SET_V2DI:
19683 case IX86_BUILTIN_VEC_SET_V4SF:
19684 case IX86_BUILTIN_VEC_SET_V4SI:
19685 case IX86_BUILTIN_VEC_SET_V8HI:
19686 case IX86_BUILTIN_VEC_SET_V4HI:
19687 case IX86_BUILTIN_VEC_SET_V16QI:
19688 return ix86_expand_vec_set_builtin (exp);
19689
19690 default:
19691 break;
19692 }
19693
19694 for (i = 0, d = bdesc_sse_3arg;
19695 i < ARRAY_SIZE (bdesc_sse_3arg);
19696 i++, d++)
19697 if (d->code == fcode)
19698 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19699 target);
19700
19701 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19702 if (d->code == fcode)
19703 {
19704 /* Compares are treated specially. */
19705 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19706 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19707 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19708 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19709 return ix86_expand_sse_compare (d, exp, target);
19710
19711 return ix86_expand_binop_builtin (d->icode, exp, target);
19712 }
19713
19714 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19715 if (d->code == fcode)
19716 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19717
19718 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19719 if (d->code == fcode)
19720 return ix86_expand_sse_comi (d, exp, target);
19721
19722 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19723 if (d->code == fcode)
19724 return ix86_expand_sse_ptest (d, exp, target);
19725
19726 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19727 if (d->code == fcode)
19728 return ix86_expand_crc32 (d->icode, exp, target);
19729
19730 for (i = 0, d = bdesc_pcmpestr;
19731 i < ARRAY_SIZE (bdesc_pcmpestr);
19732 i++, d++)
19733 if (d->code == fcode)
19734 return ix86_expand_sse_pcmpestr (d, exp, target);
19735
19736 for (i = 0, d = bdesc_pcmpistr;
19737 i < ARRAY_SIZE (bdesc_pcmpistr);
19738 i++, d++)
19739 if (d->code == fcode)
19740 return ix86_expand_sse_pcmpistr (d, exp, target);
19741
19742 gcc_unreachable ();
19743 }
19744
19745 /* Returns a function decl for a vectorized version of the builtin function
19746 with builtin function code FN, the result vector type TYPE_OUT and the
19747 argument vector type TYPE_IN, or NULL_TREE if it is not available. */
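/* For example, when the vectorizer processes

     for (i = 0; i < n; i++)
       a[i] = sqrt (b[i]);

   with double operands and a V2DFmode vector size, it asks this hook for
   BUILT_IN_SQRT with DFmode subparts and two elements in and out, and we
   return the decl of IX86_BUILTIN_SQRTPD so the loop can call it directly.  */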
19748
19749 static tree
19750 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19751 tree type_in)
19752 {
19753 enum machine_mode in_mode, out_mode;
19754 int in_n, out_n;
19755
19756 if (TREE_CODE (type_out) != VECTOR_TYPE
19757 || TREE_CODE (type_in) != VECTOR_TYPE)
19758 return NULL_TREE;
19759
19760 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19761 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19762 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19763 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19764
19765 switch (fn)
19766 {
19767 case BUILT_IN_SQRT:
19768 if (out_mode == DFmode && out_n == 2
19769 && in_mode == DFmode && in_n == 2)
19770 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19771 return NULL_TREE;
19772
19773 case BUILT_IN_SQRTF:
19774 if (out_mode == SFmode && out_n == 4
19775 && in_mode == SFmode && in_n == 4)
19776 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19777 return NULL_TREE;
19778
19779 case BUILT_IN_LRINTF:
19780 if (out_mode == SImode && out_n == 4
19781 && in_mode == SFmode && in_n == 4)
19782 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19783 return NULL_TREE;
19784
19785 default:
19786 ;
19787 }
19788
19789 return NULL_TREE;
19790 }
19791
19792 /* Returns a decl of a function that implements the conversion CODE of the
19793 input vector of type TYPE, or NULL_TREE if it is not available. */
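/* For example, vectorizing a loop that converts int b[] to float a[]:

     for (i = 0; i < n; i++)
       a[i] = b[i];

   produces a FLOAT_EXPR on a V4SImode input, for which we return the decl
   of IX86_BUILTIN_CVTDQ2PS; the float-to-int truncation on V4SFmode
   (FIX_TRUNC_EXPR) returns IX86_BUILTIN_CVTTPS2DQ instead.  */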
19794
19795 static tree
19796 ix86_builtin_conversion (unsigned int code, tree type)
19797 {
19798 if (TREE_CODE (type) != VECTOR_TYPE)
19799 return NULL_TREE;
19800
19801 switch (code)
19802 {
19803 case FLOAT_EXPR:
19804 switch (TYPE_MODE (type))
19805 {
19806 case V4SImode:
19807 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19808 default:
19809 return NULL_TREE;
19810 }
19811
19812 case FIX_TRUNC_EXPR:
19813 switch (TYPE_MODE (type))
19814 {
19815 case V4SFmode:
19816 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19817 default:
19818 return NULL_TREE;
19819 }
19820 default:
19821 return NULL_TREE;
19822
19823 }
19824 }
19825
19826 /* Store OPERAND to memory after reload is completed. This means
19827 that we can't easily use assign_stack_local. */
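/* A worked sketch of the cases below: with a red zone the operand is simply
   stored at sp - RED_ZONE_SIZE, inside the area the ABI guarantees will not
   be clobbered, so no stack adjustment is needed.  Otherwise the value is
   pushed; on 32-bit targets a DImode operand is split by split_di into its
   SImode halves and the high half is pushed first, so the pair ends up in
   the usual little-endian layout at the final stack pointer.  */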
19828 rtx
19829 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19830 {
19831 rtx result;
19832
19833 gcc_assert (reload_completed);
19834 if (TARGET_RED_ZONE)
19835 {
19836 result = gen_rtx_MEM (mode,
19837 gen_rtx_PLUS (Pmode,
19838 stack_pointer_rtx,
19839 GEN_INT (-RED_ZONE_SIZE)));
19840 emit_move_insn (result, operand);
19841 }
19842 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19843 {
19844 switch (mode)
19845 {
19846 case HImode:
19847 case SImode:
19848 operand = gen_lowpart (DImode, operand);
19849 /* FALLTHRU */
19850 case DImode:
19851 emit_insn (
19852 gen_rtx_SET (VOIDmode,
19853 gen_rtx_MEM (DImode,
19854 gen_rtx_PRE_DEC (DImode,
19855 stack_pointer_rtx)),
19856 operand));
19857 break;
19858 default:
19859 gcc_unreachable ();
19860 }
19861 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19862 }
19863 else
19864 {
19865 switch (mode)
19866 {
19867 case DImode:
19868 {
19869 rtx operands[2];
19870 split_di (&operand, 1, operands, operands + 1);
19871 emit_insn (
19872 gen_rtx_SET (VOIDmode,
19873 gen_rtx_MEM (SImode,
19874 gen_rtx_PRE_DEC (Pmode,
19875 stack_pointer_rtx)),
19876 operands[1]));
19877 emit_insn (
19878 gen_rtx_SET (VOIDmode,
19879 gen_rtx_MEM (SImode,
19880 gen_rtx_PRE_DEC (Pmode,
19881 stack_pointer_rtx)),
19882 operands[0]));
19883 }
19884 break;
19885 case HImode:
19886 /* Store HImodes as SImodes. */
19887 operand = gen_lowpart (SImode, operand);
19888 /* FALLTHRU */
19889 case SImode:
19890 emit_insn (
19891 gen_rtx_SET (VOIDmode,
19892 gen_rtx_MEM (GET_MODE (operand),
19893 gen_rtx_PRE_DEC (SImode,
19894 stack_pointer_rtx)),
19895 operand));
19896 break;
19897 default:
19898 gcc_unreachable ();
19899 }
19900 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19901 }
19902 return result;
19903 }
19904
19905 /* Free the operand from memory. */
19906 void
19907 ix86_free_from_memory (enum machine_mode mode)
19908 {
19909 if (!TARGET_RED_ZONE)
19910 {
19911 int size;
19912
19913 if (mode == DImode || TARGET_64BIT)
19914 size = 8;
19915 else
19916 size = 4;
19917 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19918 to a pop or add instruction if registers are available. */
19919 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19920 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19921 GEN_INT (size))));
19922 }
19923 }
19924
19925 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19926 QImode must go into class Q_REGS.
19927 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19928 movdf to do mem-to-mem moves through integer regs. */
19929 enum reg_class
19930 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
19931 {
19932 enum machine_mode mode = GET_MODE (x);
19933
19934 /* We're only allowed to return a subclass of CLASS. Many of the
19935 following checks fail for NO_REGS, so eliminate that early. */
19936 if (regclass == NO_REGS)
19937 return NO_REGS;
19938
19939 /* All classes can load zeros. */
19940 if (x == CONST0_RTX (mode))
19941 return regclass;
19942
19943 /* Force constants into memory if we are loading a (nonzero) constant into
19944 an MMX or SSE register. This is because there are no MMX/SSE instructions
19945 to load from a constant. */
19946 if (CONSTANT_P (x)
19947 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
19948 return NO_REGS;
19949
19950 /* Prefer SSE regs only, if we can use them for math. */
19951 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19952 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19953
19954 /* Floating-point constants need more complex checks. */
19955 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19956 {
19957 /* General regs can load everything. */
19958 if (reg_class_subset_p (regclass, GENERAL_REGS))
19959 return regclass;
19960
19961 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19962 zero above. We only want to wind up preferring 80387 registers if
19963 we plan on doing computation with them. */
19964 if (TARGET_80387
19965 && standard_80387_constant_p (x))
19966 {
19967 /* Limit class to non-sse. */
19968 if (regclass == FLOAT_SSE_REGS)
19969 return FLOAT_REGS;
19970 if (regclass == FP_TOP_SSE_REGS)
19971 return FP_TOP_REG;
19972 if (regclass == FP_SECOND_SSE_REGS)
19973 return FP_SECOND_REG;
19974 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
19975 return regclass;
19976 }
19977
19978 return NO_REGS;
19979 }
19980
19981 /* Generally when we see PLUS here, it's the function invariant
19982 (plus soft-fp const_int), which can only be computed into general
19983 regs. */
19984 if (GET_CODE (x) == PLUS)
19985 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
19986
19987 /* QImode constants are easy to load, but non-constant QImode data
19988 must go into Q_REGS. */
19989 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19990 {
19991 if (reg_class_subset_p (regclass, Q_REGS))
19992 return regclass;
19993 if (reg_class_subset_p (Q_REGS, regclass))
19994 return Q_REGS;
19995 return NO_REGS;
19996 }
19997
19998 return regclass;
19999 }
20000
20001 /* Discourage putting floating-point values in SSE registers unless
20002 SSE math is being used, and likewise for the 387 registers. */
20003 enum reg_class
20004 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20005 {
20006 enum machine_mode mode = GET_MODE (x);
20007
20008 /* Restrict the output reload class to the register bank that we are doing
20009 math on. If we would like not to return a subset of CLASS, reject this
20010 alternative: if reload cannot do this, it will still use its choice. */
20011 mode = GET_MODE (x);
20012 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20013 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20014
20015 if (X87_FLOAT_MODE_P (mode))
20016 {
20017 if (regclass == FP_TOP_SSE_REGS)
20018 return FP_TOP_REG;
20019 else if (regclass == FP_SECOND_SSE_REGS)
20020 return FP_SECOND_REG;
20021 else
20022 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20023 }
20024
20025 return regclass;
20026 }
20027
20028 /* If we are copying between general and FP registers, we need a memory
20029 location. The same is true for SSE and MMX registers.
20030
20031 The macro can't work reliably when one of the CLASSES is a class containing
20032 registers from multiple units (SSE, MMX, integer). We avoid this by never
20033 combining those units in single alternative in the machine description.
20034 Ensure that this constraint holds to avoid unexpected surprises.
20035
20036 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20037 enforce these sanity checks. */
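/* For example, on a 32-bit target a DImode move between SSE_REGS and
   GENERAL_REGS reports that secondary memory is needed: even with SSE2 and
   inter-unit moves enabled, the mode is wider than UNITS_PER_WORD, so the
   value has to travel through a stack slot.  */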
20038
20039 int
20040 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20041 enum machine_mode mode, int strict)
20042 {
20043 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20044 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20045 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20046 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20047 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20048 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20049 {
20050 gcc_assert (!strict);
20051 return true;
20052 }
20053
20054 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20055 return true;
20056
20057 /* ??? This is a lie. We do have moves between mmx/general, and between
20058 mmx/sse2. But by saying we need secondary memory we discourage the
20059 register allocator from using the mmx registers unless needed. */
20060 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20061 return true;
20062
20063 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20064 {
20065 /* SSE1 doesn't have any direct moves from other classes. */
20066 if (!TARGET_SSE2)
20067 return true;
20068
20069 /* If the target says that inter-unit moves are more expensive
20070 than moving through memory, then don't generate them. */
20071 if (!TARGET_INTER_UNIT_MOVES)
20072 return true;
20073
20074 /* Between SSE and general, we have moves no larger than word size. */
20075 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20076 return true;
20077 }
20078
20079 return false;
20080 }
20081
20082 /* Return true if the registers in CLASS cannot represent the change from
20083 modes FROM to TO. */
20084
20085 bool
20086 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20087 enum reg_class regclass)
20088 {
20089 if (from == to)
20090 return false;
20091
20092 /* x87 registers can't do subreg at all, as all values are reformatted
20093 to extended precision. */
20094 if (MAYBE_FLOAT_CLASS_P (regclass))
20095 return true;
20096
20097 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20098 {
20099 /* Vector registers do not support QI or HImode loads. If we don't
20100 disallow a change to these modes, reload will assume it's ok to
20101 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20102 the vec_dupv4hi pattern. */
20103 if (GET_MODE_SIZE (from) < 4)
20104 return true;
20105
20106 /* Vector registers do not support subreg with nonzero offsets, which
20107 are otherwise valid for integer registers. Since we can't see
20108 whether we have a nonzero offset from here, prohibit all
20109 nonparadoxical subregs changing size. */
20110 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20111 return true;
20112 }
20113
20114 return false;
20115 }
20116
20117 /* Return the cost of moving data from a register in class CLASS1 to
20118 one in class CLASS2.
20119
20120 It is not required that the cost always equal 2 when FROM is the same as TO;
20121 on some machines it is expensive to move between registers if they are not
20122 general registers. */
20123
20124 int
20125 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20126 enum reg_class class2)
20127 {
20128 /* In case we require secondary memory, compute cost of the store followed
20129 by load. In order to avoid bad register allocation choices, we need
20130 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20131
20132 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
20133 {
20134 int cost = 1;
20135
20136 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
20137 MEMORY_MOVE_COST (mode, class1, 1));
20138 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
20139 MEMORY_MOVE_COST (mode, class2, 1));
20140
20141 /* In the case of copying from a general purpose register we may emit
20142 multiple stores followed by a single load, causing a memory size
20143 mismatch stall. Count this as an arbitrarily high cost of 20. */
20144 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20145 cost += 20;
20146
20147 /* In the case of FP/MMX moves, the registers actually overlap, and we
20148 have to switch modes in order to treat them differently. */
20149 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20150 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20151 cost += 20;
20152
20153 return cost;
20154 }
20155
20156 /* Moves between SSE/MMX and integer unit are expensive. */
20157 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20158 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20159 return ix86_cost->mmxsse_to_integer;
20160 if (MAYBE_FLOAT_CLASS_P (class1))
20161 return ix86_cost->fp_move;
20162 if (MAYBE_SSE_CLASS_P (class1))
20163 return ix86_cost->sse_move;
20164 if (MAYBE_MMX_CLASS_P (class1))
20165 return ix86_cost->mmx_move;
20166 return 2;
20167 }
20168
20169 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20170
20171 bool
20172 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20173 {
20174 /* The flags register, and only it, can hold CCmode values. */
20175 if (CC_REGNO_P (regno))
20176 return GET_MODE_CLASS (mode) == MODE_CC;
20177 if (GET_MODE_CLASS (mode) == MODE_CC
20178 || GET_MODE_CLASS (mode) == MODE_RANDOM
20179 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20180 return 0;
20181 if (FP_REGNO_P (regno))
20182 return VALID_FP_MODE_P (mode);
20183 if (SSE_REGNO_P (regno))
20184 {
20185 /* We implement the move patterns for all vector modes into and
20186 out of SSE registers, even when no operation instructions
20187 are available. */
20188 return (VALID_SSE_REG_MODE (mode)
20189 || VALID_SSE2_REG_MODE (mode)
20190 || VALID_MMX_REG_MODE (mode)
20191 || VALID_MMX_REG_MODE_3DNOW (mode));
20192 }
20193 if (MMX_REGNO_P (regno))
20194 {
20195 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20196 so if the register is available at all, then we can move data of
20197 the given mode into or out of it. */
20198 return (VALID_MMX_REG_MODE (mode)
20199 || VALID_MMX_REG_MODE_3DNOW (mode));
20200 }
20201
20202 if (mode == QImode)
20203 {
20204 /* Take care with QImode values - they can be in non-QI regs,
20205 but then they do cause partial register stalls. */
20206 if (regno < 4 || TARGET_64BIT)
20207 return 1;
20208 if (!TARGET_PARTIAL_REG_STALL)
20209 return 1;
20210 return reload_in_progress || reload_completed;
20211 }
20212 /* We handle both integers and floats in the general purpose registers. */
20213 else if (VALID_INT_MODE_P (mode))
20214 return 1;
20215 else if (VALID_FP_MODE_P (mode))
20216 return 1;
20217 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20218 on to use that value in smaller contexts, this can easily force a
20219 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20220 supporting DImode, allow it. */
20221 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20222 return 1;
20223
20224 return 0;
20225 }
20226
20227 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20228 tieable integer mode. */
20229
20230 static bool
20231 ix86_tieable_integer_mode_p (enum machine_mode mode)
20232 {
20233 switch (mode)
20234 {
20235 case HImode:
20236 case SImode:
20237 return true;
20238
20239 case QImode:
20240 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20241
20242 case DImode:
20243 return TARGET_64BIT;
20244
20245 default:
20246 return false;
20247 }
20248 }
20249
20250 /* Return true if MODE1 is accessible in a register that can hold MODE2
20251 without copying. That is, all register classes that can hold MODE2
20252 can also hold MODE1. */
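/* Worked examples of the rules below: (SFmode, XFmode) and (DFmode, XFmode)
   are tieable because anything that can hold an XFmode value (the fp stack
   or general regs) can also hold the narrower float, while the reverse query
   (XFmode, DFmode) is not, since SSE registers can hold DFmode but not
   XFmode.  Two 16-byte modes such as V4SFmode and V2DImode tie with each
   other because both are acceptable to SSE registers.  */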
20253
20254 bool
20255 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20256 {
20257 if (mode1 == mode2)
20258 return true;
20259
20260 if (ix86_tieable_integer_mode_p (mode1)
20261 && ix86_tieable_integer_mode_p (mode2))
20262 return true;
20263
20264 /* MODE2 being XFmode implies fp stack or general regs, which means we
20265 can tie any smaller floating point modes to it. Note that we do not
20266 tie this with TFmode. */
20267 if (mode2 == XFmode)
20268 return mode1 == SFmode || mode1 == DFmode;
20269
20270 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20271 that we can tie it with SFmode. */
20272 if (mode2 == DFmode)
20273 return mode1 == SFmode;
20274
20275 /* If MODE2 is only appropriate for an SSE register, then tie with
20276 any other mode acceptable to SSE registers. */
20277 if (GET_MODE_SIZE (mode2) == 16
20278 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20279 return (GET_MODE_SIZE (mode1) == 16
20280 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20281
20282 /* If MODE2 is appropriate for an MMX register, then tie
20283 with any other mode acceptable to MMX registers. */
20284 if (GET_MODE_SIZE (mode2) == 8
20285 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20286 return (GET_MODE_SIZE (mode1) == 8
20287 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20288
20289 return false;
20290 }
20291
20292 /* Return the cost of moving data of mode M between a
20293 register and memory. A value of 2 is the default; this cost is
20294 relative to those in `REGISTER_MOVE_COST'.
20295
20296 If moving between registers and memory is more expensive than
20297 between two registers, you should define this macro to express the
20298 relative cost.
20299
20300 Also model the increased cost of moving QImode registers in non-Q_REGS
20301 classes.
20302 */
20303 int
20304 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20305 {
20306 if (FLOAT_CLASS_P (regclass))
20307 {
20308 int index;
20309 switch (mode)
20310 {
20311 case SFmode:
20312 index = 0;
20313 break;
20314 case DFmode:
20315 index = 1;
20316 break;
20317 case XFmode:
20318 index = 2;
20319 break;
20320 default:
20321 return 100;
20322 }
20323 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20324 }
20325 if (SSE_CLASS_P (regclass))
20326 {
20327 int index;
20328 switch (GET_MODE_SIZE (mode))
20329 {
20330 case 4:
20331 index = 0;
20332 break;
20333 case 8:
20334 index = 1;
20335 break;
20336 case 16:
20337 index = 2;
20338 break;
20339 default:
20340 return 100;
20341 }
20342 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20343 }
20344 if (MMX_CLASS_P (regclass))
20345 {
20346 int index;
20347 switch (GET_MODE_SIZE (mode))
20348 {
20349 case 4:
20350 index = 0;
20351 break;
20352 case 8:
20353 index = 1;
20354 break;
20355 default:
20356 return 100;
20357 }
20358 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20359 }
20360 switch (GET_MODE_SIZE (mode))
20361 {
20362 case 1:
20363 if (in)
20364 return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
20365 : ix86_cost->movzbl_load);
20366 else
20367 return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
20368 : ix86_cost->int_store[0] + 4);
20369 break;
20370 case 2:
20371 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20372 default:
20373 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20374 if (mode == TFmode)
20375 mode = XFmode;
20376 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20377 * (((int) GET_MODE_SIZE (mode)
20378 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20379 }
20380 }
20381
20382 /* Compute a (partial) cost for rtx X. Return true if the complete
20383 cost has been computed, and false if subexpressions should be
20384 scanned. In either case, *TOTAL contains the cost result. */
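/* For instance, the integer MULT case below returns true because it
   already folds in the cost of its operands, while the shift cases
   return false so that the caller adds the operand costs itself.  */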
20385
20386 static bool
20387 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20388 {
20389 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20390 enum machine_mode mode = GET_MODE (x);
20391
20392 switch (code)
20393 {
20394 case CONST_INT:
20395 case CONST:
20396 case LABEL_REF:
20397 case SYMBOL_REF:
20398 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20399 *total = 3;
20400 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20401 *total = 2;
20402 else if (flag_pic && SYMBOLIC_CONST (x)
20403 && (!TARGET_64BIT
20404 || (GET_CODE (x) != LABEL_REF
20405 && (GET_CODE (x) != SYMBOL_REF
20406 || !SYMBOL_REF_LOCAL_P (x)))))
20407 *total = 1;
20408 else
20409 *total = 0;
20410 return true;
20411
20412 case CONST_DOUBLE:
20413 if (mode == VOIDmode)
20414 *total = 0;
20415 else
20416 switch (standard_80387_constant_p (x))
20417 {
20418 case 1: /* 0.0 */
20419 *total = 1;
20420 break;
20421 default: /* Other constants */
20422 *total = 2;
20423 break;
20424 case 0:
20425 case -1:
20426 /* Start with (MEM (SYMBOL_REF)), since that's where
20427 it'll probably end up. Add a penalty for size. */
20428 *total = (COSTS_N_INSNS (1)
20429 + (flag_pic != 0 && !TARGET_64BIT)
20430 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20431 break;
20432 }
20433 return true;
20434
20435 case ZERO_EXTEND:
20436 /* The zero extension is often completely free on x86_64, so make
20437 it as cheap as possible. */
20438 if (TARGET_64BIT && mode == DImode
20439 && GET_MODE (XEXP (x, 0)) == SImode)
20440 *total = 1;
20441 else if (TARGET_ZERO_EXTEND_WITH_AND)
20442 *total = ix86_cost->add;
20443 else
20444 *total = ix86_cost->movzx;
20445 return false;
20446
20447 case SIGN_EXTEND:
20448 *total = ix86_cost->movsx;
20449 return false;
20450
20451 case ASHIFT:
20452 if (CONST_INT_P (XEXP (x, 1))
20453 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20454 {
20455 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20456 if (value == 1)
20457 {
20458 *total = ix86_cost->add;
20459 return false;
20460 }
20461 if ((value == 2 || value == 3)
20462 && ix86_cost->lea <= ix86_cost->shift_const)
20463 {
20464 *total = ix86_cost->lea;
20465 return false;
20466 }
20467 }
20468 /* FALLTHRU */
20469
20470 case ROTATE:
20471 case ASHIFTRT:
20472 case LSHIFTRT:
20473 case ROTATERT:
20474 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20475 {
20476 if (CONST_INT_P (XEXP (x, 1)))
20477 {
20478 if (INTVAL (XEXP (x, 1)) > 32)
20479 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20480 else
20481 *total = ix86_cost->shift_const * 2;
20482 }
20483 else
20484 {
20485 if (GET_CODE (XEXP (x, 1)) == AND)
20486 *total = ix86_cost->shift_var * 2;
20487 else
20488 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20489 }
20490 }
20491 else
20492 {
20493 if (CONST_INT_P (XEXP (x, 1)))
20494 *total = ix86_cost->shift_const;
20495 else
20496 *total = ix86_cost->shift_var;
20497 }
20498 return false;
20499
20500 case MULT:
20501 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20502 {
20503 /* ??? SSE scalar cost should be used here. */
20504 *total = ix86_cost->fmul;
20505 return false;
20506 }
20507 else if (X87_FLOAT_MODE_P (mode))
20508 {
20509 *total = ix86_cost->fmul;
20510 return false;
20511 }
20512 else if (FLOAT_MODE_P (mode))
20513 {
20514 /* ??? SSE vector cost should be used here. */
20515 *total = ix86_cost->fmul;
20516 return false;
20517 }
20518 else
20519 {
20520 rtx op0 = XEXP (x, 0);
20521 rtx op1 = XEXP (x, 1);
20522 int nbits;
20523 if (CONST_INT_P (XEXP (x, 1)))
20524 {
20525 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20526 for (nbits = 0; value != 0; value &= value - 1)
20527 nbits++;
20528 }
20529 else
20530 /* This is arbitrary. */
20531 nbits = 7;
20532
20533 /* Compute costs correctly for widening multiplication. */
20534 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20535 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20536 == GET_MODE_SIZE (mode))
20537 {
20538 int is_mulwiden = 0;
20539 enum machine_mode inner_mode = GET_MODE (op0);
20540
20541 if (GET_CODE (op0) == GET_CODE (op1))
20542 is_mulwiden = 1, op1 = XEXP (op1, 0);
20543 else if (CONST_INT_P (op1))
20544 {
20545 if (GET_CODE (op0) == SIGN_EXTEND)
20546 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20547 == INTVAL (op1);
20548 else
20549 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20550 }
20551
20552 if (is_mulwiden)
20553 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20554 }
20555
20556 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20557 + nbits * ix86_cost->mult_bit
20558 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20559
20560 return true;
20561 }
20562
20563 case DIV:
20564 case UDIV:
20565 case MOD:
20566 case UMOD:
20567 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20568 /* ??? SSE cost should be used here. */
20569 *total = ix86_cost->fdiv;
20570 else if (X87_FLOAT_MODE_P (mode))
20571 *total = ix86_cost->fdiv;
20572 else if (FLOAT_MODE_P (mode))
20573 /* ??? SSE vector cost should be used here. */
20574 *total = ix86_cost->fdiv;
20575 else
20576 *total = ix86_cost->divide[MODE_INDEX (mode)];
20577 return false;
20578
20579 case PLUS:
20580 if (GET_MODE_CLASS (mode) == MODE_INT
20581 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20582 {
20583 if (GET_CODE (XEXP (x, 0)) == PLUS
20584 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20585 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20586 && CONSTANT_P (XEXP (x, 1)))
20587 {
20588 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20589 if (val == 2 || val == 4 || val == 8)
20590 {
20591 *total = ix86_cost->lea;
20592 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20593 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20594 outer_code);
20595 *total += rtx_cost (XEXP (x, 1), outer_code);
20596 return true;
20597 }
20598 }
20599 else if (GET_CODE (XEXP (x, 0)) == MULT
20600 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20601 {
20602 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20603 if (val == 2 || val == 4 || val == 8)
20604 {
20605 *total = ix86_cost->lea;
20606 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20607 *total += rtx_cost (XEXP (x, 1), outer_code);
20608 return true;
20609 }
20610 }
20611 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20612 {
20613 *total = ix86_cost->lea;
20614 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20615 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20616 *total += rtx_cost (XEXP (x, 1), outer_code);
20617 return true;
20618 }
20619 }
20620 /* FALLTHRU */
20621
20622 case MINUS:
20623 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20624 {
20625 /* ??? SSE cost should be used here. */
20626 *total = ix86_cost->fadd;
20627 return false;
20628 }
20629 else if (X87_FLOAT_MODE_P (mode))
20630 {
20631 *total = ix86_cost->fadd;
20632 return false;
20633 }
20634 else if (FLOAT_MODE_P (mode))
20635 {
20636 /* ??? SSE vector cost should be used here. */
20637 *total = ix86_cost->fadd;
20638 return false;
20639 }
20640 /* FALLTHRU */
20641
20642 case AND:
20643 case IOR:
20644 case XOR:
20645 if (!TARGET_64BIT && mode == DImode)
20646 {
20647 *total = (ix86_cost->add * 2
20648 + (rtx_cost (XEXP (x, 0), outer_code)
20649 << (GET_MODE (XEXP (x, 0)) != DImode))
20650 + (rtx_cost (XEXP (x, 1), outer_code)
20651 << (GET_MODE (XEXP (x, 1)) != DImode)));
20652 return true;
20653 }
20654 /* FALLTHRU */
20655
20656 case NEG:
20657 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20658 {
20659 /* ??? SSE cost should be used here. */
20660 *total = ix86_cost->fchs;
20661 return false;
20662 }
20663 else if (X87_FLOAT_MODE_P (mode))
20664 {
20665 *total = ix86_cost->fchs;
20666 return false;
20667 }
20668 else if (FLOAT_MODE_P (mode))
20669 {
20670 /* ??? SSE vector cost should be used here. */
20671 *total = ix86_cost->fchs;
20672 return false;
20673 }
20674 /* FALLTHRU */
20675
20676 case NOT:
20677 if (!TARGET_64BIT && mode == DImode)
20678 *total = ix86_cost->add * 2;
20679 else
20680 *total = ix86_cost->add;
20681 return false;
20682
20683 case COMPARE:
20684 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20685 && XEXP (XEXP (x, 0), 1) == const1_rtx
20686 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20687 && XEXP (x, 1) == const0_rtx)
20688 {
20689 /* This kind of construct is implemented using test[bwl].
20690 Treat it as if we had an AND. */
20691 *total = (ix86_cost->add
20692 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20693 + rtx_cost (const1_rtx, outer_code));
20694 return true;
20695 }
20696 return false;
20697
20698 case FLOAT_EXTEND:
20699 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20700 *total = 0;
20701 return false;
20702
20703 case ABS:
20704 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20705 /* ??? SSE cost should be used here. */
20706 *total = ix86_cost->fabs;
20707 else if (X87_FLOAT_MODE_P (mode))
20708 *total = ix86_cost->fabs;
20709 else if (FLOAT_MODE_P (mode))
20710 /* ??? SSE vector cost should be used here. */
20711 *total = ix86_cost->fabs;
20712 return false;
20713
20714 case SQRT:
20715 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20716 /* ??? SSE cost should be used here. */
20717 *total = ix86_cost->fsqrt;
20718 else if (X87_FLOAT_MODE_P (mode))
20719 *total = ix86_cost->fsqrt;
20720 else if (FLOAT_MODE_P (mode))
20721 /* ??? SSE vector cost should be used here. */
20722 *total = ix86_cost->fsqrt;
20723 return false;
20724
20725 case UNSPEC:
20726 if (XINT (x, 1) == UNSPEC_TP)
20727 *total = 0;
20728 return false;
20729
20730 default:
20731 return false;
20732 }
20733 }
20734
20735 #if TARGET_MACHO
20736
20737 static int current_machopic_label_num;
20738
20739 /* Given a symbol name and its associated stub, write out the
20740 definition of the stub. */
20741
20742 void
20743 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20744 {
20745 unsigned int length;
20746 char *binder_name, *symbol_name, lazy_ptr_name[32];
20747 int label = ++current_machopic_label_num;
20748
20749 /* For 64-bit we shouldn't get here. */
20750 gcc_assert (!TARGET_64BIT);
20751
20752 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20753 symb = (*targetm.strip_name_encoding) (symb);
20754
20755 length = strlen (stub);
20756 binder_name = alloca (length + 32);
20757 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20758
20759 length = strlen (symb);
20760 symbol_name = alloca (length + 32);
20761 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20762
20763 sprintf (lazy_ptr_name, "L%d$lz", label);
20764
20765 if (MACHOPIC_PURE)
20766 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20767 else
20768 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20769
20770 fprintf (file, "%s:\n", stub);
20771 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20772
20773 if (MACHOPIC_PURE)
20774 {
20775 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20776 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20777 fprintf (file, "\tjmp\t*%%edx\n");
20778 }
20779 else
20780 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20781
20782 fprintf (file, "%s:\n", binder_name);
20783
20784 if (MACHOPIC_PURE)
20785 {
20786 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20787 fprintf (file, "\tpushl\t%%eax\n");
20788 }
20789 else
20790 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20791
20792 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20793
20794 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20795 fprintf (file, "%s:\n", lazy_ptr_name);
20796 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20797 fprintf (file, "\t.long %s\n", binder_name);
20798 }
20799
20800 void
20801 darwin_x86_file_end (void)
20802 {
20803 darwin_file_end ();
20804 ix86_file_end ();
20805 }
20806 #endif /* TARGET_MACHO */
20807
20808 /* Order the registers for the register allocator. */
20809
20810 void
20811 x86_order_regs_for_local_alloc (void)
20812 {
20813 int pos = 0;
20814 int i;
20815
20816 /* First allocate the local general purpose registers. */
20817 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20818 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20819 reg_alloc_order [pos++] = i;
20820
20821 /* Global general purpose registers. */
20822 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20823 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20824 reg_alloc_order [pos++] = i;
20825
20826 /* x87 registers come first in case we are doing FP math
20827 using them. */
20828 if (!TARGET_SSE_MATH)
20829 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20830 reg_alloc_order [pos++] = i;
20831
20832 /* SSE registers. */
20833 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20834 reg_alloc_order [pos++] = i;
20835 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20836 reg_alloc_order [pos++] = i;
20837
20838 /* x87 registers. */
20839 if (TARGET_SSE_MATH)
20840 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20841 reg_alloc_order [pos++] = i;
20842
20843 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20844 reg_alloc_order [pos++] = i;
20845
20846 /* Initialize the rest of the array as we do not allocate some registers
20847 at all. */
20848 while (pos < FIRST_PSEUDO_REGISTER)
20849 reg_alloc_order [pos++] = 0;
20850 }
20851
20852 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20853 struct attribute_spec.handler. */
20854 static tree
20855 ix86_handle_struct_attribute (tree *node, tree name,
20856 tree args ATTRIBUTE_UNUSED,
20857 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20858 {
20859 tree *type = NULL;
20860 if (DECL_P (*node))
20861 {
20862 if (TREE_CODE (*node) == TYPE_DECL)
20863 type = &TREE_TYPE (*node);
20864 }
20865 else
20866 type = node;
20867
20868 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20869 || TREE_CODE (*type) == UNION_TYPE)))
20870 {
20871 warning (OPT_Wattributes, "%qs attribute ignored",
20872 IDENTIFIER_POINTER (name));
20873 *no_add_attrs = true;
20874 }
20875
20876 else if ((is_attribute_p ("ms_struct", name)
20877 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20878 || ((is_attribute_p ("gcc_struct", name)
20879 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20880 {
20881 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20882 IDENTIFIER_POINTER (name));
20883 *no_add_attrs = true;
20884 }
20885
20886 return NULL_TREE;
20887 }
20888
20889 static bool
20890 ix86_ms_bitfield_layout_p (tree record_type)
20891 {
20892 return (TARGET_MS_BITFIELD_LAYOUT &&
20893 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20894 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20895 }
20896
20897 /* Returns an expression indicating where the this parameter is
20898 located on entry to the FUNCTION. */
20899
20900 static rtx
20901 x86_this_parameter (tree function)
20902 {
20903 tree type = TREE_TYPE (function);
20904 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20905
20906 if (TARGET_64BIT)
20907 {
20908 const int *parm_regs;
20909
20910 if (TARGET_64BIT_MS_ABI)
20911 parm_regs = x86_64_ms_abi_int_parameter_registers;
20912 else
20913 parm_regs = x86_64_int_parameter_registers;
20914 return gen_rtx_REG (DImode, parm_regs[aggr]);
20915 }
20916
20917 if (ix86_function_regparm (type, function) > 0
20918 && !type_has_variadic_args_p (type))
20919 {
20920 int regno = 0;
20921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20922 regno = 2;
20923 return gen_rtx_REG (SImode, regno);
20924 }
20925
20926 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20927 }
20928
20929 /* Determine whether x86_output_mi_thunk can succeed. */
20930
20931 static bool
20932 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20933 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20934 HOST_WIDE_INT vcall_offset, tree function)
20935 {
20936 /* 64-bit can handle anything. */
20937 if (TARGET_64BIT)
20938 return true;
20939
20940 /* For 32-bit, everything's fine if we have one free register. */
20941 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20942 return true;
20943
20944 /* Need a free register for vcall_offset. */
20945 if (vcall_offset)
20946 return false;
20947
20948 /* Need a free register for GOT references. */
20949 if (flag_pic && !(*targetm.binds_local_p) (function))
20950 return false;
20951
20952 /* Otherwise ok. */
20953 return true;
20954 }
20955
20956 /* Output the assembler code for a thunk function. THUNK_DECL is the
20957 declaration for the thunk function itself, FUNCTION is the decl for
20958 the target function. DELTA is an immediate constant offset to be
20959 added to THIS. If VCALL_OFFSET is nonzero, the word at
20960 *(*this + vcall_offset) should be added to THIS. */
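/* For illustration, a 32-bit thunk for an ordinary stack-args function
   with a small DELTA and no VCALL_OFFSET comes out as roughly

	addl	$DELTA, 4(%esp)
	jmp	function

   i.e. the this pointer is adjusted in place in its stack slot and
   control tail-jumps to the real function.  */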
20961
20962 static void
20963 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20964 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20965 HOST_WIDE_INT vcall_offset, tree function)
20966 {
20967 rtx xops[3];
20968 rtx this_param = x86_this_parameter (function);
20969 rtx this_reg, tmp;
20970
20971 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20972 pull it in now and let DELTA benefit. */
20973 if (REG_P (this_param))
20974 this_reg = this_param;
20975 else if (vcall_offset)
20976 {
20977 /* Put the this parameter into %eax. */
20978 xops[0] = this_param;
20979 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20980 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20981 }
20982 else
20983 this_reg = NULL_RTX;
20984
20985 /* Adjust the this parameter by a fixed constant. */
20986 if (delta)
20987 {
20988 xops[0] = GEN_INT (delta);
20989 xops[1] = this_reg ? this_reg : this_param;
20990 if (TARGET_64BIT)
20991 {
20992 if (!x86_64_general_operand (xops[0], DImode))
20993 {
20994 tmp = gen_rtx_REG (DImode, R10_REG);
20995 xops[1] = tmp;
20996 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20997 xops[0] = tmp;
20998 xops[1] = this_param;
20999 }
21000 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21001 }
21002 else
21003 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21004 }
21005
21006 /* Adjust the this parameter by a value stored in the vtable. */
21007 if (vcall_offset)
21008 {
21009 if (TARGET_64BIT)
21010 tmp = gen_rtx_REG (DImode, R10_REG);
21011 else
21012 {
21013 int tmp_regno = 2 /* ECX */;
21014 if (lookup_attribute ("fastcall",
21015 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21016 tmp_regno = 0 /* EAX */;
21017 tmp = gen_rtx_REG (SImode, tmp_regno);
21018 }
21019
21020 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21021 xops[1] = tmp;
21022 if (TARGET_64BIT)
21023 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21024 else
21025 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21026
21027 /* Adjust the this parameter. */
21028 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21029 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21030 {
21031 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21032 xops[0] = GEN_INT (vcall_offset);
21033 xops[1] = tmp2;
21034 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21035 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21036 }
21037 xops[1] = this_reg;
21038 if (TARGET_64BIT)
21039 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21040 else
21041 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21042 }
21043
21044 /* If necessary, drop THIS back to its stack slot. */
21045 if (this_reg && this_reg != this_param)
21046 {
21047 xops[0] = this_reg;
21048 xops[1] = this_param;
21049 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21050 }
21051
21052 xops[0] = XEXP (DECL_RTL (function), 0);
21053 if (TARGET_64BIT)
21054 {
21055 if (!flag_pic || (*targetm.binds_local_p) (function))
21056 output_asm_insn ("jmp\t%P0", xops);
21057 /* All thunks should be in the same object as their target,
21058 and thus binds_local_p should be true. */
21059 else if (TARGET_64BIT_MS_ABI)
21060 gcc_unreachable ();
21061 else
21062 {
21063 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21064 tmp = gen_rtx_CONST (Pmode, tmp);
21065 tmp = gen_rtx_MEM (QImode, tmp);
21066 xops[0] = tmp;
21067 output_asm_insn ("jmp\t%A0", xops);
21068 }
21069 }
21070 else
21071 {
21072 if (!flag_pic || (*targetm.binds_local_p) (function))
21073 output_asm_insn ("jmp\t%P0", xops);
21074 else
21075 #if TARGET_MACHO
21076 if (TARGET_MACHO)
21077 {
21078 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21079 tmp = (gen_rtx_SYMBOL_REF
21080 (Pmode,
21081 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21082 tmp = gen_rtx_MEM (QImode, tmp);
21083 xops[0] = tmp;
21084 output_asm_insn ("jmp\t%0", xops);
21085 }
21086 else
21087 #endif /* TARGET_MACHO */
21088 {
21089 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21090 output_set_got (tmp, NULL_RTX);
21091
21092 xops[1] = tmp;
21093 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21094 output_asm_insn ("jmp\t{*}%1", xops);
21095 }
21096 }
21097 }
21098
21099 static void
21100 x86_file_start (void)
21101 {
21102 default_file_start ();
21103 #if TARGET_MACHO
21104 darwin_file_start ();
21105 #endif
21106 if (X86_FILE_START_VERSION_DIRECTIVE)
21107 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21108 if (X86_FILE_START_FLTUSED)
21109 fputs ("\t.global\t__fltused\n", asm_out_file);
21110 if (ix86_asm_dialect == ASM_INTEL)
21111 fputs ("\t.intel_syntax\n", asm_out_file);
21112 }
21113
21114 int
21115 x86_field_alignment (tree field, int computed)
21116 {
21117 enum machine_mode mode;
21118 tree type = TREE_TYPE (field);
21119
21120 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21121 return computed;
21122 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21123 ? get_inner_array_type (type) : type);
21124 if (mode == DFmode || mode == DCmode
21125 || GET_MODE_CLASS (mode) == MODE_INT
21126 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21127 return MIN (32, computed);
21128 return computed;
21129 }
21130
21131 /* Output assembler code to FILE to increment profiler label # LABELNO
21132 for profiling a function entry. */
21133 void
21134 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21135 {
21136 if (TARGET_64BIT)
21137 {
21138 #ifndef NO_PROFILE_COUNTERS
21139 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21140 #endif
21141
21142 if (!TARGET_64BIT_MS_ABI && flag_pic)
21143 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21144 else
21145 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21146 }
21147 else if (flag_pic)
21148 {
21149 #ifndef NO_PROFILE_COUNTERS
21150 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21151 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21152 #endif
21153 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21154 }
21155 else
21156 {
21157 #ifndef NO_PROFILE_COUNTERS
21158 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21159 PROFILE_COUNT_REGISTER);
21160 #endif
21161 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21162 }
21163 }
21164
21165 /* We don't have exact information about the insn sizes, but we may assume
21166 quite safely that we are informed about all 1-byte insns and memory
21167 address sizes. This is enough to eliminate unnecessary padding in
21168 99% of cases. */
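/* For instance, a direct call to a named function is counted as 5 bytes,
   any 1-byte insn as 1 byte, and a non-jump insn mentioning a symbolic
   address as at least 5 bytes (an opcode byte plus a 4-byte address).  */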
21169
21170 static int
21171 min_insn_size (rtx insn)
21172 {
21173 int l = 0;
21174
21175 if (!INSN_P (insn) || !active_insn_p (insn))
21176 return 0;
21177
21178 /* Discard alignments we've emitted and jump instructions. */
21179 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21180 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21181 return 0;
21182 if (JUMP_P (insn)
21183 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21184 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21185 return 0;
21186
21187 /* Important case - calls are always 5 bytes.
21188 It is common to have many calls in a row. */
21189 if (CALL_P (insn)
21190 && symbolic_reference_mentioned_p (PATTERN (insn))
21191 && !SIBLING_CALL_P (insn))
21192 return 5;
21193 if (get_attr_length (insn) <= 1)
21194 return 1;
21195
21196 /* For normal instructions we may rely on the sizes of addresses
21197 and on the presence of a symbol to require 4 bytes of encoding.
21198 This is not the case for jumps, where references are PC relative. */
21199 if (!JUMP_P (insn))
21200 {
21201 l = get_attr_length_address (insn);
21202 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21203 l = 4;
21204 }
21205 if (l)
21206 return 1+l;
21207 else
21208 return 2;
21209 }
21210
21211 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21212 window. */
21213
21214 static void
21215 ix86_avoid_jump_misspredicts (void)
21216 {
21217 rtx insn, start = get_insns ();
21218 int nbytes = 0, njumps = 0;
21219 int isjump = 0;
21220
21221 /* Look for all minimal intervals of instructions containing 4 jumps.
21222 The intervals are bounded by START and INSN. NBYTES is the total
21223 size of the instructions in the interval, including INSN and not
21224 including START. When NBYTES is smaller than 16 bytes, it is possible
21225 that the end of START and the end of INSN end up in the same 16-byte page.
21226 
21227 The smallest offset in the page at which INSN can start is the case
21228 where START ends at offset 0. The offset of INSN is then
21229 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
21230 maxskip 17 - NBYTES + sizeof (INSN). */
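  /* For example, if the insn just scanned is the fourth jump or call in
     the interval, the interval holds NBYTES = 12 bytes and the insn
     itself is 2 bytes, then a p2align with max-skip 15 - 12 + 2 = 5 is
     emitted before it, so the four jumps should no longer fit in a
     single 16-byte window.  */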
21231 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21232 {
21233
21234 nbytes += min_insn_size (insn);
21235 if (dump_file)
21236 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21237 INSN_UID (insn), min_insn_size (insn));
21238 if ((JUMP_P (insn)
21239 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21240 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21241 || CALL_P (insn))
21242 njumps++;
21243 else
21244 continue;
21245
21246 while (njumps > 3)
21247 {
21248 start = NEXT_INSN (start);
21249 if ((JUMP_P (start)
21250 && GET_CODE (PATTERN (start)) != ADDR_VEC
21251 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21252 || CALL_P (start))
21253 njumps--, isjump = 1;
21254 else
21255 isjump = 0;
21256 nbytes -= min_insn_size (start);
21257 }
21258 gcc_assert (njumps >= 0);
21259 if (dump_file)
21260 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21261 INSN_UID (start), INSN_UID (insn), nbytes);
21262
21263 if (njumps == 3 && isjump && nbytes < 16)
21264 {
21265 int padsize = 15 - nbytes + min_insn_size (insn);
21266
21267 if (dump_file)
21268 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21269 INSN_UID (insn), padsize);
21270 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21271 }
21272 }
21273 }
21274
21275 /* The AMD Athlon works faster when RET is not the destination of a
21276 conditional jump and is not directly preceded by another jump
21277 instruction. In such cases we avoid the penalty by replacing the
21278 RET with the two-byte return form (rep; ret). */
21279 static void
21280 ix86_pad_returns (void)
21281 {
21282 edge e;
21283 edge_iterator ei;
21284
21285 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21286 {
21287 basic_block bb = e->src;
21288 rtx ret = BB_END (bb);
21289 rtx prev;
21290 bool replace = false;
21291
21292 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21293 || !maybe_hot_bb_p (bb))
21294 continue;
21295 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21296 if (active_insn_p (prev) || LABEL_P (prev))
21297 break;
21298 if (prev && LABEL_P (prev))
21299 {
21300 edge e;
21301 edge_iterator ei;
21302
21303 FOR_EACH_EDGE (e, ei, bb->preds)
21304 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21305 && !(e->flags & EDGE_FALLTHRU))
21306 replace = true;
21307 }
21308 if (!replace)
21309 {
21310 prev = prev_active_insn (ret);
21311 if (prev
21312 && ((JUMP_P (prev) && any_condjump_p (prev))
21313 || CALL_P (prev)))
21314 replace = true;
21315 /* Empty functions get a branch mispredict even when the jump destination
21316 is not visible to us. */
21317 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21318 replace = true;
21319 }
21320 if (replace)
21321 {
21322 emit_insn_before (gen_return_internal_long (), ret);
21323 delete_insn (ret);
21324 }
21325 }
21326 }
21327
21328 /* Implement machine specific optimizations. We implement padding of returns
21329 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21330 static void
21331 ix86_reorg (void)
21332 {
21333 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21334 ix86_pad_returns ();
21335 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21336 ix86_avoid_jump_misspredicts ();
21337 }
21338
21339 /* Return nonzero when a QImode register that must be represented via a REX prefix
21340 is used. */
21341 bool
21342 x86_extended_QIreg_mentioned_p (rtx insn)
21343 {
21344 int i;
21345 extract_insn_cached (insn);
21346 for (i = 0; i < recog_data.n_operands; i++)
21347 if (REG_P (recog_data.operand[i])
21348 && REGNO (recog_data.operand[i]) >= 4)
21349 return true;
21350 return false;
21351 }
21352
21353 /* Return nonzero when P points to a register encoded via a REX prefix.
21354 Called via for_each_rtx. */
21355 static int
21356 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21357 {
21358 unsigned int regno;
21359 if (!REG_P (*p))
21360 return 0;
21361 regno = REGNO (*p);
21362 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21363 }
21364
21365 /* Return true when INSN mentions a register that must be encoded using a REX
21366 prefix. */
21367 bool
21368 x86_extended_reg_mentioned_p (rtx insn)
21369 {
21370 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21371 }
21372
21373 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21374 optabs would emit if we didn't have TFmode patterns. */
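/* The expansion is roughly equivalent to

     out = (in >= 0) ? (double) in
                     : (double) ((in >> 1) | (in & 1)) * 2;

   (in whatever FP mode OUT has), where the shift is logical and OR-ing
   in the shifted-out low bit keeps the final doubling from rounding
   incorrectly.  */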
21375
21376 void
21377 x86_emit_floatuns (rtx operands[2])
21378 {
21379 rtx neglab, donelab, i0, i1, f0, in, out;
21380 enum machine_mode mode, inmode;
21381
21382 inmode = GET_MODE (operands[1]);
21383 gcc_assert (inmode == SImode || inmode == DImode);
21384
21385 out = operands[0];
21386 in = force_reg (inmode, operands[1]);
21387 mode = GET_MODE (out);
21388 neglab = gen_label_rtx ();
21389 donelab = gen_label_rtx ();
21390 f0 = gen_reg_rtx (mode);
21391
21392 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21393
21394 expand_float (out, in, 0);
21395
21396 emit_jump_insn (gen_jump (donelab));
21397 emit_barrier ();
21398
21399 emit_label (neglab);
21400
21401 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21402 1, OPTAB_DIRECT);
21403 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21404 1, OPTAB_DIRECT);
21405 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21406
21407 expand_float (f0, i0, 0);
21408
21409 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21410
21411 emit_label (donelab);
21412 }
21413 \f
21414 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21415 with all elements equal to VAR. Return true if successful. */
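/* For the narrow integer modes with no direct splat (e.g. V8QImode, or
   V4HImode without SSE/3DNow!A), the value is first replicated into the
   next wider scalar mode with a shift and an IOR, and the duplicate is
   then built recursively in the corresponding wider vector mode.  */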
21416
21417 static bool
21418 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21419 rtx target, rtx val)
21420 {
21421 enum machine_mode smode, wsmode, wvmode;
21422 rtx x;
21423
21424 switch (mode)
21425 {
21426 case V2SImode:
21427 case V2SFmode:
21428 if (!mmx_ok)
21429 return false;
21430 /* FALLTHRU */
21431
21432 case V2DFmode:
21433 case V2DImode:
21434 case V4SFmode:
21435 case V4SImode:
21436 val = force_reg (GET_MODE_INNER (mode), val);
21437 x = gen_rtx_VEC_DUPLICATE (mode, val);
21438 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21439 return true;
21440
21441 case V4HImode:
21442 if (!mmx_ok)
21443 return false;
21444 if (TARGET_SSE || TARGET_3DNOW_A)
21445 {
21446 val = gen_lowpart (SImode, val);
21447 x = gen_rtx_TRUNCATE (HImode, val);
21448 x = gen_rtx_VEC_DUPLICATE (mode, x);
21449 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21450 return true;
21451 }
21452 else
21453 {
21454 smode = HImode;
21455 wsmode = SImode;
21456 wvmode = V2SImode;
21457 goto widen;
21458 }
21459
21460 case V8QImode:
21461 if (!mmx_ok)
21462 return false;
21463 smode = QImode;
21464 wsmode = HImode;
21465 wvmode = V4HImode;
21466 goto widen;
21467 case V8HImode:
21468 if (TARGET_SSE2)
21469 {
21470 rtx tmp1, tmp2;
21471 /* Extend HImode to SImode using a paradoxical SUBREG. */
21472 tmp1 = gen_reg_rtx (SImode);
21473 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21474 /* Insert the SImode value as low element of V4SImode vector. */
21475 tmp2 = gen_reg_rtx (V4SImode);
21476 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21477 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21478 CONST0_RTX (V4SImode),
21479 const1_rtx);
21480 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21481 /* Cast the V4SImode vector back to a V8HImode vector. */
21482 tmp1 = gen_reg_rtx (V8HImode);
21483 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21484 /* Duplicate the low short through the whole low SImode word. */
21485 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21486 /* Cast the V8HImode vector back to a V4SImode vector. */
21487 tmp2 = gen_reg_rtx (V4SImode);
21488 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21489 /* Replicate the low element of the V4SImode vector. */
21490 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21491 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21492 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21493 return true;
21494 }
21495 smode = HImode;
21496 wsmode = SImode;
21497 wvmode = V4SImode;
21498 goto widen;
21499 case V16QImode:
21500 if (TARGET_SSE2)
21501 {
21502 rtx tmp1, tmp2;
21503 /* Extend QImode to SImode using a paradoxical SUBREG. */
21504 tmp1 = gen_reg_rtx (SImode);
21505 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21506 /* Insert the SImode value as low element of V4SImode vector. */
21507 tmp2 = gen_reg_rtx (V4SImode);
21508 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21509 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21510 CONST0_RTX (V4SImode),
21511 const1_rtx);
21512 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21513 /* Cast the V4SImode vector back to a V16QImode vector. */
21514 tmp1 = gen_reg_rtx (V16QImode);
21515 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21516 /* Duplicate the low byte through the whole low SImode word. */
21517 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21518 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21519 /* Cast the V16QImode vector back to a V4SImode vector. */
21520 tmp2 = gen_reg_rtx (V4SImode);
21521 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21522 /* Replicate the low element of the V4SImode vector. */
21523 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21524 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21525 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21526 return true;
21527 }
21528 smode = QImode;
21529 wsmode = HImode;
21530 wvmode = V8HImode;
21531 goto widen;
21532 widen:
21533 /* Replicate the value once into the next wider mode and recurse. */
21534 val = convert_modes (wsmode, smode, val, true);
21535 x = expand_simple_binop (wsmode, ASHIFT, val,
21536 GEN_INT (GET_MODE_BITSIZE (smode)),
21537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21538 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21539
21540 x = gen_reg_rtx (wvmode);
21541 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21542 gcc_unreachable ();
21543 emit_move_insn (target, gen_lowpart (mode, x));
21544 return true;
21545
21546 default:
21547 return false;
21548 }
21549 }
21550
21551 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21552 whose ONE_VAR element is VAR, and other elements are zero. Return true
21553 if successful. */
21554
21555 static bool
21556 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21557 rtx target, rtx var, int one_var)
21558 {
21559 enum machine_mode vsimode;
21560 rtx new_target;
21561 rtx x, tmp;
21562
21563 switch (mode)
21564 {
21565 case V2SFmode:
21566 case V2SImode:
21567 if (!mmx_ok)
21568 return false;
21569 /* FALLTHRU */
21570
21571 case V2DFmode:
21572 case V2DImode:
21573 if (one_var != 0)
21574 return false;
21575 var = force_reg (GET_MODE_INNER (mode), var);
21576 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21577 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21578 return true;
21579
21580 case V4SFmode:
21581 case V4SImode:
21582 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21583 new_target = gen_reg_rtx (mode);
21584 else
21585 new_target = target;
21586 var = force_reg (GET_MODE_INNER (mode), var);
21587 x = gen_rtx_VEC_DUPLICATE (mode, var);
21588 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21589 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21590 if (one_var != 0)
21591 {
21592 /* We need to shuffle the value to the correct position, so
21593 create a new pseudo to store the intermediate result. */
21594
21595 /* With SSE2, we can use the integer shuffle insns. */
21596 if (mode != V4SFmode && TARGET_SSE2)
21597 {
21598 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21599 GEN_INT (1),
21600 GEN_INT (one_var == 1 ? 0 : 1),
21601 GEN_INT (one_var == 2 ? 0 : 1),
21602 GEN_INT (one_var == 3 ? 0 : 1)));
21603 if (target != new_target)
21604 emit_move_insn (target, new_target);
21605 return true;
21606 }
21607
21608 /* Otherwise convert the intermediate result to V4SFmode and
21609 use the SSE1 shuffle instructions. */
21610 if (mode != V4SFmode)
21611 {
21612 tmp = gen_reg_rtx (V4SFmode);
21613 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21614 }
21615 else
21616 tmp = new_target;
21617
21618 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21619 GEN_INT (1),
21620 GEN_INT (one_var == 1 ? 0 : 1),
21621 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21622 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21623
21624 if (mode != V4SFmode)
21625 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21626 else if (tmp != target)
21627 emit_move_insn (target, tmp);
21628 }
21629 else if (target != new_target)
21630 emit_move_insn (target, new_target);
21631 return true;
21632
21633 case V8HImode:
21634 case V16QImode:
21635 vsimode = V4SImode;
21636 goto widen;
21637 case V4HImode:
21638 case V8QImode:
21639 if (!mmx_ok)
21640 return false;
21641 vsimode = V2SImode;
21642 goto widen;
21643 widen:
21644 if (one_var != 0)
21645 return false;
21646
21647 /* Zero extend the variable element to SImode and recurse. */
21648 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21649
21650 x = gen_reg_rtx (vsimode);
21651 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21652 var, one_var))
21653 gcc_unreachable ();
21654
21655 emit_move_insn (target, gen_lowpart (mode, x));
21656 return true;
21657
21658 default:
21659 return false;
21660 }
21661 }
21662
21663 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21664 consisting of the values in VALS. It is known that all elements
21665 except ONE_VAR are constants. Return true if successful. */
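/* For example, for the V4SImode vector { 1, x, 3, 4 } the constant
   vector { 1, 0, 3, 4 } is loaded from the pool and element 1 is then
   overwritten with x via ix86_expand_vector_set.  */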
21666
21667 static bool
21668 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21669 rtx target, rtx vals, int one_var)
21670 {
21671 rtx var = XVECEXP (vals, 0, one_var);
21672 enum machine_mode wmode;
21673 rtx const_vec, x;
21674
21675 const_vec = copy_rtx (vals);
21676 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21677 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21678
21679 switch (mode)
21680 {
21681 case V2DFmode:
21682 case V2DImode:
21683 case V2SFmode:
21684 case V2SImode:
21685 /* For the two element vectors, it's just as easy to use
21686 the general case. */
21687 return false;
21688
21689 case V4SFmode:
21690 case V4SImode:
21691 case V8HImode:
21692 case V4HImode:
21693 break;
21694
21695 case V16QImode:
21696 wmode = V8HImode;
21697 goto widen;
21698 case V8QImode:
21699 wmode = V4HImode;
21700 goto widen;
21701 widen:
21702 /* There's no way to set one QImode entry easily. Combine
21703 the variable value with its adjacent constant value, and
21704 promote to an HImode set. */
21705 x = XVECEXP (vals, 0, one_var ^ 1);
21706 if (one_var & 1)
21707 {
21708 var = convert_modes (HImode, QImode, var, true);
21709 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21710 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21711 x = GEN_INT (INTVAL (x) & 0xff);
21712 }
21713 else
21714 {
21715 var = convert_modes (HImode, QImode, var, true);
21716 x = gen_int_mode (INTVAL (x) << 8, HImode);
21717 }
21718 if (x != const0_rtx)
21719 var = expand_simple_binop (HImode, IOR, var, x, var,
21720 1, OPTAB_LIB_WIDEN);
21721
21722 x = gen_reg_rtx (wmode);
21723 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21724 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21725
21726 emit_move_insn (target, gen_lowpart (mode, x));
21727 return true;
21728
21729 default:
21730 return false;
21731 }
21732
21733 emit_move_insn (target, const_vec);
21734 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21735 return true;
21736 }
21737
21738 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21739 all values variable, and none identical. */
21740
21741 static void
21742 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21743 rtx target, rtx vals)
21744 {
21745 enum machine_mode half_mode = GET_MODE_INNER (mode);
21746 rtx op0 = NULL, op1 = NULL;
21747 bool use_vec_concat = false;
21748
21749 switch (mode)
21750 {
21751 case V2SFmode:
21752 case V2SImode:
21753 if (!mmx_ok && !TARGET_SSE)
21754 break;
21755 /* FALLTHRU */
21756
21757 case V2DFmode:
21758 case V2DImode:
21759 /* For the two element vectors, we always implement VEC_CONCAT. */
21760 op0 = XVECEXP (vals, 0, 0);
21761 op1 = XVECEXP (vals, 0, 1);
21762 use_vec_concat = true;
21763 break;
21764
21765 case V4SFmode:
21766 half_mode = V2SFmode;
21767 goto half;
21768 case V4SImode:
21769 half_mode = V2SImode;
21770 goto half;
21771 half:
21772 {
21773 rtvec v;
21774
21775 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21776 Recurse to load the two halves. */
21777
21778 op0 = gen_reg_rtx (half_mode);
21779 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21780 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21781
21782 op1 = gen_reg_rtx (half_mode);
21783 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21784 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21785
21786 use_vec_concat = true;
21787 }
21788 break;
21789
21790 case V8HImode:
21791 case V16QImode:
21792 case V4HImode:
21793 case V8QImode:
21794 break;
21795
21796 default:
21797 gcc_unreachable ();
21798 }
21799
21800 if (use_vec_concat)
21801 {
21802 if (!register_operand (op0, half_mode))
21803 op0 = force_reg (half_mode, op0);
21804 if (!register_operand (op1, half_mode))
21805 op1 = force_reg (half_mode, op1);
21806
21807 emit_insn (gen_rtx_SET (VOIDmode, target,
21808 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21809 }
21810 else
21811 {
21812 int i, j, n_elts, n_words, n_elt_per_word;
21813 enum machine_mode inner_mode;
21814 rtx words[4], shift;
21815
21816 inner_mode = GET_MODE_INNER (mode);
21817 n_elts = GET_MODE_NUNITS (mode);
21818 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21819 n_elt_per_word = n_elts / n_words;
21820 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21821
21822 for (i = 0; i < n_words; ++i)
21823 {
21824 rtx word = NULL_RTX;
21825
21826 for (j = 0; j < n_elt_per_word; ++j)
21827 {
21828 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21829 elt = convert_modes (word_mode, inner_mode, elt, true);
21830
21831 if (j == 0)
21832 word = elt;
21833 else
21834 {
21835 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21836 word, 1, OPTAB_LIB_WIDEN);
21837 word = expand_simple_binop (word_mode, IOR, word, elt,
21838 word, 1, OPTAB_LIB_WIDEN);
21839 }
21840 }
21841
21842 words[i] = word;
21843 }
21844
21845 if (n_words == 1)
21846 emit_move_insn (target, gen_lowpart (mode, words[0]));
21847 else if (n_words == 2)
21848 {
21849 rtx tmp = gen_reg_rtx (mode);
21850 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21851 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21852 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21853 emit_move_insn (target, tmp);
21854 }
21855 else if (n_words == 4)
21856 {
21857 rtx tmp = gen_reg_rtx (V4SImode);
21858 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21859 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21860 emit_move_insn (target, gen_lowpart (mode, tmp));
21861 }
21862 else
21863 gcc_unreachable ();
21864 }
21865 }
21866
21867 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21868 instructions unless MMX_OK is true. */
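/* The strategy, in order of preference: a fully constant vector is
   loaded from the constant pool; a vector of identical values is
   broadcast; a vector with a single variable element starts from a
   constant vector and overwrites that one element; everything else goes
   through the general element-by-element expansion.  */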
21869
21870 void
21871 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21872 {
21873 enum machine_mode mode = GET_MODE (target);
21874 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21875 int n_elts = GET_MODE_NUNITS (mode);
21876 int n_var = 0, one_var = -1;
21877 bool all_same = true, all_const_zero = true;
21878 int i;
21879 rtx x;
21880
21881 for (i = 0; i < n_elts; ++i)
21882 {
21883 x = XVECEXP (vals, 0, i);
21884 if (!CONSTANT_P (x))
21885 n_var++, one_var = i;
21886 else if (x != CONST0_RTX (inner_mode))
21887 all_const_zero = false;
21888 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21889 all_same = false;
21890 }
21891
21892 /* Constants are best loaded from the constant pool. */
21893 if (n_var == 0)
21894 {
21895 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21896 return;
21897 }
21898
21899 /* If all values are identical, broadcast the value. */
21900 if (all_same
21901 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21902 XVECEXP (vals, 0, 0)))
21903 return;
21904
21905 /* Values where only one field is non-constant are best loaded from
21906 the pool and overwritten via move later. */
21907 if (n_var == 1)
21908 {
21909 if (all_const_zero
21910 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21911 XVECEXP (vals, 0, one_var),
21912 one_var))
21913 return;
21914
21915 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21916 return;
21917 }
21918
21919 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21920 }
21921
21922 void
21923 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21924 {
21925 enum machine_mode mode = GET_MODE (target);
21926 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21927 bool use_vec_merge = false;
21928 rtx tmp;
21929
21930 switch (mode)
21931 {
21932 case V2SFmode:
21933 case V2SImode:
21934 if (mmx_ok)
21935 {
21936 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21937 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21938 if (elt == 0)
21939 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21940 else
21941 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21942 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21943 return;
21944 }
21945 break;
21946
21947 case V2DImode:
21948 use_vec_merge = TARGET_SSE4_1;
21949 if (use_vec_merge)
21950 break;
21951
21952 case V2DFmode:
21953 {
21954 rtx op0, op1;
21955
21956 /* For the two element vectors, we implement a VEC_CONCAT with
21957 the extraction of the other element. */
21958
21959 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21960 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21961
21962 if (elt == 0)
21963 op0 = val, op1 = tmp;
21964 else
21965 op0 = tmp, op1 = val;
21966
21967 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21968 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21969 }
21970 return;
21971
21972 case V4SFmode:
21973 use_vec_merge = TARGET_SSE4_1;
21974 if (use_vec_merge)
21975 break;
21976
21977 switch (elt)
21978 {
21979 case 0:
21980 use_vec_merge = true;
21981 break;
21982
21983 case 1:
21984 /* tmp = target = A B C D */
21985 tmp = copy_to_reg (target);
21986 /* target = A A B B */
21987 emit_insn (gen_sse_unpcklps (target, target, target));
21988 /* target = X A B B */
21989 ix86_expand_vector_set (false, target, val, 0);
21990 /* target = A X C D */
21991 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21992 GEN_INT (1), GEN_INT (0),
21993 GEN_INT (2+4), GEN_INT (3+4)));
21994 return;
21995
21996 case 2:
21997 /* tmp = target = A B C D */
21998 tmp = copy_to_reg (target);
21999 /* tmp = X B C D */
22000 ix86_expand_vector_set (false, tmp, val, 0);
22001 /* target = A B X D */
22002 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22003 GEN_INT (0), GEN_INT (1),
22004 GEN_INT (0+4), GEN_INT (3+4)));
22005 return;
22006
22007 case 3:
22008 /* tmp = target = A B C D */
22009 tmp = copy_to_reg (target);
22010 /* tmp = X B C D */
22011 ix86_expand_vector_set (false, tmp, val, 0);
22012 /* target = A B C X */
22013 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22014 GEN_INT (0), GEN_INT (1),
22015 GEN_INT (2+4), GEN_INT (0+4)));
22016 return;
22017
22018 default:
22019 gcc_unreachable ();
22020 }
22021 break;
22022
22023 case V4SImode:
22024 use_vec_merge = TARGET_SSE4_1;
22025 if (use_vec_merge)
22026 break;
22027
22028 /* Element 0 handled by vec_merge below. */
22029 if (elt == 0)
22030 {
22031 use_vec_merge = true;
22032 break;
22033 }
22034
22035 if (TARGET_SSE2)
22036 {
22037 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22038 store into element 0, then shuffle them back. */
22039
22040 rtx order[4];
22041
22042 order[0] = GEN_INT (elt);
22043 order[1] = const1_rtx;
22044 order[2] = const2_rtx;
22045 order[3] = GEN_INT (3);
22046 order[elt] = const0_rtx;
22047
22048 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22049 order[1], order[2], order[3]));
22050
22051 ix86_expand_vector_set (false, target, val, 0);
22052
22053 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22054 order[1], order[2], order[3]));
22055 }
22056 else
22057 {
22058 /* For SSE1, we have to reuse the V4SF code. */
22059 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22060 gen_lowpart (SFmode, val), elt);
22061 }
22062 return;
22063
22064 case V8HImode:
22065 use_vec_merge = TARGET_SSE2;
22066 break;
22067 case V4HImode:
22068 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22069 break;
22070
22071 case V16QImode:
22072 use_vec_merge = TARGET_SSE4_1;
22073 break;
22074
22075 case V8QImode:
22076 default:
22077 break;
22078 }
22079
22080 if (use_vec_merge)
22081 {
22082 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22083 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22084 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22085 }
22086 else
22087 {
22088 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22089
22090 emit_move_insn (mem, target);
22091
22092 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22093 emit_move_insn (tmp, val);
22094
22095 emit_move_insn (target, mem);
22096 }
22097 }
22098
22099 void
22100 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22101 {
22102 enum machine_mode mode = GET_MODE (vec);
22103 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22104 bool use_vec_extr = false;
22105 rtx tmp;
22106
22107 switch (mode)
22108 {
22109 case V2SImode:
22110 case V2SFmode:
22111 if (!mmx_ok)
22112 break;
22113 /* FALLTHRU */
22114
22115 case V2DFmode:
22116 case V2DImode:
22117 use_vec_extr = true;
22118 break;
22119
22120 case V4SFmode:
22121 use_vec_extr = TARGET_SSE4_1;
22122 if (use_vec_extr)
22123 break;
22124
22125 switch (elt)
22126 {
22127 case 0:
22128 tmp = vec;
22129 break;
22130
22131 case 1:
22132 case 3:
22133 tmp = gen_reg_rtx (mode);
22134 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22135 GEN_INT (elt), GEN_INT (elt),
22136 GEN_INT (elt+4), GEN_INT (elt+4)));
22137 break;
22138
22139 case 2:
22140 tmp = gen_reg_rtx (mode);
22141 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22142 break;
22143
22144 default:
22145 gcc_unreachable ();
22146 }
22147 vec = tmp;
22148 use_vec_extr = true;
22149 elt = 0;
22150 break;
22151
22152 case V4SImode:
22153 use_vec_extr = TARGET_SSE4_1;
22154 if (use_vec_extr)
22155 break;
22156
22157 if (TARGET_SSE2)
22158 {
22159 switch (elt)
22160 {
22161 case 0:
22162 tmp = vec;
22163 break;
22164
22165 case 1:
22166 case 3:
22167 tmp = gen_reg_rtx (mode);
22168 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22169 GEN_INT (elt), GEN_INT (elt),
22170 GEN_INT (elt), GEN_INT (elt)));
22171 break;
22172
22173 case 2:
22174 tmp = gen_reg_rtx (mode);
22175 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22176 break;
22177
22178 default:
22179 gcc_unreachable ();
22180 }
22181 vec = tmp;
22182 use_vec_extr = true;
22183 elt = 0;
22184 }
22185 else
22186 {
22187 /* For SSE1, we have to reuse the V4SF code. */
22188 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22189 gen_lowpart (V4SFmode, vec), elt);
22190 return;
22191 }
22192 break;
22193
22194 case V8HImode:
22195 use_vec_extr = TARGET_SSE2;
22196 break;
22197 case V4HImode:
22198 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22199 break;
22200
22201 case V16QImode:
22202 use_vec_extr = TARGET_SSE4_1;
22203 break;
22204
22205 case V8QImode:
22206 /* ??? Could extract the appropriate HImode element and shift. */
22207 default:
22208 break;
22209 }
22210
22211 if (use_vec_extr)
22212 {
22213 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22214 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22215
22216 /* Let the rtl optimizers know about the zero extension performed. */
22217 if (inner_mode == QImode || inner_mode == HImode)
22218 {
22219 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22220 target = gen_lowpart (SImode, target);
22221 }
22222
22223 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22224 }
22225 else
22226 {
22227 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22228
22229 emit_move_insn (mem, vec);
22230
22231 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22232 emit_move_insn (target, tmp);
22233 }
22234 }
22235
22236 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22237 pattern to reduce; DEST is the destination; IN is the input vector. */
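/* For IN = { a, b, c, d } this computes, roughly:
     tmp1 = { c, d, c, d }            (movhlps)
     tmp2 = fn (tmp1, IN)             (elementwise)
     tmp3 = broadcast of tmp2[1]
     DEST = fn (tmp2, tmp3)
   so element 0 of DEST holds the reduction over all four inputs.  */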
22238
22239 void
22240 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22241 {
22242 rtx tmp1, tmp2, tmp3;
22243
22244 tmp1 = gen_reg_rtx (V4SFmode);
22245 tmp2 = gen_reg_rtx (V4SFmode);
22246 tmp3 = gen_reg_rtx (V4SFmode);
22247
22248 emit_insn (gen_sse_movhlps (tmp1, in, in));
22249 emit_insn (fn (tmp2, tmp1, in));
22250
22251 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22252 GEN_INT (1), GEN_INT (1),
22253 GEN_INT (1+4), GEN_INT (1+4)));
22254 emit_insn (fn (dest, tmp2, tmp3));
22255 }
22256 \f
22257 /* Target hook for scalar_mode_supported_p. */
22258 static bool
22259 ix86_scalar_mode_supported_p (enum machine_mode mode)
22260 {
22261 if (DECIMAL_FLOAT_MODE_P (mode))
22262 return true;
22263 else if (mode == TFmode)
22264 return TARGET_64BIT;
22265 else
22266 return default_scalar_mode_supported_p (mode);
22267 }
22268
22269 /* Implements target hook vector_mode_supported_p. */
22270 static bool
22271 ix86_vector_mode_supported_p (enum machine_mode mode)
22272 {
22273 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22274 return true;
22275 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22276 return true;
22277 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22278 return true;
22279 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22280 return true;
22281 return false;
22282 }
22283
22284 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22285
22286 We do this in the new i386 backend to maintain source compatibility
22287 with the old cc0-based compiler. */
22288
22289 static tree
22290 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22291 tree inputs ATTRIBUTE_UNUSED,
22292 tree clobbers)
22293 {
22294 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22295 clobbers);
22296 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22297 clobbers);
22298 return clobbers;
22299 }
22300
22301 /* Implements the target hook targetm.asm.encode_section_info. This
22302 hook is not used by NetWare. */
22303
22304 static void ATTRIBUTE_UNUSED
22305 ix86_encode_section_info (tree decl, rtx rtl, int first)
22306 {
22307 default_encode_section_info (decl, rtl, first);
22308
22309 if (TREE_CODE (decl) == VAR_DECL
22310 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22311 && ix86_in_large_data_p (decl))
22312 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22313 }
22314
22315 /* Worker function for REVERSE_CONDITION. */
22316
22317 enum rtx_code
22318 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22319 {
22320 return (mode != CCFPmode && mode != CCFPUmode
22321 ? reverse_condition (code)
22322 : reverse_condition_maybe_unordered (code));
22323 }
22324
22325 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22326 to OPERANDS[0]. */
22327
22328 const char *
22329 output_387_reg_move (rtx insn, rtx *operands)
22330 {
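/* When the source register dies in this insn it can be popped off the
   x87 stack, so the popping forms (fstp/ffreep) are preferred below.  */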
22331 if (REG_P (operands[0]))
22332 {
22333 if (REG_P (operands[1])
22334 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22335 {
22336 if (REGNO (operands[0]) == FIRST_STACK_REG)
22337 return output_387_ffreep (operands, 0);
22338 return "fstp\t%y0";
22339 }
22340 if (STACK_TOP_P (operands[0]))
22341 return "fld%z1\t%y1";
22342 return "fst\t%y0";
22343 }
22344 else if (MEM_P (operands[0]))
22345 {
22346 gcc_assert (REG_P (operands[1]));
22347 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22348 return "fstp%z0\t%y0";
22349 else
22350 {
22351 /* There is no non-popping store to memory for XFmode.
22352 So if we need one, follow the store with a load. */
22353 if (GET_MODE (operands[0]) == XFmode)
22354 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22355 else
22356 return "fst%z0\t%y0";
22357 }
22358 }
22359 else
22360 gcc_unreachable ();
22361 }
22362
22363 /* Output code to perform a conditional jump to LABEL if the C2 flag in
22364 the FP status register is set.  */
22365
22366 void
22367 ix86_emit_fp_unordered_jump (rtx label)
22368 {
22369 rtx reg = gen_reg_rtx (HImode);
22370 rtx temp;
22371
22372 emit_insn (gen_x86_fnstsw_1 (reg));
22373
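/* The C2 flag is bit 10 of the FP status word fetched above, i.e. bit 2
   (mask 0x04) of its high byte.  With SAHF that byte is copied into
   EFLAGS, where C2 ends up in PF and can be tested as UNORDERED;
   otherwise test the 0x04 bit of the high byte directly.  */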
22374 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22375 {
22376 emit_insn (gen_x86_sahf_1 (reg));
22377
22378 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22379 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22380 }
22381 else
22382 {
22383 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22384
22385 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22386 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22387 }
22388
22389 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22390 gen_rtx_LABEL_REF (VOIDmode, label),
22391 pc_rtx);
22392 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22393
22394 emit_jump_insn (temp);
22395 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22396 }
22397
22398 /* Output code to perform a log1p XFmode calculation. */
22399
22400 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
22401 {
22402 rtx label1 = gen_label_rtx ();
22403 rtx label2 = gen_label_rtx ();
22404
22405 rtx tmp = gen_reg_rtx (XFmode);
22406 rtx tmp2 = gen_reg_rtx (XFmode);
22407
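/* If |op1| is below 1 - sqrt(2)/2 (about 0.2928932), the range for which
   fyl2xp1 is specified to be accurate, compute fldln2 * fyl2xp1 (op1)
   directly; otherwise fall back to fldln2 * fyl2x (1.0 + op1).  */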
22408 emit_insn (gen_absxf2 (tmp, op1));
22409 emit_insn (gen_cmpxf (tmp,
22410 CONST_DOUBLE_FROM_REAL_VALUE (
22411 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22412 XFmode)));
22413 emit_jump_insn (gen_bge (label1));
22414
22415 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22416 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22417 emit_jump (label2);
22418
22419 emit_label (label1);
22420 emit_move_insn (tmp, CONST1_RTX (XFmode));
22421 emit_insn (gen_addxf3 (tmp, op1, tmp));
22422 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22423 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22424
22425 emit_label (label2);
22426 }
22427
22428 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22429
22430 static void ATTRIBUTE_UNUSED
22431 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22432 tree decl)
22433 {
22434 /* With Binutils 2.15, the "@unwind" marker must be specified on
22435 every occurrence of the ".eh_frame" section, not just the first
22436 one. */
22437 if (TARGET_64BIT
22438 && strcmp (name, ".eh_frame") == 0)
22439 {
22440 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22441 flags & SECTION_WRITE ? "aw" : "a");
22442 return;
22443 }
22444 default_elf_asm_named_section (name, flags, decl);
22445 }
22446
22447 /* Return the mangling of TYPE if it is an extended fundamental type. */
22448
22449 static const char *
22450 ix86_mangle_fundamental_type (tree type)
22451 {
22452 switch (TYPE_MODE (type))
22453 {
22454 case TFmode:
22455 /* __float128 is "g". */
22456 return "g";
22457 case XFmode:
22458 /* "long double" or __float80 is "e". */
22459 return "e";
22460 default:
22461 return NULL;
22462 }
22463 }
22464
22465 /* For 32-bit code we can avoid setting up the PIC register by using
22466 the hidden __stack_chk_fail_local function instead of calling
22467 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
22468 register, so it is better to call __stack_chk_fail directly.  */
22469
22470 static tree
22471 ix86_stack_protect_fail (void)
22472 {
22473 return TARGET_64BIT
22474 ? default_external_stack_protect_fail ()
22475 : default_hidden_stack_protect_fail ();
22476 }
22477
22478 /* Select a format to encode pointers in exception handling data. CODE
22479 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22480 true if the symbol may be affected by dynamic relocations.
22481
22482 ??? All x86 object file formats are capable of representing this.
22483 After all, the relocation needed is the same as for the call insn.
22484 Whether or not a particular assembler allows us to enter such, I
22485 guess we'll have to see. */
22486 int
22487 asm_preferred_eh_data_format (int code, int global)
22488 {
22489 if (flag_pic)
22490 {
22491 int type = DW_EH_PE_sdata8;
22492 if (!TARGET_64BIT
22493 || ix86_cmodel == CM_SMALL_PIC
22494 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22495 type = DW_EH_PE_sdata4;
22496 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22497 }
22498 if (ix86_cmodel == CM_SMALL
22499 || (ix86_cmodel == CM_MEDIUM && code))
22500 return DW_EH_PE_udata4;
22501 return DW_EH_PE_absptr;
22502 }
22503 \f
22504 /* Copy the sign of SIGN onto the positive value ABS_VALUE and store the
22505 result in RESULT.  If MASK is non-null, it is a mask covering all bits
22506 except the sign bit.  */
22507 static void
22508 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22509 {
22510 enum machine_mode mode = GET_MODE (sign);
22511 rtx sgn = gen_reg_rtx (mode);
22512 if (mask == NULL_RTX)
22513 {
22514 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22515 if (!VECTOR_MODE_P (mode))
22516 {
22517 /* We need to generate a scalar mode mask in this case. */
22518 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22519 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22520 mask = gen_reg_rtx (mode);
22521 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22522 }
22523 }
22524 else
22525 mask = gen_rtx_NOT (mode, mask);
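/* Either way the AND below extracts just the sign bit of SIGN into SGN;
   OR-ing it into the non-negative ABS_VALUE gives the copysign result.  */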
22526 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22527 gen_rtx_AND (mode, mask, sign)));
22528 emit_insn (gen_rtx_SET (VOIDmode, result,
22529 gen_rtx_IOR (mode, abs_value, sgn)));
22530 }
22531
22532 /* Expand fabs (OP0) and return a new rtx that holds the result.  If
22533 SMASK is non-null, the mask used to clear the sign bit is stored in
22534 *SMASK.  */
22535 static rtx
22536 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22537 {
22538 enum machine_mode mode = GET_MODE (op0);
22539 rtx xa, mask;
22540
22541 xa = gen_reg_rtx (mode);
22542 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22543 if (!VECTOR_MODE_P (mode))
22544 {
22545 /* We need to generate a scalar mode mask in this case. */
22546 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22547 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22548 mask = gen_reg_rtx (mode);
22549 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22550 }
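/* Clearing the sign bit with the inverted sign-bit mask yields |OP0|.  */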
22551 emit_insn (gen_rtx_SET (VOIDmode, xa,
22552 gen_rtx_AND (mode, op0, mask)));
22553
22554 if (smask)
22555 *smask = mask;
22556
22557 return xa;
22558 }
22559
22560 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22561 swapping the operands if SWAP_OPERANDS is true. The expanded
22562 code is a forward jump to a newly created label in case the
22563 comparison is true. The generated label rtx is returned. */
22564 static rtx
22565 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22566 bool swap_operands)
22567 {
22568 rtx label, tmp;
22569
22570 if (swap_operands)
22571 {
22572 tmp = op0;
22573 op0 = op1;
22574 op1 = tmp;
22575 }
22576
22577 label = gen_label_rtx ();
22578 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22579 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22580 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22581 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22582 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22583 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22584 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
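/* Record the target label on the jump we just built so later passes
   can find it.  */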
22585 JUMP_LABEL (tmp) = label;
22586
22587 return label;
22588 }
22589
22590 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
22591 using comparison code CODE.  Operands are swapped for the comparison if
22592 SWAP_OPERANDS is true.  Returns an rtx for the generated mask.  */
22593 static rtx
22594 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22595 bool swap_operands)
22596 {
22597 enum machine_mode mode = GET_MODE (op0);
22598 rtx mask = gen_reg_rtx (mode);
22599
22600 if (swap_operands)
22601 {
22602 rtx tmp = op0;
22603 op0 = op1;
22604 op1 = tmp;
22605 }
22606
22607 if (mode == DFmode)
22608 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22609 gen_rtx_fmt_ee (code, mode, op0, op1)));
22610 else
22611 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22612 gen_rtx_fmt_ee (code, mode, op0, op1)));
22613
22614 return mask;
22615 }
22616
22617 /* Generate and return an rtx of mode MODE holding 2**N, where N is the number
22618 of explicit mantissa bits of MODE, which must be DFmode (52) or SFmode (23).  */
22619 static rtx
22620 ix86_gen_TWO52 (enum machine_mode mode)
22621 {
22622 REAL_VALUE_TYPE TWO52r;
22623 rtx TWO52;
22624
22625 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22626 TWO52 = const_double_from_real_value (TWO52r, mode);
22627 TWO52 = force_reg (mode, TWO52);
22628
22629 return TWO52;
22630 }
22631
22632 /* Expand SSE sequence for computing lround from OP1 storing
22633 into OP0. */
22634 void
22635 ix86_expand_lround (rtx op0, rtx op1)
22636 {
22637 /* C code for the stuff we're doing below:
22638 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22639 return (long)tmp;
22640 */
22641 enum machine_mode mode = GET_MODE (op1);
22642 const struct real_format *fmt;
22643 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22644 rtx adj;
22645
22646 /* load nextafter (0.5, 0.0) */
22647 fmt = REAL_MODE_FORMAT (mode);
22648 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22649 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
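/* Using the predecessor of 0.5 rather than 0.5 itself avoids values just
   below 0.5 being rounded up to 1.0 by the addition below.  */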
22650
22651 /* adj = copysign (0.5, op1) */
22652 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22653 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22654
22655 /* adj = op1 + adj */
22656 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22657
22658 /* op0 = (imode)adj */
22659 expand_fix (op0, adj, 0);
22660 }
22661
22662 /* Expand SSE2 sequence for computing lfloor or lceil (selected by DO_FLOOR)
22663 from OP1, storing the result into OP0.  */
22664 void
22665 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22666 {
22667 /* C code for the stuff we're doing below (for do_floor):
22668 xi = (long)op1;
22669 xi -= (double)xi > op1 ? 1 : 0;
22670 return xi;
22671 */
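/* For the ceil case (!DO_FLOOR) the comparison and the direction of the
   adjustment are simply reversed: xi += (double)xi < op1 ? 1 : 0.  */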
22672 enum machine_mode fmode = GET_MODE (op1);
22673 enum machine_mode imode = GET_MODE (op0);
22674 rtx ireg, freg, label, tmp;
22675
22676 /* reg = (long)op1 */
22677 ireg = gen_reg_rtx (imode);
22678 expand_fix (ireg, op1, 0);
22679
22680 /* freg = (double)reg */
22681 freg = gen_reg_rtx (fmode);
22682 expand_float (freg, ireg, 0);
22683
22684 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22685 label = ix86_expand_sse_compare_and_jump (UNLE,
22686 freg, op1, !do_floor);
22687 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22688 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22689 emit_move_insn (ireg, tmp);
22690
22691 emit_label (label);
22692 LABEL_NUSES (label) = 1;
22693
22694 emit_move_insn (op0, ireg);
22695 }
22696
22697 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22698 result in OPERAND0. */
22699 void
22700 ix86_expand_rint (rtx operand0, rtx operand1)
22701 {
22702 /* C code for the stuff we're doing below:
22703 xa = fabs (operand1);
22704 if (!isless (xa, 2**52))
22705 return operand1;
22706 xa = xa + 2**52 - 2**52;
22707 return copysign (xa, operand1);
22708 */
22709 enum machine_mode mode = GET_MODE (operand0);
22710 rtx res, xa, label, TWO52, mask;
22711
22712 res = gen_reg_rtx (mode);
22713 emit_move_insn (res, operand1);
22714
22715 /* xa = abs (operand1) */
22716 xa = ix86_expand_sse_fabs (res, &mask);
22717
22718 /* if (!isless (xa, TWO52)) goto label; */
22719 TWO52 = ix86_gen_TWO52 (mode);
22720 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22721
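/* Adding and then subtracting 2**52 (2**23 for SFmode) pushes the fraction
   bits out of the mantissa, so xa is rounded to an integer in the current
   rounding mode; larger values are already integral and were handled by
   the branch above.  */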
22722 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22723 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22724
22725 ix86_sse_copysign_to_positive (res, xa, res, mask);
22726
22727 emit_label (label);
22728 LABEL_NUSES (label) = 1;
22729
22730 emit_move_insn (operand0, res);
22731 }
22732
22733 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22734 into OPERAND0. */
22735 void
22736 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22737 {
22738 /* C code for the stuff we expand below.
22739 double xa = fabs (x), x2;
22740 if (!isless (xa, TWO52))
22741 return x;
22742 xa = xa + TWO52 - TWO52;
22743 x2 = copysign (xa, x);
22744 Compensate. Floor:
22745 if (x2 > x)
22746 x2 -= 1;
22747 Compensate. Ceil:
22748 if (x2 < x)
22749 x2 -= -1;
22750 return x2;
22751 */
22752 enum machine_mode mode = GET_MODE (operand0);
22753 rtx xa, TWO52, tmp, label, one, res, mask;
22754
22755 TWO52 = ix86_gen_TWO52 (mode);
22756
22757 /* Temporary for holding the result, initialized to the input
22758 operand to ease control flow. */
22759 res = gen_reg_rtx (mode);
22760 emit_move_insn (res, operand1);
22761
22762 /* xa = abs (operand1) */
22763 xa = ix86_expand_sse_fabs (res, &mask);
22764
22765 /* if (!isless (xa, TWO52)) goto label; */
22766 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22767
22768 /* xa = xa + TWO52 - TWO52; */
22769 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22770 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22771
22772 /* xa = copysign (xa, operand1) */
22773 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22774
22775 /* generate 1.0 or -1.0 */
22776 one = force_reg (mode,
22777 const_double_from_real_value (do_floor
22778 ? dconst1 : dconstm1, mode));
22779
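/* The comparison mask below is all-ones when an adjustment is needed, so
   ANDing it with ONE yields either the +/-1.0 adjustment or 0.0.  */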
22780 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22781 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22782 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22783 gen_rtx_AND (mode, one, tmp)));
22784 /* We always need to subtract here to preserve signed zero. */
22785 tmp = expand_simple_binop (mode, MINUS,
22786 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22787 emit_move_insn (res, tmp);
22788
22789 emit_label (label);
22790 LABEL_NUSES (label) = 1;
22791
22792 emit_move_insn (operand0, res);
22793 }
22794
22795 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22796 into OPERAND0. */
22797 void
22798 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22799 {
22800 /* C code for the stuff we expand below.
22801 double xa = fabs (x), x2;
22802 if (!isless (xa, TWO52))
22803 return x;
22804 x2 = (double)(long)x;
22805 Compensate. Floor:
22806 if (x2 > x)
22807 x2 -= 1;
22808 Compensate. Ceil:
22809 if (x2 < x)
22810 x2 += 1;
22811 if (HONOR_SIGNED_ZEROS (mode))
22812 return copysign (x2, x);
22813 return x2;
22814 */
22815 enum machine_mode mode = GET_MODE (operand0);
22816 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22817
22818 TWO52 = ix86_gen_TWO52 (mode);
22819
22820 /* Temporary for holding the result, initialized to the input
22821 operand to ease control flow. */
22822 res = gen_reg_rtx (mode);
22823 emit_move_insn (res, operand1);
22824
22825 /* xa = abs (operand1) */
22826 xa = ix86_expand_sse_fabs (res, &mask);
22827
22828 /* if (!isless (xa, TWO52)) goto label; */
22829 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22830
22831 /* xa = (double)(long)x */
22832 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22833 expand_fix (xi, res, 0);
22834 expand_float (xa, xi, 0);
22835
22836 /* generate 1.0 */
22837 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22838
22839 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22840 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22841 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22842 gen_rtx_AND (mode, one, tmp)));
22843 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22844 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22845 emit_move_insn (res, tmp);
22846
22847 if (HONOR_SIGNED_ZEROS (mode))
22848 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22849
22850 emit_label (label);
22851 LABEL_NUSES (label) = 1;
22852
22853 emit_move_insn (operand0, res);
22854 }
22855
22856 /* Expand SSE sequence for computing round from OPERAND1 storing
22857 into OPERAND0.  This sequence works without relying on DImode truncation
22858 via cvttsd2siq, which is only available on 64-bit targets.  */
22859 void
22860 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22861 {
22862 /* C code for the stuff we expand below.
22863 double xa = fabs (x), xa2, x2;
22864 if (!isless (xa, TWO52))
22865 return x;
22866 Using the absolute value and copying back sign makes
22867 -0.0 -> -0.0 correct.
22868 xa2 = xa + TWO52 - TWO52;
22869 Compensate.
22870 dxa = xa2 - xa;
22871 if (dxa <= -0.5)
22872 xa2 += 1;
22873 else if (dxa > 0.5)
22874 xa2 -= 1;
22875 x2 = copysign (xa2, x);
22876 return x2;
22877 */
22878 enum machine_mode mode = GET_MODE (operand0);
22879 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22880
22881 TWO52 = ix86_gen_TWO52 (mode);
22882
22883 /* Temporary for holding the result, initialized to the input
22884 operand to ease control flow. */
22885 res = gen_reg_rtx (mode);
22886 emit_move_insn (res, operand1);
22887
22888 /* xa = abs (operand1) */
22889 xa = ix86_expand_sse_fabs (res, &mask);
22890
22891 /* if (!isless (xa, TWO52)) goto label; */
22892 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22893
22894 /* xa2 = xa + TWO52 - TWO52; */
22895 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22896 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22897
22898 /* dxa = xa2 - xa; */
22899 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22900
22901 /* generate 0.5, 1.0 and -0.5 */
22902 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22903 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22904 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22905 0, OPTAB_DIRECT);
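/* ONE and MHALF are derived arithmetically from HALF, so only the 0.5
   constant needs to be materialized.  */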
22906
22907 /* Compensate. */
22908 tmp = gen_reg_rtx (mode);
22909 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22910 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22911 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22912 gen_rtx_AND (mode, one, tmp)));
22913 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22914 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22915 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22916 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22917 gen_rtx_AND (mode, one, tmp)));
22918 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22919
22920 /* res = copysign (xa2, operand1) */
22921 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22922
22923 emit_label (label);
22924 LABEL_NUSES (label) = 1;
22925
22926 emit_move_insn (operand0, res);
22927 }
22928
22929 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22930 into OPERAND0. */
22931 void
22932 ix86_expand_trunc (rtx operand0, rtx operand1)
22933 {
22934 /* C code for SSE variant we expand below.
22935 double xa = fabs (x), x2;
22936 if (!isless (xa, TWO52))
22937 return x;
22938 x2 = (double)(long)x;
22939 if (HONOR_SIGNED_ZEROS (mode))
22940 return copysign (x2, x);
22941 return x2;
22942 */
22943 enum machine_mode mode = GET_MODE (operand0);
22944 rtx xa, xi, TWO52, label, res, mask;
22945
22946 TWO52 = ix86_gen_TWO52 (mode);
22947
22948 /* Temporary for holding the result, initialized to the input
22949 operand to ease control flow. */
22950 res = gen_reg_rtx (mode);
22951 emit_move_insn (res, operand1);
22952
22953 /* xa = abs (operand1) */
22954 xa = ix86_expand_sse_fabs (res, &mask);
22955
22956 /* if (!isless (xa, TWO52)) goto label; */
22957 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22958
22959 /* x = (double)(long)x */
22960 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22961 expand_fix (xi, res, 0);
22962 expand_float (res, xi, 0);
22963
22964 if (HONOR_SIGNED_ZEROS (mode))
22965 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22966
22967 emit_label (label);
22968 LABEL_NUSES (label) = 1;
22969
22970 emit_move_insn (operand0, res);
22971 }
22972
22973 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
22974 OPERAND0.  Unlike ix86_expand_trunc, this variant avoids DImode truncation
and so also works for DFmode on 32-bit targets.  */
22975 void
22976 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22977 {
22978 enum machine_mode mode = GET_MODE (operand0);
22979 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22980
22981 /* C code for SSE variant we expand below.
22982 double xa = fabs (x), xa2, x2;
22983 if (!isless (xa, TWO52))
22984 return x;
22985 xa2 = xa + TWO52 - TWO52;
22986 Compensate:
22987 if (xa2 > xa)
22988 xa2 -= 1.0;
22989 x2 = copysign (xa2, x);
22990 return x2;
22991 */
22992
22993 TWO52 = ix86_gen_TWO52 (mode);
22994
22995 /* Temporary for holding the result, initialized to the input
22996 operand to ease control flow. */
22997 res = gen_reg_rtx (mode);
22998 emit_move_insn (res, operand1);
22999
23000 /* xa = abs (operand1) */
23001 xa = ix86_expand_sse_fabs (res, &smask);
23002
23003 /* if (!isless (xa, TWO52)) goto label; */
23004 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23005
23006 /* res = xa + TWO52 - TWO52; */
23007 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23008 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23009 emit_move_insn (res, tmp);
23010
23011 /* generate 1.0 */
23012 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23013
23014 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23015 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23016 emit_insn (gen_rtx_SET (VOIDmode, mask,
23017 gen_rtx_AND (mode, mask, one)));
23018 tmp = expand_simple_binop (mode, MINUS,
23019 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23020 emit_move_insn (res, tmp);
23021
23022 /* res = copysign (res, operand1) */
23023 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23024
23025 emit_label (label);
23026 LABEL_NUSES (label) = 1;
23027
23028 emit_move_insn (operand0, res);
23029 }
23030
23031 /* Expand SSE sequence for computing round from OPERAND1 storing
23032 into OPERAND0. */
23033 void
23034 ix86_expand_round (rtx operand0, rtx operand1)
23035 {
23036 /* C code for the stuff we're doing below:
23037 double xa = fabs (x);
23038 if (!isless (xa, TWO52))
23039 return x;
23040 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23041 return copysign (xa, x);
23042 */
23043 enum machine_mode mode = GET_MODE (operand0);
23044 rtx res, TWO52, xa, label, xi, half, mask;
23045 const struct real_format *fmt;
23046 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23047
23048 /* Temporary for holding the result, initialized to the input
23049 operand to ease control flow. */
23050 res = gen_reg_rtx (mode);
23051 emit_move_insn (res, operand1);
23052
23053 TWO52 = ix86_gen_TWO52 (mode);
23054 xa = ix86_expand_sse_fabs (res, &mask);
23055 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23056
23057 /* load nextafter (0.5, 0.0) */
23058 fmt = REAL_MODE_FORMAT (mode);
23059 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23060 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
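/* As in ix86_expand_lround, the predecessor of 0.5 is used so that values
   just below 0.5 are not rounded up by the addition.  */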
23061
23062 /* xa = xa + 0.5 */
23063 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23064 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23065
23066 /* xa = (double)(int64_t)xa */
23067 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23068 expand_fix (xi, xa, 0);
23069 expand_float (xa, xi, 0);
23070
23071 /* res = copysign (xa, operand1) */
23072 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23073
23074 emit_label (label);
23075 LABEL_NUSES (label) = 1;
23076
23077 emit_move_insn (operand0, res);
23078 }
23079
23080 \f
23081 /* Table of valid machine attributes. */
23082 static const struct attribute_spec ix86_attribute_table[] =
23083 {
23084 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23085 /* Stdcall attribute says callee is responsible for popping arguments
23086 if they are not variable. */
23087 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23088 /* Fastcall attribute says callee is responsible for popping arguments
23089 if they are not variable. */
23090 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23091 /* Cdecl attribute says the callee is a normal C declaration.  */
23092 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23093 /* Regparm attribute specifies how many integer arguments are to be
23094 passed in registers. */
23095 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23096 /* Sseregparm attribute says we are using x86_64 calling conventions
23097 for FP arguments. */
23098 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23099 /* force_align_arg_pointer says this function realigns the stack at entry. */
23100 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23101 false, true, true, ix86_handle_cconv_attribute },
23102 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23103 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23104 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23105 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23106 #endif
23107 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23108 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23109 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23110 SUBTARGET_ATTRIBUTE_TABLE,
23111 #endif
23112 { NULL, 0, 0, false, false, false, NULL }
23113 };
23114
23115 /* Initialize the GCC target structure. */
23116 #undef TARGET_ATTRIBUTE_TABLE
23117 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23118 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23119 # undef TARGET_MERGE_DECL_ATTRIBUTES
23120 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23121 #endif
23122
23123 #undef TARGET_COMP_TYPE_ATTRIBUTES
23124 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23125
23126 #undef TARGET_INIT_BUILTINS
23127 #define TARGET_INIT_BUILTINS ix86_init_builtins
23128 #undef TARGET_EXPAND_BUILTIN
23129 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23130
23131 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23132 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
23133 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23134 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
23135
23136 #undef TARGET_ASM_FUNCTION_EPILOGUE
23137 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23138
23139 #undef TARGET_ENCODE_SECTION_INFO
23140 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23141 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23142 #else
23143 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23144 #endif
23145
23146 #undef TARGET_ASM_OPEN_PAREN
23147 #define TARGET_ASM_OPEN_PAREN ""
23148 #undef TARGET_ASM_CLOSE_PAREN
23149 #define TARGET_ASM_CLOSE_PAREN ""
23150
23151 #undef TARGET_ASM_ALIGNED_HI_OP
23152 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23153 #undef TARGET_ASM_ALIGNED_SI_OP
23154 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23155 #ifdef ASM_QUAD
23156 #undef TARGET_ASM_ALIGNED_DI_OP
23157 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23158 #endif
23159
23160 #undef TARGET_ASM_UNALIGNED_HI_OP
23161 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23162 #undef TARGET_ASM_UNALIGNED_SI_OP
23163 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23164 #undef TARGET_ASM_UNALIGNED_DI_OP
23165 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23166
23167 #undef TARGET_SCHED_ADJUST_COST
23168 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23169 #undef TARGET_SCHED_ISSUE_RATE
23170 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23171 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23172 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23173 ia32_multipass_dfa_lookahead
23174
23175 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23176 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23177
23178 #ifdef HAVE_AS_TLS
23179 #undef TARGET_HAVE_TLS
23180 #define TARGET_HAVE_TLS true
23181 #endif
23182 #undef TARGET_CANNOT_FORCE_CONST_MEM
23183 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23184 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23185 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23186
23187 #undef TARGET_DELEGITIMIZE_ADDRESS
23188 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23189
23190 #undef TARGET_MS_BITFIELD_LAYOUT_P
23191 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23192
23193 #if TARGET_MACHO
23194 #undef TARGET_BINDS_LOCAL_P
23195 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23196 #endif
23197 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23198 #undef TARGET_BINDS_LOCAL_P
23199 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23200 #endif
23201
23202 #undef TARGET_ASM_OUTPUT_MI_THUNK
23203 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23204 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23205 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23206
23207 #undef TARGET_ASM_FILE_START
23208 #define TARGET_ASM_FILE_START x86_file_start
23209
23210 #undef TARGET_DEFAULT_TARGET_FLAGS
23211 #define TARGET_DEFAULT_TARGET_FLAGS \
23212 (TARGET_DEFAULT \
23213 | TARGET_SUBTARGET_DEFAULT \
23214 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23215
23216 #undef TARGET_HANDLE_OPTION
23217 #define TARGET_HANDLE_OPTION ix86_handle_option
23218
23219 #undef TARGET_RTX_COSTS
23220 #define TARGET_RTX_COSTS ix86_rtx_costs
23221 #undef TARGET_ADDRESS_COST
23222 #define TARGET_ADDRESS_COST ix86_address_cost
23223
23224 #undef TARGET_FIXED_CONDITION_CODE_REGS
23225 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23226 #undef TARGET_CC_MODES_COMPATIBLE
23227 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23228
23229 #undef TARGET_MACHINE_DEPENDENT_REORG
23230 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23231
23232 #undef TARGET_BUILD_BUILTIN_VA_LIST
23233 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23234
23235 #undef TARGET_MD_ASM_CLOBBERS
23236 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23237
23238 #undef TARGET_PROMOTE_PROTOTYPES
23239 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23240 #undef TARGET_STRUCT_VALUE_RTX
23241 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23242 #undef TARGET_SETUP_INCOMING_VARARGS
23243 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23244 #undef TARGET_MUST_PASS_IN_STACK
23245 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23246 #undef TARGET_PASS_BY_REFERENCE
23247 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23248 #undef TARGET_INTERNAL_ARG_POINTER
23249 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23250 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23251 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23252 #undef TARGET_STRICT_ARGUMENT_NAMING
23253 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23254
23255 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23256 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23257
23258 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23259 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23260
23261 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23262 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23263
23264 #ifdef HAVE_AS_TLS
23265 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23266 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23267 #endif
23268
23269 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23270 #undef TARGET_INSERT_ATTRIBUTES
23271 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23272 #endif
23273
23274 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23275 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23276
23277 #undef TARGET_STACK_PROTECT_FAIL
23278 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23279
23280 #undef TARGET_FUNCTION_VALUE
23281 #define TARGET_FUNCTION_VALUE ix86_function_value
23282
23283 struct gcc_target targetm = TARGET_INITIALIZER;
23284 \f
23285 #include "gt-i386.h"