1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
58 #endif
59
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
66 : 4)
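/* Illustrative use, as a sketch: the per-mode multiply and divide arrays in
   struct processor_costs (mult_init and divide in i386.h, assuming those
   field names) are indexed with this macro, e.g.
       ix86_cost->mult_init[MODE_INDEX (SImode)]   -- SImode multiply start cost
       ix86_cost->divide[MODE_INDEX (DImode)]      -- DImode divide/mod cost
   QImode..DImode map to slots 0..3; any other mode falls into "other" (4).  */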
67
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
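/* Worked example of the scaling above: with COSTS_N_INSNS (N) == (N) * 4 and a
   2-byte addition, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so when tuning
   for size a 2-byte instruction is charged the same as one plain add in the
   time-based cost tables.  */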
71
72 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
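/* A reading guide for the stringop tables in the cost structures below (a
   sketch based on how the initializers are laid out; struct stringop_algs
   itself is declared in i386.h): each entry is
       {algorithm for unknown size, {{max known size, algorithm}, ...}}
   where a max of -1 means "no upper bound".  Under that reading,
       {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   says: call the library routine when the size is unknown, use rep movsl/stosl
   for known sizes up to 256 bytes, and fall back to a library call above that.  */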
73
74 static const
75 struct processor_costs size_cost = { /* costs for tuning for size */
76 COSTS_N_BYTES (2), /* cost of an add instruction */
77 COSTS_N_BYTES (3), /* cost of a lea instruction */
78 COSTS_N_BYTES (2), /* variable shift costs */
79 COSTS_N_BYTES (3), /* constant shift costs */
80 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
81 COSTS_N_BYTES (3), /* HI */
82 COSTS_N_BYTES (3), /* SI */
83 COSTS_N_BYTES (3), /* DI */
84 COSTS_N_BYTES (5)}, /* other */
85 0, /* cost of multiply per each bit set */
86 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
87 COSTS_N_BYTES (3), /* HI */
88 COSTS_N_BYTES (3), /* SI */
89 COSTS_N_BYTES (3), /* DI */
90 COSTS_N_BYTES (5)}, /* other */
91 COSTS_N_BYTES (3), /* cost of movsx */
92 COSTS_N_BYTES (3), /* cost of movzx */
93 0, /* "large" insn */
94 2, /* MOVE_RATIO */
95 2, /* cost for loading QImode using movzbl */
96 {2, 2, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 2, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {2, 2, 2}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {2, 2, 2}, /* cost of storing fp registers
104 in SFmode, DFmode and XFmode */
105 3, /* cost of moving MMX register */
106 {3, 3}, /* cost of loading MMX registers
107 in SImode and DImode */
108 {3, 3}, /* cost of storing MMX registers
109 in SImode and DImode */
110 3, /* cost of moving SSE register */
111 {3, 3, 3}, /* cost of loading SSE registers
112 in SImode, DImode and TImode */
113 {3, 3, 3}, /* cost of storing SSE registers
114 in SImode, DImode and TImode */
115 3, /* MMX or SSE register to integer */
116 0, /* size of prefetch block */
117 0, /* number of parallel prefetches */
118 2, /* Branch cost */
119 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
120 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
121 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
122 COSTS_N_BYTES (2), /* cost of FABS instruction. */
123 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
124 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
127 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
129 };
130
131 /* Processor costs (relative to an add) */
132 static const
133 struct processor_costs i386_cost = { /* 386 specific costs */
134 COSTS_N_INSNS (1), /* cost of an add instruction */
135 COSTS_N_INSNS (1), /* cost of a lea instruction */
136 COSTS_N_INSNS (3), /* variable shift costs */
137 COSTS_N_INSNS (2), /* constant shift costs */
138 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
139 COSTS_N_INSNS (6), /* HI */
140 COSTS_N_INSNS (6), /* SI */
141 COSTS_N_INSNS (6), /* DI */
142 COSTS_N_INSNS (6)}, /* other */
143 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
144 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
145 COSTS_N_INSNS (23), /* HI */
146 COSTS_N_INSNS (23), /* SI */
147 COSTS_N_INSNS (23), /* DI */
148 COSTS_N_INSNS (23)}, /* other */
149 COSTS_N_INSNS (3), /* cost of movsx */
150 COSTS_N_INSNS (2), /* cost of movzx */
151 15, /* "large" insn */
152 3, /* MOVE_RATIO */
153 4, /* cost for loading QImode using movzbl */
154 {2, 4, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 4, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {8, 8, 8}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {8, 8, 8}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 2, /* cost of moving MMX register */
164 {4, 8}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {4, 8}, /* cost of storing MMX registers
167 in SImode and DImode */
168 2, /* cost of moving SSE register */
169 {4, 8, 16}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {4, 8, 16}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of prefetch block */
175 0, /* number of parallel prefetches */
176 1, /* Branch cost */
177 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
178 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
179 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
180 COSTS_N_INSNS (22), /* cost of FABS instruction. */
181 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
182 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
183 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
184 DUMMY_STRINGOP_ALGS},
185 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
186 DUMMY_STRINGOP_ALGS},
187 };
188
189 static const
190 struct processor_costs i486_cost = { /* 486 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (12), /* HI */
197 COSTS_N_INSNS (12), /* SI */
198 COSTS_N_INSNS (12), /* DI */
199 COSTS_N_INSNS (12)}, /* other */
200 1, /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (40), /* HI */
203 COSTS_N_INSNS (40), /* SI */
204 COSTS_N_INSNS (40), /* DI */
205 COSTS_N_INSNS (40)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
209 3, /* MOVE_RATIO */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 1, /* Branch cost */
234 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
235 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
236 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
237 COSTS_N_INSNS (3), /* cost of FABS instruction. */
238 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
239 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
240 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
241 DUMMY_STRINGOP_ALGS},
242 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
243 DUMMY_STRINGOP_ALGS}
244 };
245
246 static const
247 struct processor_costs pentium_cost = {
248 COSTS_N_INSNS (1), /* cost of an add instruction */
249 COSTS_N_INSNS (1), /* cost of a lea instruction */
250 COSTS_N_INSNS (4), /* variable shift costs */
251 COSTS_N_INSNS (1), /* constant shift costs */
252 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
253 COSTS_N_INSNS (11), /* HI */
254 COSTS_N_INSNS (11), /* SI */
255 COSTS_N_INSNS (11), /* DI */
256 COSTS_N_INSNS (11)}, /* other */
257 0, /* cost of multiply per each bit set */
258 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
259 COSTS_N_INSNS (25), /* HI */
260 COSTS_N_INSNS (25), /* SI */
261 COSTS_N_INSNS (25), /* DI */
262 COSTS_N_INSNS (25)}, /* other */
263 COSTS_N_INSNS (3), /* cost of movsx */
264 COSTS_N_INSNS (2), /* cost of movzx */
265 8, /* "large" insn */
266 6, /* MOVE_RATIO */
267 6, /* cost for loading QImode using movzbl */
268 {2, 4, 2}, /* cost of loading integer registers
269 in QImode, HImode and SImode.
270 Relative to reg-reg move (2). */
271 {2, 4, 2}, /* cost of storing integer registers */
272 2, /* cost of reg,reg fld/fst */
273 {2, 2, 6}, /* cost of loading fp registers
274 in SFmode, DFmode and XFmode */
275 {4, 4, 6}, /* cost of storing fp registers
276 in SFmode, DFmode and XFmode */
277 8, /* cost of moving MMX register */
278 {8, 8}, /* cost of loading MMX registers
279 in SImode and DImode */
280 {8, 8}, /* cost of storing MMX registers
281 in SImode and DImode */
282 2, /* cost of moving SSE register */
283 {4, 8, 16}, /* cost of loading SSE registers
284 in SImode, DImode and TImode */
285 {4, 8, 16}, /* cost of storing SSE registers
286 in SImode, DImode and TImode */
287 3, /* MMX or SSE register to integer */
288 0, /* size of prefetch block */
289 0, /* number of parallel prefetches */
290 2, /* Branch cost */
291 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
292 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
293 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
294 COSTS_N_INSNS (1), /* cost of FABS instruction. */
295 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
296 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
297 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
298 DUMMY_STRINGOP_ALGS},
299 {{libcall, {{-1, rep_prefix_4_byte}}},
300 DUMMY_STRINGOP_ALGS}
301 };
302
303 static const
304 struct processor_costs pentiumpro_cost = {
305 COSTS_N_INSNS (1), /* cost of an add instruction */
306 COSTS_N_INSNS (1), /* cost of a lea instruction */
307 COSTS_N_INSNS (1), /* variable shift costs */
308 COSTS_N_INSNS (1), /* constant shift costs */
309 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
310 COSTS_N_INSNS (4), /* HI */
311 COSTS_N_INSNS (4), /* SI */
312 COSTS_N_INSNS (4), /* DI */
313 COSTS_N_INSNS (4)}, /* other */
314 0, /* cost of multiply per each bit set */
315 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
316 COSTS_N_INSNS (17), /* HI */
317 COSTS_N_INSNS (17), /* SI */
318 COSTS_N_INSNS (17), /* DI */
319 COSTS_N_INSNS (17)}, /* other */
320 COSTS_N_INSNS (1), /* cost of movsx */
321 COSTS_N_INSNS (1), /* cost of movzx */
322 8, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 4, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 2, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
333 in SFmode, DFmode and XFmode */
334 2, /* cost of moving MMX register */
335 {2, 2}, /* cost of loading MMX registers
336 in SImode and DImode */
337 {2, 2}, /* cost of storing MMX registers
338 in SImode and DImode */
339 2, /* cost of moving SSE register */
340 {2, 2, 8}, /* cost of loading SSE registers
341 in SImode, DImode and TImode */
342 {2, 2, 8}, /* cost of storing SSE registers
343 in SImode, DImode and TImode */
344 3, /* MMX or SSE register to integer */
345 32, /* size of prefetch block */
346 6, /* number of parallel prefetches */
347 2, /* Branch cost */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
354 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
355 the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
356 blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
357 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
358 */
359 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
360 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
361 DUMMY_STRINGOP_ALGS},
362 {{rep_prefix_4_byte, {{1024, unrolled_loop},
363 {8192, rep_prefix_4_byte}, {-1, libcall}}},
364 DUMMY_STRINGOP_ALGS}
365 };
366
367 static const
368 struct processor_costs geode_cost = {
369 COSTS_N_INSNS (1), /* cost of an add instruction */
370 COSTS_N_INSNS (1), /* cost of a lea instruction */
371 COSTS_N_INSNS (2), /* variable shift costs */
372 COSTS_N_INSNS (1), /* constant shift costs */
373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
374 COSTS_N_INSNS (4), /* HI */
375 COSTS_N_INSNS (7), /* SI */
376 COSTS_N_INSNS (7), /* DI */
377 COSTS_N_INSNS (7)}, /* other */
378 0, /* cost of multiply per each bit set */
379 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
380 COSTS_N_INSNS (23), /* HI */
381 COSTS_N_INSNS (39), /* SI */
382 COSTS_N_INSNS (39), /* DI */
383 COSTS_N_INSNS (39)}, /* other */
384 COSTS_N_INSNS (1), /* cost of movsx */
385 COSTS_N_INSNS (1), /* cost of movzx */
386 8, /* "large" insn */
387 4, /* MOVE_RATIO */
388 1, /* cost for loading QImode using movzbl */
389 {1, 1, 1}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {1, 1, 1}, /* cost of storing integer registers */
393 1, /* cost of reg,reg fld/fst */
394 {1, 1, 1}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {4, 6, 6}, /* cost of storing fp registers
397 in SFmode, DFmode and XFmode */
398
399 1, /* cost of moving MMX register */
400 {1, 1}, /* cost of loading MMX registers
401 in SImode and DImode */
402 {1, 1}, /* cost of storing MMX registers
403 in SImode and DImode */
404 1, /* cost of moving SSE register */
405 {1, 1, 1}, /* cost of loading SSE registers
406 in SImode, DImode and TImode */
407 {1, 1, 1}, /* cost of storing SSE registers
408 in SImode, DImode and TImode */
409 1, /* MMX or SSE register to integer */
410 32, /* size of prefetch block */
411 1, /* number of parallel prefetches */
412 1, /* Branch cost */
413 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
414 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
415 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
416 COSTS_N_INSNS (1), /* cost of FABS instruction. */
417 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
418 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
419 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
420 DUMMY_STRINGOP_ALGS},
421 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
422 DUMMY_STRINGOP_ALGS}
423 };
424
425 static const
426 struct processor_costs k6_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (2), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (3), /* HI */
433 COSTS_N_INSNS (3), /* SI */
434 COSTS_N_INSNS (3), /* DI */
435 COSTS_N_INSNS (3)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (18), /* HI */
439 COSTS_N_INSNS (18), /* SI */
440 COSTS_N_INSNS (18), /* DI */
441 COSTS_N_INSNS (18)}, /* other */
442 COSTS_N_INSNS (2), /* cost of movsx */
443 COSTS_N_INSNS (2), /* cost of movzx */
444 8, /* "large" insn */
445 4, /* MOVE_RATIO */
446 3, /* cost for loading QImode using movzbl */
447 {4, 5, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 3, 2}, /* cost of storing integer registers */
451 4, /* cost of reg,reg fld/fst */
452 {6, 6, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 4}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 6, /* MMX or SSE register to integer */
467 32, /* size of prefetch block */
468 1, /* number of parallel prefetches */
469 1, /* Branch cost */
470 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
476 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
477 DUMMY_STRINGOP_ALGS},
478 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
479 DUMMY_STRINGOP_ALGS}
480 };
481
482 static const
483 struct processor_costs athlon_cost = {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (2), /* cost of a lea instruction */
486 COSTS_N_INSNS (1), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (5), /* HI */
490 COSTS_N_INSNS (5), /* SI */
491 COSTS_N_INSNS (5), /* DI */
492 COSTS_N_INSNS (5)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (26), /* HI */
496 COSTS_N_INSNS (42), /* SI */
497 COSTS_N_INSNS (74), /* DI */
498 COSTS_N_INSNS (74)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
502 9, /* MOVE_RATIO */
503 4, /* cost for loading QImode using movzbl */
504 {3, 4, 3}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {3, 4, 3}, /* cost of storing integer registers */
508 4, /* cost of reg,reg fld/fst */
509 {4, 4, 12}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {6, 6, 8}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
513 2, /* cost of moving MMX register */
514 {4, 4}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {4, 4}, /* cost of storing MMX registers
517 in SImode and DImode */
518 2, /* cost of moving SSE register */
519 {4, 4, 6}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {4, 4, 5}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 5, /* MMX or SSE register to integer */
524 64, /* size of prefetch block */
525 6, /* number of parallel prefetches */
526 5, /* Branch cost */
527 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
528 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
529 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
530 COSTS_N_INSNS (2), /* cost of FABS instruction. */
531 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
532 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
533 /* For some reason, Athlon deals better with REP prefix (relative to loops)
534 compared to K8. Alignment becomes important after 8 bytes for memcpy and
535 128 bytes for memset. */
536 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
537 DUMMY_STRINGOP_ALGS},
538 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
539 DUMMY_STRINGOP_ALGS}
540 };
541
542 static const
543 struct processor_costs k8_cost = {
544 COSTS_N_INSNS (1), /* cost of an add instruction */
545 COSTS_N_INSNS (2), /* cost of a lea instruction */
546 COSTS_N_INSNS (1), /* variable shift costs */
547 COSTS_N_INSNS (1), /* constant shift costs */
548 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
549 COSTS_N_INSNS (4), /* HI */
550 COSTS_N_INSNS (3), /* SI */
551 COSTS_N_INSNS (4), /* DI */
552 COSTS_N_INSNS (5)}, /* other */
553 0, /* cost of multiply per each bit set */
554 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
555 COSTS_N_INSNS (26), /* HI */
556 COSTS_N_INSNS (42), /* SI */
557 COSTS_N_INSNS (74), /* DI */
558 COSTS_N_INSNS (74)}, /* other */
559 COSTS_N_INSNS (1), /* cost of movsx */
560 COSTS_N_INSNS (1), /* cost of movzx */
561 8, /* "large" insn */
562 9, /* MOVE_RATIO */
563 4, /* cost for loading QImode using movzbl */
564 {3, 4, 3}, /* cost of loading integer registers
565 in QImode, HImode and SImode.
566 Relative to reg-reg move (2). */
567 {3, 4, 3}, /* cost of storing integer registers */
568 4, /* cost of reg,reg fld/fst */
569 {4, 4, 12}, /* cost of loading fp registers
570 in SFmode, DFmode and XFmode */
571 {6, 6, 8}, /* cost of storing fp registers
572 in SFmode, DFmode and XFmode */
573 2, /* cost of moving MMX register */
574 {3, 3}, /* cost of loading MMX registers
575 in SImode and DImode */
576 {4, 4}, /* cost of storing MMX registers
577 in SImode and DImode */
578 2, /* cost of moving SSE register */
579 {4, 3, 6}, /* cost of loading SSE registers
580 in SImode, DImode and TImode */
581 {4, 4, 5}, /* cost of storing SSE registers
582 in SImode, DImode and TImode */
583 5, /* MMX or SSE register to integer */
584 64, /* size of prefetch block */
585 /* New AMD processors never drop prefetches; if they cannot be performed
586 immediately, they are queued.  We set the number of simultaneous prefetches
587 to a large constant to reflect this (it is probably not a good idea not
588 to limit the number of prefetches at all, as their execution also takes some
589 time). */
590 100, /* number of parallel prefetches */
591 5, /* Branch cost */
592 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
593 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
594 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
595 COSTS_N_INSNS (2), /* cost of FABS instruction. */
596 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
597 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
598 /* K8 has optimized REP instructions for medium sized blocks, but for very small
599 blocks it is better to use a loop.  For large blocks, the libcall can do
600 nontemporal accesses and beat inlined code considerably. */
601 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
602 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
603 {{libcall, {{8, loop}, {24, unrolled_loop},
604 {2048, rep_prefix_4_byte}, {-1, libcall}}},
605 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
606 };
607
608 struct processor_costs amdfam10_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (2), /* cost of a lea instruction */
611 COSTS_N_INSNS (1), /* variable shift costs */
612 COSTS_N_INSNS (1), /* constant shift costs */
613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (4), /* HI */
615 COSTS_N_INSNS (3), /* SI */
616 COSTS_N_INSNS (4), /* DI */
617 COSTS_N_INSNS (5)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (35), /* HI */
621 COSTS_N_INSNS (51), /* SI */
622 COSTS_N_INSNS (83), /* DI */
623 COSTS_N_INSNS (83)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 8, /* "large" insn */
627 9, /* MOVE_RATIO */
628 4, /* cost for loading QImode using movzbl */
629 {3, 4, 3}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {3, 4, 3}, /* cost of storing integer registers */
633 4, /* cost of reg,reg fld/fst */
634 {4, 4, 12}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {6, 6, 8}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {3, 3}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {4, 4}, /* cost of storing MMX registers
642 in SImode and DImode */
643 2, /* cost of moving SSE register */
644 {4, 4, 3}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {4, 4, 5}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 3, /* MMX or SSE register to integer */
649 /* On K8
650 MOVD reg64, xmmreg Double FSTORE 4
651 MOVD reg32, xmmreg Double FSTORE 4
652 On AMDFAM10
653 MOVD reg64, xmmreg Double FADD 3
654 1/1 1/1
655 MOVD reg32, xmmreg Double FADD 3
656 1/1 1/1 */
657 64, /* size of prefetch block */
658 /* New AMD processors never drop prefetches; if they cannot be performed
659 immediately, they are queued.  We set the number of simultaneous prefetches
660 to a large constant to reflect this (it is probably not a good idea not
661 to limit the number of prefetches at all, as their execution also takes some
662 time). */
663 100, /* number of parallel prefetches */
664 5, /* Branch cost */
665 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
666 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
667 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
668 COSTS_N_INSNS (2), /* cost of FABS instruction. */
669 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
670 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671
672 /* AMDFAM10 has optimized REP instructions for medium sized blocks, but for
673 very small blocks it is better to use a loop.  For large blocks, the libcall
674 can do nontemporal accesses and beat inlined code considerably. */
675 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
676 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
677 {{libcall, {{8, loop}, {24, unrolled_loop},
678 {2048, rep_prefix_4_byte}, {-1, libcall}}},
679 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
680 };
681
682 static const
683 struct processor_costs pentium4_cost = {
684 COSTS_N_INSNS (1), /* cost of an add instruction */
685 COSTS_N_INSNS (3), /* cost of a lea instruction */
686 COSTS_N_INSNS (4), /* variable shift costs */
687 COSTS_N_INSNS (4), /* constant shift costs */
688 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
689 COSTS_N_INSNS (15), /* HI */
690 COSTS_N_INSNS (15), /* SI */
691 COSTS_N_INSNS (15), /* DI */
692 COSTS_N_INSNS (15)}, /* other */
693 0, /* cost of multiply per each bit set */
694 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
695 COSTS_N_INSNS (56), /* HI */
696 COSTS_N_INSNS (56), /* SI */
697 COSTS_N_INSNS (56), /* DI */
698 COSTS_N_INSNS (56)}, /* other */
699 COSTS_N_INSNS (1), /* cost of movsx */
700 COSTS_N_INSNS (1), /* cost of movzx */
701 16, /* "large" insn */
702 6, /* MOVE_RATIO */
703 2, /* cost for loading QImode using movzbl */
704 {4, 5, 4}, /* cost of loading integer registers
705 in QImode, HImode and SImode.
706 Relative to reg-reg move (2). */
707 {2, 3, 2}, /* cost of storing integer registers */
708 2, /* cost of reg,reg fld/fst */
709 {2, 2, 6}, /* cost of loading fp registers
710 in SFmode, DFmode and XFmode */
711 {4, 4, 6}, /* cost of storing fp registers
712 in SFmode, DFmode and XFmode */
713 2, /* cost of moving MMX register */
714 {2, 2}, /* cost of loading MMX registers
715 in SImode and DImode */
716 {2, 2}, /* cost of storing MMX registers
717 in SImode and DImode */
718 12, /* cost of moving SSE register */
719 {12, 12, 12}, /* cost of loading SSE registers
720 in SImode, DImode and TImode */
721 {2, 2, 8}, /* cost of storing SSE registers
722 in SImode, DImode and TImode */
723 10, /* MMX or SSE register to integer */
724 64, /* size of prefetch block */
725 6, /* number of parallel prefetches */
726 2, /* Branch cost */
727 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
728 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
729 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
730 COSTS_N_INSNS (2), /* cost of FABS instruction. */
731 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
732 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
733 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
734 DUMMY_STRINGOP_ALGS},
735 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
736 {-1, libcall}}},
737 DUMMY_STRINGOP_ALGS},
738 };
739
740 static const
741 struct processor_costs nocona_cost = {
742 COSTS_N_INSNS (1), /* cost of an add instruction */
743 COSTS_N_INSNS (1), /* cost of a lea instruction */
744 COSTS_N_INSNS (1), /* variable shift costs */
745 COSTS_N_INSNS (1), /* constant shift costs */
746 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
747 COSTS_N_INSNS (10), /* HI */
748 COSTS_N_INSNS (10), /* SI */
749 COSTS_N_INSNS (10), /* DI */
750 COSTS_N_INSNS (10)}, /* other */
751 0, /* cost of multiply per each bit set */
752 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
753 COSTS_N_INSNS (66), /* HI */
754 COSTS_N_INSNS (66), /* SI */
755 COSTS_N_INSNS (66), /* DI */
756 COSTS_N_INSNS (66)}, /* other */
757 COSTS_N_INSNS (1), /* cost of movsx */
758 COSTS_N_INSNS (1), /* cost of movzx */
759 16, /* "large" insn */
760 17, /* MOVE_RATIO */
761 4, /* cost for loading QImode using movzbl */
762 {4, 4, 4}, /* cost of loading integer registers
763 in QImode, HImode and SImode.
764 Relative to reg-reg move (2). */
765 {4, 4, 4}, /* cost of storing integer registers */
766 3, /* cost of reg,reg fld/fst */
767 {12, 12, 12}, /* cost of loading fp registers
768 in SFmode, DFmode and XFmode */
769 {4, 4, 4}, /* cost of storing fp registers
770 in SFmode, DFmode and XFmode */
771 6, /* cost of moving MMX register */
772 {12, 12}, /* cost of loading MMX registers
773 in SImode and DImode */
774 {12, 12}, /* cost of storing MMX registers
775 in SImode and DImode */
776 6, /* cost of moving SSE register */
777 {12, 12, 12}, /* cost of loading SSE registers
778 in SImode, DImode and TImode */
779 {12, 12, 12}, /* cost of storing SSE registers
780 in SImode, DImode and TImode */
781 8, /* MMX or SSE register to integer */
782 128, /* size of prefetch block */
783 8, /* number of parallel prefetches */
784 1, /* Branch cost */
785 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (3), /* cost of FABS instruction. */
789 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
791 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
792 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
793 {100000, unrolled_loop}, {-1, libcall}}}},
794 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
795 {-1, libcall}}},
796 {libcall, {{24, loop}, {64, unrolled_loop},
797 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
798 };
799
800 static const
801 struct processor_costs core2_cost = {
802 COSTS_N_INSNS (1), /* cost of an add instruction */
803 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
804 COSTS_N_INSNS (1), /* variable shift costs */
805 COSTS_N_INSNS (1), /* constant shift costs */
806 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
807 COSTS_N_INSNS (3), /* HI */
808 COSTS_N_INSNS (3), /* SI */
809 COSTS_N_INSNS (3), /* DI */
810 COSTS_N_INSNS (3)}, /* other */
811 0, /* cost of multiply per each bit set */
812 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
813 COSTS_N_INSNS (22), /* HI */
814 COSTS_N_INSNS (22), /* SI */
815 COSTS_N_INSNS (22), /* DI */
816 COSTS_N_INSNS (22)}, /* other */
817 COSTS_N_INSNS (1), /* cost of movsx */
818 COSTS_N_INSNS (1), /* cost of movzx */
819 8, /* "large" insn */
820 16, /* MOVE_RATIO */
821 2, /* cost for loading QImode using movzbl */
822 {6, 6, 6}, /* cost of loading integer registers
823 in QImode, HImode and SImode.
824 Relative to reg-reg move (2). */
825 {4, 4, 4}, /* cost of storing integer registers */
826 2, /* cost of reg,reg fld/fst */
827 {6, 6, 6}, /* cost of loading fp registers
828 in SFmode, DFmode and XFmode */
829 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
830 2, /* cost of moving MMX register */
831 {6, 6}, /* cost of loading MMX registers
832 in SImode and DImode */
833 {4, 4}, /* cost of storing MMX registers
834 in SImode and DImode */
835 2, /* cost of moving SSE register */
836 {6, 6, 6}, /* cost of loading SSE registers
837 in SImode, DImode and TImode */
838 {4, 4, 4}, /* cost of storing SSE registers
839 in SImode, DImode and TImode */
840 2, /* MMX or SSE register to integer */
841 128, /* size of prefetch block */
842 8, /* number of parallel prefetches */
843 3, /* Branch cost */
844 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
845 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
846 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
847 COSTS_N_INSNS (1), /* cost of FABS instruction. */
848 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
849 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
850 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
851 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
852 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
853 {{libcall, {{8, loop}, {15, unrolled_loop},
854 {2048, rep_prefix_4_byte}, {-1, libcall}}},
855 {libcall, {{24, loop}, {32, unrolled_loop},
856 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
857 };
858
859 /* Generic64 should produce code tuned for Nocona and K8. */
860 static const
861 struct processor_costs generic64_cost = {
862 COSTS_N_INSNS (1), /* cost of an add instruction */
863 /* On all chips taken into consideration lea takes 2 cycles or more.  With
864 this cost, however, our current implementation of synth_mult results in the
865 use of unnecessary temporary registers, causing regressions on several
866 SPECfp benchmarks.  */
867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
868 COSTS_N_INSNS (1), /* variable shift costs */
869 COSTS_N_INSNS (1), /* constant shift costs */
870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
871 COSTS_N_INSNS (4), /* HI */
872 COSTS_N_INSNS (3), /* SI */
873 COSTS_N_INSNS (4), /* DI */
874 COSTS_N_INSNS (2)}, /* other */
875 0, /* cost of multiply per each bit set */
876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
877 COSTS_N_INSNS (26), /* HI */
878 COSTS_N_INSNS (42), /* SI */
879 COSTS_N_INSNS (74), /* DI */
880 COSTS_N_INSNS (74)}, /* other */
881 COSTS_N_INSNS (1), /* cost of movsx */
882 COSTS_N_INSNS (1), /* cost of movzx */
883 8, /* "large" insn */
884 17, /* MOVE_RATIO */
885 4, /* cost for loading QImode using movzbl */
886 {4, 4, 4}, /* cost of loading integer registers
887 in QImode, HImode and SImode.
888 Relative to reg-reg move (2). */
889 {4, 4, 4}, /* cost of storing integer registers */
890 4, /* cost of reg,reg fld/fst */
891 {12, 12, 12}, /* cost of loading fp registers
892 in SFmode, DFmode and XFmode */
893 {6, 6, 8}, /* cost of storing fp registers
894 in SFmode, DFmode and XFmode */
895 2, /* cost of moving MMX register */
896 {8, 8}, /* cost of loading MMX registers
897 in SImode and DImode */
898 {8, 8}, /* cost of storing MMX registers
899 in SImode and DImode */
900 2, /* cost of moving SSE register */
901 {8, 8, 8}, /* cost of loading SSE registers
902 in SImode, DImode and TImode */
903 {8, 8, 8}, /* cost of storing SSE registers
904 in SImode, DImode and TImode */
905 5, /* MMX or SSE register to integer */
906 64, /* size of prefetch block */
907 6, /* number of parallel prefetches */
908 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
909 is increased to the perhaps more appropriate value of 5. */
910 3, /* Branch cost */
911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
917 {DUMMY_STRINGOP_ALGS,
918 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
919 {DUMMY_STRINGOP_ALGS,
920 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
921 };
922
923 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 static const
925 struct processor_costs generic32_cost = {
926 COSTS_N_INSNS (1), /* cost of an add instruction */
927 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
928 COSTS_N_INSNS (1), /* variable shift costs */
929 COSTS_N_INSNS (1), /* constant shift costs */
930 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
931 COSTS_N_INSNS (4), /* HI */
932 COSTS_N_INSNS (3), /* SI */
933 COSTS_N_INSNS (4), /* DI */
934 COSTS_N_INSNS (2)}, /* other */
935 0, /* cost of multiply per each bit set */
936 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
937 COSTS_N_INSNS (26), /* HI */
938 COSTS_N_INSNS (42), /* SI */
939 COSTS_N_INSNS (74), /* DI */
940 COSTS_N_INSNS (74)}, /* other */
941 COSTS_N_INSNS (1), /* cost of movsx */
942 COSTS_N_INSNS (1), /* cost of movzx */
943 8, /* "large" insn */
944 17, /* MOVE_RATIO */
945 4, /* cost for loading QImode using movzbl */
946 {4, 4, 4}, /* cost of loading integer registers
947 in QImode, HImode and SImode.
948 Relative to reg-reg move (2). */
949 {4, 4, 4}, /* cost of storing integer registers */
950 4, /* cost of reg,reg fld/fst */
951 {12, 12, 12}, /* cost of loading fp registers
952 in SFmode, DFmode and XFmode */
953 {6, 6, 8}, /* cost of storing fp registers
954 in SFmode, DFmode and XFmode */
955 2, /* cost of moving MMX register */
956 {8, 8}, /* cost of loading MMX registers
957 in SImode and DImode */
958 {8, 8}, /* cost of storing MMX registers
959 in SImode and DImode */
960 2, /* cost of moving SSE register */
961 {8, 8, 8}, /* cost of loading SSE registers
962 in SImode, DImode and TImode */
963 {8, 8, 8}, /* cost of storing SSE registers
964 in SImode, DImode and TImode */
965 5, /* MMX or SSE register to integer */
966 64, /* size of prefetch block */
967 6, /* number of parallel prefetches */
968 3, /* Branch cost */
969 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
970 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
971 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
972 COSTS_N_INSNS (8), /* cost of FABS instruction. */
973 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
974 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
975 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
976 DUMMY_STRINGOP_ALGS},
977 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
978 DUMMY_STRINGOP_ALGS},
979 };
980
981 const struct processor_costs *ix86_cost = &pentium_cost;
982
983 /* Processor feature/optimization bitmasks. */
984 #define m_386 (1<<PROCESSOR_I386)
985 #define m_486 (1<<PROCESSOR_I486)
986 #define m_PENT (1<<PROCESSOR_PENTIUM)
987 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
988 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
989 #define m_NOCONA (1<<PROCESSOR_NOCONA)
990 #define m_CORE2 (1<<PROCESSOR_CORE2)
991
992 #define m_GEODE (1<<PROCESSOR_GEODE)
993 #define m_K6 (1<<PROCESSOR_K6)
994 #define m_K6_GEODE (m_K6 | m_GEODE)
995 #define m_K8 (1<<PROCESSOR_K8)
996 #define m_ATHLON (1<<PROCESSOR_ATHLON)
997 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
998 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
999 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000
1001 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1002 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003
1004 /* Generic instruction choice should be a common subset of the supported CPUs
1005 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1006 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007
1008 /* Feature tests against the various tunings. */
1009 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1010 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1011 negatively, so enabling it for Generic64 seems like a good code size
1012 tradeoff.  We can't enable it for 32bit generic because it does not
1013 work well with PPro-based chips. */
1014 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015
1016 /* X86_TUNE_PUSH_MEMORY */
1017 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1018 | m_NOCONA | m_CORE2 | m_GENERIC,
1019
1020 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1021 m_486 | m_PENT,
1022
1023 /* X86_TUNE_USE_BIT_TEST */
1024 m_386,
1025
1026 /* X86_TUNE_UNROLL_STRLEN */
1027 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028
1029 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1030 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4 based
1033 on simulation results.  But after the P4 shipped, no performance benefit
1034 was observed with branch hints, and they also increase code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls in the Generic32 compilation setting as well.  However,
1052 in the current implementation partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences.  Because this option
1055 pays back little on PPro-based chips and conflicts with the partial reg
1056 dependencies used by Athlon/P4-based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls were more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4-based chips that treat 128bit
1136 SSE registers as single units and K8-based chips that split SSE
1137 registers into two 64bit halves.  This knob promotes all store destinations
1138 to 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra micro-op on 64bit SSE units.  Experimental results
1140 show that disabling this option on P4 brings over a 20% SPECfp regression,
1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just the lower part of scalar values in the proper format, leaving the
1151 upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1204 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1205 ~(m_386 | m_486),
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 machines. */
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1216 than a MOV. */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1221 m_PENT,
1222
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1226 m_K6,
1227 };
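/* A minimal sketch of how an entry above is consumed (the real TARGET_*
   convenience macros live in i386.h; ix86_tune is declared later in this
   file): each entry is a bitmask of PROCESSOR_* values, so a tuning applies
   when something along the lines of
       (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)) != 0
   holds for the processor currently being tuned for.  */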
1228
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1232 ~(m_386 | m_486 | m_PENT | m_K6),
1233
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1235 ~m_386,
1236
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1238 ~(m_386 | m_486),
1239
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1241 ~m_386,
1242
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1244 ~m_386,
1245 };
1246
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1249
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1253
1254 static enum stringop_alg stringop_alg = no_stringop;
1255
1256 /* In case the average insn count for a single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and
1258 epilogue code. */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1260
1261 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1270 {
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1275 /* FP registers */
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1278 /* arg pointer */
1279 NON_Q_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1282 /* SSE registers */
1283 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1284 SSE_REGS, SSE_REGS,
1285 /* MMX registers */
1286 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1287 MMX_REGS, MMX_REGS,
1288 /* REX registers */
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1291 /* SSE REX registers */
1292 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1293 SSE_REGS, SSE_REGS,
1294 };
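/* Example readings of the table above, using the register ordering spelled
   out in the element comments: REGNO_REG_CLASS (0) is AREG (%eax),
   REGNO_REG_CLASS (1) is DREG (%edx), and REGNO_REG_CLASS (7) is NON_Q_REGS
   (%esp), since %esp is not one of the a/b/c/d registers with QImode parts
   in 32bit code.  */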
1295
1296 /* The "default" register map used in 32bit mode. */
1297
1298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1299 {
1300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1307 };
1308
1309 static int const x86_64_int_parameter_registers[6] =
1310 {
1311 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1312 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1313 };
1314
1315 static int const x86_64_ms_abi_int_parameter_registers[4] =
1316 {
1317 2 /*RCX*/, 1 /*RDX*/,
1318 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1319 };
1320
1321 static int const x86_64_int_return_registers[4] =
1322 {
1323 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1324 };
1325
1326 /* The "default" register map used in 64bit mode. */
1327 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1328 {
1329 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1330 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1331 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1332 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1333 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1334 8,9,10,11,12,13,14,15, /* extended integer registers */
1335 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1336 };
1337
1338 /* Define the register numbers to be used in Dwarf debugging information.
1339 The SVR4 reference port C compiler uses the following register numbers
1340 in its Dwarf output code:
1341 0 for %eax (gcc regno = 0)
1342 1 for %ecx (gcc regno = 2)
1343 2 for %edx (gcc regno = 1)
1344 3 for %ebx (gcc regno = 3)
1345 4 for %esp (gcc regno = 7)
1346 5 for %ebp (gcc regno = 6)
1347 6 for %esi (gcc regno = 4)
1348 7 for %edi (gcc regno = 5)
1349 The following three DWARF register numbers are never generated by
1350 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1351 believes these numbers have these meanings.
1352 8 for %eip (no gcc equivalent)
1353 9 for %eflags (gcc regno = 17)
1354 10 for %trapno (no gcc equivalent)
1355 It is not at all clear how we should number the FP stack registers
1356 for the x86 architecture. If the version of SDB on x86/svr4 were
1357 a bit less brain dead with respect to floating-point then we would
1358 have a precedent to follow with respect to DWARF register numbers
1359 for x86 FP registers, but the SDB on x86/svr4 is so completely
1360 broken with respect to FP registers that it is hardly worth thinking
1361 of it as something to strive for compatibility with.
1362 The version of x86/svr4 SDB I have at the moment does (partially)
1363 seem to believe that DWARF register number 11 is associated with
1364 the x86 register %st(0), but that's about all. Higher DWARF
1365 register numbers don't seem to be associated with anything in
1366 particular, and even for DWARF regno 11, SDB only seems to under-
1367 stand that it should say that a variable lives in %st(0) (when
1368 asked via an `=' command) if we said it was in DWARF regno 11,
1369 but SDB still prints garbage when asked for the value of the
1370 variable in question (via a `/' command).
1371 (Also note that the labels SDB prints for various FP stack regs
1372 when doing an `x' command are all wrong.)
1373 Note that these problems generally don't affect the native SVR4
1374 C compiler because it doesn't allow the use of -O with -g and
1375 because when it is *not* optimizing, it allocates a memory
1376 location for each floating-point variable, and the memory
1377 location is what gets described in the DWARF AT_location
1378 attribute for the variable in question.
1379 Regardless of the severe mental illness of the x86/svr4 SDB, we
1380 do something sensible here and we use the following DWARF
1381 register numbers. Note that these are all stack-top-relative
1382 numbers.
1383 11 for %st(0) (gcc regno = 8)
1384 12 for %st(1) (gcc regno = 9)
1385 13 for %st(2) (gcc regno = 10)
1386 14 for %st(3) (gcc regno = 11)
1387 15 for %st(4) (gcc regno = 12)
1388 16 for %st(5) (gcc regno = 13)
1389 17 for %st(6) (gcc regno = 14)
1390 18 for %st(7) (gcc regno = 15)
1391 */
1392 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1393 {
1394 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1395 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1396 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1397 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1398 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1400 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1401 };
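
/* Illustrative sketch, not part of the original sources: a hypothetical,
   disabled helper showing how the register maps above translate a GCC hard
   register number into a debugger register number.  The helper name and the
   choice between the two maps are assumptions for illustration only; in the
   compiler these arrays are indexed elsewhere when emitting debug info.  */
#if 0
static int
example_debugger_regno (int gcc_regno)
{
  /* Use the 64-bit map when generating 64-bit code, the SVR4 map otherwise
     (illustrative selection; the real choice depends on target headers).  */
  return TARGET_64BIT ? dbx64_register_map[gcc_regno]
		      : svr4_dbx_register_map[gcc_regno];
}
#endif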
1402
1403 /* Test and compare insns in i386.md store the information needed to
1404 generate branch and scc insns here. */
1405
1406 rtx ix86_compare_op0 = NULL_RTX;
1407 rtx ix86_compare_op1 = NULL_RTX;
1408 rtx ix86_compare_emitted = NULL_RTX;
1409
1410 /* Size of the register save area. */
1411 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1412
1413 /* Define the structure for the machine field in struct function. */
1414
1415 struct stack_local_entry GTY(())
1416 {
1417 unsigned short mode;
1418 unsigned short n;
1419 rtx rtl;
1420 struct stack_local_entry *next;
1421 };
1422
1423 /* Structure describing stack frame layout.
1424 Stack grows downward:
1425
1426 [arguments]
1427 <- ARG_POINTER
1428 saved pc
1429
1430 saved frame pointer if frame_pointer_needed
1431 <- HARD_FRAME_POINTER
1432 [saved regs]
1433
1434 [padding1] \
1435 )
1436 [va_arg registers] (
1437 > to_allocate <- FRAME_POINTER
1438 [frame] (
1439 )
1440 [padding2] /
1441 */
1442 struct ix86_frame
1443 {
1444 int nregs;
1445 int padding1;
1446 int va_arg_size;
1447 HOST_WIDE_INT frame;
1448 int padding2;
1449 int outgoing_arguments_size;
1450 int red_zone_size;
1451
1452 HOST_WIDE_INT to_allocate;
1453 /* The offsets relative to ARG_POINTER. */
1454 HOST_WIDE_INT frame_pointer_offset;
1455 HOST_WIDE_INT hard_frame_pointer_offset;
1456 HOST_WIDE_INT stack_pointer_offset;
1457
1458 /* When save_regs_using_mov is set, emit prologue using
1459 move instead of push instructions. */
1460 bool save_regs_using_mov;
1461 };
1462
1463 /* Code model option. */
1464 enum cmodel ix86_cmodel;
1465 /* Asm dialect. */
1466 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1467 /* TLS dialects. */
1468 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1469
1470 /* Which unit we are generating floating point math for. */
1471 enum fpmath_unit ix86_fpmath;
1472
1473 /* Which CPU we are scheduling for. */
1474 enum processor_type ix86_tune;
1475
1476 /* Which instruction set architecture to use. */
1477 enum processor_type ix86_arch;
1478
1479 /* True if the SSE prefetch instruction is not a NOP. */
1480 int x86_prefetch_sse;
1481
1482 /* ix86_regparm_string as a number */
1483 static int ix86_regparm;
1484
1485 /* -mstackrealign option */
1486 extern int ix86_force_align_arg_pointer;
1487 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1488
1489 /* Preferred alignment for stack boundary in bits. */
1490 unsigned int ix86_preferred_stack_boundary;
1491
1492 /* Values 1-5: see jump.c */
1493 int ix86_branch_cost;
1494
1495 /* Variables which are this size or smaller are put in the data/bss
1496 or ldata/lbss sections. */
1497
1498 int ix86_section_threshold = 65536;
1499
1500 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1501 char internal_label_prefix[16];
1502 int internal_label_prefix_len;
1503
1504 /* Fence to use after loop using movnt. */
1505 tree x86_mfence;
1506
1507 /* Register class used for passing given 64bit part of the argument.
1508 These represent classes as documented by the PS ABI, with the exception
1509 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1510 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1511
1512 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1513 whenever possible (upper half does contain padding). */
1514 enum x86_64_reg_class
1515 {
1516 X86_64_NO_CLASS,
1517 X86_64_INTEGER_CLASS,
1518 X86_64_INTEGERSI_CLASS,
1519 X86_64_SSE_CLASS,
1520 X86_64_SSESF_CLASS,
1521 X86_64_SSEDF_CLASS,
1522 X86_64_SSEUP_CLASS,
1523 X86_64_X87_CLASS,
1524 X86_64_X87UP_CLASS,
1525 X86_64_COMPLEX_X87_CLASS,
1526 X86_64_MEMORY_CLASS
1527 };
1528 static const char * const x86_64_reg_class_name[] =
1529 {
1530 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1531 "sseup", "x87", "x87up", "cplx87", "no"
1532 };
1533
1534 #define MAX_CLASSES 4
1535
1536 /* Table of constants used by fldpi, fldln2, etc.... */
1537 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1538 static bool ext_80387_constants_init = 0;
1539
1540 \f
1541 static struct machine_function * ix86_init_machine_status (void);
1542 static rtx ix86_function_value (tree, tree, bool);
1543 static int ix86_function_regparm (tree, tree);
1544 static void ix86_compute_frame_layout (struct ix86_frame *);
1545 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1546 rtx, rtx, int);
1547
1548 \f
1549 /* The svr4 ABI for the i386 says that records and unions are returned
1550 in memory. */
1551 #ifndef DEFAULT_PCC_STRUCT_RETURN
1552 #define DEFAULT_PCC_STRUCT_RETURN 1
1553 #endif
1554
1555 /* Bit flags that specify the ISA we are compiling for. */
1556 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1557
1558 /* A mask of ix86_isa_flags that includes bit X if X
1559 was set or cleared on the command line. */
1560 static int ix86_isa_flags_explicit;
1561
1562 /* Define a set of ISAs which aren't available for a given ISA. MMX
1563 and SSE ISAs are handled separately. */
1564
1565 #define OPTION_MASK_ISA_MMX_UNSET \
1566 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1567 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1568
1569 #define OPTION_MASK_ISA_SSE_UNSET \
1570 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1571 #define OPTION_MASK_ISA_SSE2_UNSET \
1572 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1573 #define OPTION_MASK_ISA_SSE3_UNSET \
1574 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1575 #define OPTION_MASK_ISA_SSSE3_UNSET \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1577 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1578 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1579 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1580
1581 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1582 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1583 #define OPTION_MASK_ISA_SSE4 \
1584 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1585 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1586
1587 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1588
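
/* Note how the *_UNSET masks above chain together: each one names the next
   ISA plus that ISA's own UNSET mask, so OPTION_MASK_ISA_SSE2_UNSET, for
   instance, expands to SSE3 | SSSE3 | SSE4.1 | SSE4.2 | SSE4A.  As a result,
   an option such as -mno-sse2 also turns off every later SSE extension that
   depends on SSE2.  */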
1589 /* Implement TARGET_HANDLE_OPTION. */
1590
1591 static bool
1592 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1593 {
1594 switch (code)
1595 {
1596 case OPT_mmmx:
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1598 if (!value)
1599 {
1600 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1602 }
1603 return true;
1604
1605 case OPT_m3dnow:
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1607 if (!value)
1608 {
1609 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1611 }
1612 return true;
1613
1614 case OPT_m3dnowa:
1615 return false;
1616
1617 case OPT_msse:
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1619 if (!value)
1620 {
1621 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1623 }
1624 return true;
1625
1626 case OPT_msse2:
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1628 if (!value)
1629 {
1630 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1632 }
1633 return true;
1634
1635 case OPT_msse3:
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1637 if (!value)
1638 {
1639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1641 }
1642 return true;
1643
1644 case OPT_mssse3:
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1646 if (!value)
1647 {
1648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1650 }
1651 return true;
1652
1653 case OPT_msse4_1:
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1655 if (!value)
1656 {
1657 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1658 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1659 }
1660 return true;
1661
1662 case OPT_msse4_2:
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1664 if (!value)
1665 {
1666 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1668 }
1669 return true;
1670
1671 case OPT_msse4:
1672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1674 return true;
1675
1676 case OPT_mno_sse4:
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1679 return true;
1680
1681 case OPT_msse4a:
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1683 if (!value)
1684 {
1685 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1686 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1687 }
1688 return true;
1689
1690 default:
1691 return true;
1692 }
1693 }
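
/* For example, handling "-mno-ssse3" above records OPTION_MASK_ISA_SSSE3 in
   ix86_isa_flags_explicit and then clears OPTION_MASK_ISA_SSSE3_UNSET
   (SSE4.1, SSE4.2 and SSE4A) from ix86_isa_flags, marking those bits as
   explicit too, so that later defaulting code does not silently re-enable
   them.  The SSSE3 bit itself is presumably cleared by the generic option
   machinery that processes the flag's definition in i386.opt.  */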
1694
1695 /* Sometimes certain combinations of command options do not make
1696 sense on a particular target machine. You can define a macro
1697 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1698 defined, is executed once just after all the command options have
1699 been parsed.
1700
1701 Don't use this macro to turn on various extra optimizations for
1702 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1703
1704 void
1705 override_options (void)
1706 {
1707 int i;
1708 int ix86_tune_defaulted = 0;
1709 int ix86_arch_specified = 0;
1710 unsigned int ix86_arch_mask, ix86_tune_mask;
1711
1712 /* Comes from final.c -- no real reason to change it. */
1713 #define MAX_CODE_ALIGN 16
1714
1715 static struct ptt
1716 {
1717 const struct processor_costs *cost; /* Processor costs */
1718 const int align_loop; /* Default alignments. */
1719 const int align_loop_max_skip;
1720 const int align_jump;
1721 const int align_jump_max_skip;
1722 const int align_func;
1723 }
1724 const processor_target_table[PROCESSOR_max] =
1725 {
1726 {&i386_cost, 4, 3, 4, 3, 4},
1727 {&i486_cost, 16, 15, 16, 15, 16},
1728 {&pentium_cost, 16, 7, 16, 7, 16},
1729 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1730 {&geode_cost, 0, 0, 0, 0, 0},
1731 {&k6_cost, 32, 7, 32, 7, 32},
1732 {&athlon_cost, 16, 7, 16, 7, 16},
1733 {&pentium4_cost, 0, 0, 0, 0, 0},
1734 {&k8_cost, 16, 7, 16, 7, 16},
1735 {&nocona_cost, 0, 0, 0, 0, 0},
1736 {&core2_cost, 16, 10, 16, 10, 16},
1737 {&generic32_cost, 16, 7, 16, 7, 16},
1738 {&generic64_cost, 16, 10, 16, 10, 16},
1739 {&amdfam10_cost, 32, 24, 32, 7, 32}
1740 };
1741
1742 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1743 enum pta_flags
1744 {
1745 PTA_SSE = 1 << 0,
1746 PTA_SSE2 = 1 << 1,
1747 PTA_SSE3 = 1 << 2,
1748 PTA_MMX = 1 << 3,
1749 PTA_PREFETCH_SSE = 1 << 4,
1750 PTA_3DNOW = 1 << 5,
1751 PTA_3DNOW_A = 1 << 6,
1752 PTA_64BIT = 1 << 7,
1753 PTA_SSSE3 = 1 << 8,
1754 PTA_CX16 = 1 << 9,
1755 PTA_POPCNT = 1 << 10,
1756 PTA_ABM = 1 << 11,
1757 PTA_SSE4A = 1 << 12,
1758 PTA_NO_SAHF = 1 << 13,
1759 PTA_SSE4_1 = 1 << 14,
1760 PTA_SSE4_2 = 1 << 15
1761 };
1762
1763 static struct pta
1764 {
1765 const char *const name; /* processor name or nickname. */
1766 const enum processor_type processor;
1767 const unsigned /*enum pta_flags*/ flags;
1768 }
1769 const processor_alias_table[] =
1770 {
1771 {"i386", PROCESSOR_I386, 0},
1772 {"i486", PROCESSOR_I486, 0},
1773 {"i586", PROCESSOR_PENTIUM, 0},
1774 {"pentium", PROCESSOR_PENTIUM, 0},
1775 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1776 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1777 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1779 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1780 {"i686", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1782 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1783 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1784 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1785 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1786 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1787 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1788 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1789 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1790 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1791 | PTA_CX16 | PTA_NO_SAHF)},
1792 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1793 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1794 | PTA_SSSE3
1795 | PTA_CX16)},
1796 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1797 | PTA_PREFETCH_SSE)},
1798 {"k6", PROCESSOR_K6, PTA_MMX},
1799 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1800 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1801 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1802 | PTA_PREFETCH_SSE)},
1803 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1804 | PTA_PREFETCH_SSE)},
1805 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1806 | PTA_SSE)},
1807 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1808 | PTA_SSE)},
1809 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1810 | PTA_SSE)},
1811 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1812 | PTA_MMX | PTA_SSE | PTA_SSE2
1813 | PTA_NO_SAHF)},
1814 {"k8", PROCESSOR_K8, (PTA_64BIT
1815 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1816 | PTA_SSE | PTA_SSE2
1817 | PTA_NO_SAHF)},
1818 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1819 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1820 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1821 | PTA_NO_SAHF)},
1822 {"opteron", PROCESSOR_K8, (PTA_64BIT
1823 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1824 | PTA_SSE | PTA_SSE2
1825 | PTA_NO_SAHF)},
1826 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1827 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1828 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1829 | PTA_NO_SAHF)},
1830 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1831 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1832 | PTA_SSE | PTA_SSE2
1833 | PTA_NO_SAHF)},
1834 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1835 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1836 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1837 | PTA_NO_SAHF)},
1838 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1839 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1840 | PTA_SSE | PTA_SSE2
1841 | PTA_NO_SAHF)},
1842 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1843 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1845 | PTA_SSE4A
1846 | PTA_CX16 | PTA_ABM)},
1847 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1848 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1849 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1850 | PTA_SSE4A
1851 | PTA_CX16 | PTA_ABM)},
1852 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1853 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1854 };
1855
1856 int const pta_size = ARRAY_SIZE (processor_alias_table);
1857
1858 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1859 SUBTARGET_OVERRIDE_OPTIONS;
1860 #endif
1861
1862 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1863 SUBSUBTARGET_OVERRIDE_OPTIONS;
1864 #endif
1865
1866 /* -fPIC is the default for x86_64. */
1867 if (TARGET_MACHO && TARGET_64BIT)
1868 flag_pic = 2;
1869
1870 /* Set the default values for switches whose default depends on TARGET_64BIT
1871 in case they weren't overridden by command line options. */
1872 if (TARGET_64BIT)
1873 {
1874 /* Mach-O doesn't support omitting the frame pointer for now. */
1875 if (flag_omit_frame_pointer == 2)
1876 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1877 if (flag_asynchronous_unwind_tables == 2)
1878 flag_asynchronous_unwind_tables = 1;
1879 if (flag_pcc_struct_return == 2)
1880 flag_pcc_struct_return = 0;
1881 }
1882 else
1883 {
1884 if (flag_omit_frame_pointer == 2)
1885 flag_omit_frame_pointer = 0;
1886 if (flag_asynchronous_unwind_tables == 2)
1887 flag_asynchronous_unwind_tables = 0;
1888 if (flag_pcc_struct_return == 2)
1889 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1890 }
1891
1892 /* Need to check -mtune=generic first. */
1893 if (ix86_tune_string)
1894 {
1895 if (!strcmp (ix86_tune_string, "generic")
1896 || !strcmp (ix86_tune_string, "i686")
1897 /* As special support for cross compilers we read -mtune=native
1898 as -mtune=generic. With native compilers we won't see the
1899 -mtune=native, as it was changed by the driver. */
1900 || !strcmp (ix86_tune_string, "native"))
1901 {
1902 if (TARGET_64BIT)
1903 ix86_tune_string = "generic64";
1904 else
1905 ix86_tune_string = "generic32";
1906 }
1907 else if (!strncmp (ix86_tune_string, "generic", 7))
1908 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1909 }
1910 else
1911 {
1912 if (ix86_arch_string)
1913 ix86_tune_string = ix86_arch_string;
1914 if (!ix86_tune_string)
1915 {
1916 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1917 ix86_tune_defaulted = 1;
1918 }
1919
1920 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1921 need to use a sensible tune option. */
1922 if (!strcmp (ix86_tune_string, "generic")
1923 || !strcmp (ix86_tune_string, "x86-64")
1924 || !strcmp (ix86_tune_string, "i686"))
1925 {
1926 if (TARGET_64BIT)
1927 ix86_tune_string = "generic64";
1928 else
1929 ix86_tune_string = "generic32";
1930 }
1931 }
1932 if (ix86_stringop_string)
1933 {
1934 if (!strcmp (ix86_stringop_string, "rep_byte"))
1935 stringop_alg = rep_prefix_1_byte;
1936 else if (!strcmp (ix86_stringop_string, "libcall"))
1937 stringop_alg = libcall;
1938 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1939 stringop_alg = rep_prefix_4_byte;
1940 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1941 stringop_alg = rep_prefix_8_byte;
1942 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1943 stringop_alg = loop_1_byte;
1944 else if (!strcmp (ix86_stringop_string, "loop"))
1945 stringop_alg = loop;
1946 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1947 stringop_alg = unrolled_loop;
1948 else
1949 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1950 }
1951 if (!strcmp (ix86_tune_string, "x86-64"))
1952 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1953 "-mtune=generic instead as appropriate.");
1954
1955 if (!ix86_arch_string)
1956 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1957 else
1958 ix86_arch_specified = 1;
1959
1960 if (!strcmp (ix86_arch_string, "generic"))
1961 error ("generic CPU can be used only for -mtune= switch");
1962 if (!strncmp (ix86_arch_string, "generic", 7))
1963 error ("bad value (%s) for -march= switch", ix86_arch_string);
1964
1965 if (ix86_cmodel_string != 0)
1966 {
1967 if (!strcmp (ix86_cmodel_string, "small"))
1968 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1969 else if (!strcmp (ix86_cmodel_string, "medium"))
1970 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1971 else if (!strcmp (ix86_cmodel_string, "large"))
1972 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1973 else if (flag_pic)
1974 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1975 else if (!strcmp (ix86_cmodel_string, "32"))
1976 ix86_cmodel = CM_32;
1977 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1978 ix86_cmodel = CM_KERNEL;
1979 else
1980 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1981 }
1982 else
1983 {
1984 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1985 use of rip-relative addressing. This eliminates fixups that
1986 would otherwise be needed if this object is to be placed in a
1987 DLL, and is essentially just as efficient as direct addressing. */
1988 if (TARGET_64BIT_MS_ABI)
1989 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1990 else if (TARGET_64BIT)
1991 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1992 else
1993 ix86_cmodel = CM_32;
1994 }
1995 if (ix86_asm_string != 0)
1996 {
1997 if (! TARGET_MACHO
1998 && !strcmp (ix86_asm_string, "intel"))
1999 ix86_asm_dialect = ASM_INTEL;
2000 else if (!strcmp (ix86_asm_string, "att"))
2001 ix86_asm_dialect = ASM_ATT;
2002 else
2003 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2004 }
2005 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2006 error ("code model %qs not supported in the %s bit mode",
2007 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2008 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2009 sorry ("%i-bit mode not compiled in",
2010 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2011
2012 for (i = 0; i < pta_size; i++)
2013 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2014 {
2015 ix86_arch = processor_alias_table[i].processor;
2016 /* Default cpu tuning to the architecture. */
2017 ix86_tune = ix86_arch;
2018
2019 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2020 error ("CPU you selected does not support x86-64 "
2021 "instruction set");
2022
2023 if (processor_alias_table[i].flags & PTA_MMX
2024 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2025 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2026 if (processor_alias_table[i].flags & PTA_3DNOW
2027 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2028 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2029 if (processor_alias_table[i].flags & PTA_3DNOW_A
2030 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2031 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2032 if (processor_alias_table[i].flags & PTA_SSE
2033 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2034 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2035 if (processor_alias_table[i].flags & PTA_SSE2
2036 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2037 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2038 if (processor_alias_table[i].flags & PTA_SSE3
2039 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2040 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2041 if (processor_alias_table[i].flags & PTA_SSSE3
2042 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2043 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2044 if (processor_alias_table[i].flags & PTA_SSE4_1
2045 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2047 if (processor_alias_table[i].flags & PTA_SSE4_2
2048 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2050 if (processor_alias_table[i].flags & PTA_SSE4A
2051 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2052 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2053
2054 if (processor_alias_table[i].flags & PTA_ABM)
2055 x86_abm = true;
2056 if (processor_alias_table[i].flags & PTA_CX16)
2057 x86_cmpxchg16b = true;
2058 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2059 x86_popcnt = true;
2060 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2061 x86_prefetch_sse = true;
2062 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2063 x86_sahf = true;
2064
2065 break;
2066 }
2067
2068 if (i == pta_size)
2069 error ("bad value (%s) for -march= switch", ix86_arch_string);
2070
2071 ix86_arch_mask = 1u << ix86_arch;
2072 for (i = 0; i < X86_ARCH_LAST; ++i)
2073 ix86_arch_features[i] &= ix86_arch_mask;
2074
2075 for (i = 0; i < pta_size; i++)
2076 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2077 {
2078 ix86_tune = processor_alias_table[i].processor;
2079 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2080 {
2081 if (ix86_tune_defaulted)
2082 {
2083 ix86_tune_string = "x86-64";
2084 for (i = 0; i < pta_size; i++)
2085 if (! strcmp (ix86_tune_string,
2086 processor_alias_table[i].name))
2087 break;
2088 ix86_tune = processor_alias_table[i].processor;
2089 }
2090 else
2091 error ("CPU you selected does not support x86-64 "
2092 "instruction set");
2093 }
2094 /* Intel CPUs have always interpreted SSE prefetch instructions as
2095 NOPs; so, we can enable SSE prefetch instructions even when
2096 -mtune (rather than -march) points us to a processor that has them.
2097 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2098 higher processors. */
2099 if (TARGET_CMOVE
2100 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2101 x86_prefetch_sse = true;
2102 break;
2103 }
2104 if (i == pta_size)
2105 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2106
2107 ix86_tune_mask = 1u << ix86_tune;
2108 for (i = 0; i < X86_TUNE_LAST; ++i)
2109 ix86_tune_features[i] &= ix86_tune_mask;
2110
2111 if (optimize_size)
2112 ix86_cost = &size_cost;
2113 else
2114 ix86_cost = processor_target_table[ix86_tune].cost;
2115
2116 /* Arrange to set up i386_stack_locals for all functions. */
2117 init_machine_status = ix86_init_machine_status;
2118
2119 /* Validate -mregparm= value. */
2120 if (ix86_regparm_string)
2121 {
2122 if (TARGET_64BIT)
2123 warning (0, "-mregparm is ignored in 64-bit mode");
2124 i = atoi (ix86_regparm_string);
2125 if (i < 0 || i > REGPARM_MAX)
2126 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2127 else
2128 ix86_regparm = i;
2129 }
2130 if (TARGET_64BIT)
2131 ix86_regparm = REGPARM_MAX;
2132
2133 /* If the user has provided any of the -malign-* options,
2134 warn and use that value only if -falign-* is not set.
2135 Remove this code in GCC 3.2 or later. */
2136 if (ix86_align_loops_string)
2137 {
2138 warning (0, "-malign-loops is obsolete, use -falign-loops");
2139 if (align_loops == 0)
2140 {
2141 i = atoi (ix86_align_loops_string);
2142 if (i < 0 || i > MAX_CODE_ALIGN)
2143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2144 else
2145 align_loops = 1 << i;
2146 }
2147 }
2148
2149 if (ix86_align_jumps_string)
2150 {
2151 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2152 if (align_jumps == 0)
2153 {
2154 i = atoi (ix86_align_jumps_string);
2155 if (i < 0 || i > MAX_CODE_ALIGN)
2156 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2157 else
2158 align_jumps = 1 << i;
2159 }
2160 }
2161
2162 if (ix86_align_funcs_string)
2163 {
2164 warning (0, "-malign-functions is obsolete, use -falign-functions");
2165 if (align_functions == 0)
2166 {
2167 i = atoi (ix86_align_funcs_string);
2168 if (i < 0 || i > MAX_CODE_ALIGN)
2169 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2170 else
2171 align_functions = 1 << i;
2172 }
2173 }
2174
2175 /* Default align_* from the processor table. */
2176 if (align_loops == 0)
2177 {
2178 align_loops = processor_target_table[ix86_tune].align_loop;
2179 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2180 }
2181 if (align_jumps == 0)
2182 {
2183 align_jumps = processor_target_table[ix86_tune].align_jump;
2184 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2185 }
2186 if (align_functions == 0)
2187 {
2188 align_functions = processor_target_table[ix86_tune].align_func;
2189 }
2190
2191 /* Validate -mbranch-cost= value, or provide default. */
2192 ix86_branch_cost = ix86_cost->branch_cost;
2193 if (ix86_branch_cost_string)
2194 {
2195 i = atoi (ix86_branch_cost_string);
2196 if (i < 0 || i > 5)
2197 error ("-mbranch-cost=%d is not between 0 and 5", i);
2198 else
2199 ix86_branch_cost = i;
2200 }
2201 if (ix86_section_threshold_string)
2202 {
2203 i = atoi (ix86_section_threshold_string);
2204 if (i < 0)
2205 error ("-mlarge-data-threshold=%d is negative", i);
2206 else
2207 ix86_section_threshold = i;
2208 }
2209
2210 if (ix86_tls_dialect_string)
2211 {
2212 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2213 ix86_tls_dialect = TLS_DIALECT_GNU;
2214 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2215 ix86_tls_dialect = TLS_DIALECT_GNU2;
2216 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2217 ix86_tls_dialect = TLS_DIALECT_SUN;
2218 else
2219 error ("bad value (%s) for -mtls-dialect= switch",
2220 ix86_tls_dialect_string);
2221 }
2222
2223 if (ix87_precision_string)
2224 {
2225 i = atoi (ix87_precision_string);
2226 if (i != 32 && i != 64 && i != 80)
2227 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2228 }
2229
2230 if (TARGET_64BIT)
2231 {
2232 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2233
2234 /* Enable by default the SSE and MMX builtins. Do allow the user to
2235 explicitly disable any of these. In particular, disabling SSE and
2236 MMX for kernel code is extremely useful. */
2237 if (!ix86_arch_specified)
2238 ix86_isa_flags
2239 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2240 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2241
2242 if (TARGET_RTD)
2243 warning (0, "-mrtd is ignored in 64bit mode");
2244 }
2245 else
2246 {
2247 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2248
2249 if (!ix86_arch_specified)
2250 ix86_isa_flags
2251 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2252
2253 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2254 when the programmer takes care to keep the stack from being destroyed. */
2255 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2256 target_flags |= MASK_NO_RED_ZONE;
2257 }
2258
2259 /* Keep nonleaf frame pointers. */
2260 if (flag_omit_frame_pointer)
2261 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2262 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2263 flag_omit_frame_pointer = 1;
2264
2265 /* If we're doing fast math, we don't care about comparison order
2266 wrt NaNs. This lets us use a shorter comparison sequence. */
2267 if (flag_finite_math_only)
2268 target_flags &= ~MASK_IEEE_FP;
2269
2270 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2271 since the insns won't need emulation. */
2272 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2273 target_flags &= ~MASK_NO_FANCY_MATH_387;
2274
2275 /* Likewise, if the target doesn't have a 387, or we've specified
2276 software floating point, don't use 387 inline intrinsics. */
2277 if (!TARGET_80387)
2278 target_flags |= MASK_NO_FANCY_MATH_387;
2279
2280 /* Turn on SSE4.1 builtins for -msse4.2. */
2281 if (TARGET_SSE4_2)
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2283
2284 /* Turn on SSSE3 builtins for -msse4.1. */
2285 if (TARGET_SSE4_1)
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2287
2288 /* Turn on SSE3 builtins for -mssse3. */
2289 if (TARGET_SSSE3)
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2291
2292 /* Turn on SSE3 builtins for -msse4a. */
2293 if (TARGET_SSE4A)
2294 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2295
2296 /* Turn on SSE2 builtins for -msse3. */
2297 if (TARGET_SSE3)
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2299
2300 /* Turn on SSE builtins for -msse2. */
2301 if (TARGET_SSE2)
2302 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2303
2304 /* Turn on MMX builtins for -msse. */
2305 if (TARGET_SSE)
2306 {
2307 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2308 x86_prefetch_sse = true;
2309 }
2310
2311 /* Turn on MMX builtins for 3Dnow. */
2312 if (TARGET_3DNOW)
2313 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2314
2315 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2316 if (TARGET_SSE4_2 || TARGET_ABM)
2317 x86_popcnt = true;
2318
2319 /* Validate -mpreferred-stack-boundary= value, or provide default.
2320 The default of 128 bits is for Pentium III's SSE __m128. We can't
2321 change it because of optimize_size. Otherwise, we can't mix object
2322 files compiled with -Os and -On. */
2323 ix86_preferred_stack_boundary = 128;
2324 if (ix86_preferred_stack_boundary_string)
2325 {
2326 i = atoi (ix86_preferred_stack_boundary_string);
2327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2329 TARGET_64BIT ? 4 : 2);
2330 else
2331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2332 }
2333
2334 /* Accept -msseregparm only if at least SSE support is enabled. */
2335 if (TARGET_SSEREGPARM
2336 && ! TARGET_SSE)
2337 error ("-msseregparm used without SSE enabled");
2338
2339 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2340 if (ix86_fpmath_string != 0)
2341 {
2342 if (! strcmp (ix86_fpmath_string, "387"))
2343 ix86_fpmath = FPMATH_387;
2344 else if (! strcmp (ix86_fpmath_string, "sse"))
2345 {
2346 if (!TARGET_SSE)
2347 {
2348 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2349 ix86_fpmath = FPMATH_387;
2350 }
2351 else
2352 ix86_fpmath = FPMATH_SSE;
2353 }
2354 else if (! strcmp (ix86_fpmath_string, "387,sse")
2355 || ! strcmp (ix86_fpmath_string, "sse,387"))
2356 {
2357 if (!TARGET_SSE)
2358 {
2359 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2360 ix86_fpmath = FPMATH_387;
2361 }
2362 else if (!TARGET_80387)
2363 {
2364 warning (0, "387 instruction set disabled, using SSE arithmetics");
2365 ix86_fpmath = FPMATH_SSE;
2366 }
2367 else
2368 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2369 }
2370 else
2371 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2372 }
2373
2374 /* If the i387 is disabled, then do not return values in it. */
2375 if (!TARGET_80387)
2376 target_flags &= ~MASK_FLOAT_RETURNS;
2377
2378 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2379 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2380 && !optimize_size)
2381 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2382
2383 /* ??? Unwind info is not correct around the CFG unless either a frame
2384 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2385 unwind info generation to be aware of the CFG and propagating states
2386 around edges. */
2387 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2388 || flag_exceptions || flag_non_call_exceptions)
2389 && flag_omit_frame_pointer
2390 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2391 {
2392 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2393 warning (0, "unwind tables currently require either a frame pointer "
2394 "or -maccumulate-outgoing-args for correctness");
2395 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2396 }
2397
2398 /* For sane SSE instruction set generation we need fcomi instruction.
2399 It is safe to enable all CMOVE instructions. */
2400 if (TARGET_SSE)
2401 TARGET_CMOVE = 1;
2402
2403 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2404 {
2405 char *p;
2406 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2407 p = strchr (internal_label_prefix, 'X');
2408 internal_label_prefix_len = p - internal_label_prefix;
2409 *p = '\0';
2410 }
2411
2412 /* When a scheduling description is not available, disable the scheduler pass
2413 so it won't slow down compilation and make x87 code slower. */
2414 if (!TARGET_SCHEDULE)
2415 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2416
2417 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2418 set_param_value ("simultaneous-prefetches",
2419 ix86_cost->simultaneous_prefetches);
2420 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2421 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2422 }
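
/* Example of how the code above combines -march with explicit ISA options:
   "-march=core2" enables MMX, SSE, SSE2, SSE3 and SSSE3 through the PTA_*
   flags in processor_alias_table, but "-march=core2 -mno-ssse3" leaves SSSE3
   off, because ix86_handle_option recorded that bit in
   ix86_isa_flags_explicit and the PTA loop only ORs in ISA bits that were
   not explicitly set or cleared on the command line.  */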
2423 \f
2424 /* Return true if this goes in large data/bss. */
2425
2426 static bool
2427 ix86_in_large_data_p (tree exp)
2428 {
2429 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2430 return false;
2431
2432 /* Functions are never large data. */
2433 if (TREE_CODE (exp) == FUNCTION_DECL)
2434 return false;
2435
2436 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2437 {
2438 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2439 if (strcmp (section, ".ldata") == 0
2440 || strcmp (section, ".lbss") == 0)
2441 return true;
2442 return false;
2443 }
2444 else
2445 {
2446 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2447
2448 /* If this is an incomplete type with size 0, then we can't put it
2449 in data because it might be too big when completed. */
2450 if (!size || size > ix86_section_threshold)
2451 return true;
2452 }
2453
2454 return false;
2455 }
2456
2457 /* Switch to the appropriate section for output of DECL.
2458 DECL is either a `VAR_DECL' node or a constant of some sort.
2459 RELOC indicates whether forming the initial value of DECL requires
2460 link-time relocations. */
2461
2462 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2463 ATTRIBUTE_UNUSED;
2464
2465 static section *
2466 x86_64_elf_select_section (tree decl, int reloc,
2467 unsigned HOST_WIDE_INT align)
2468 {
2469 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && ix86_in_large_data_p (decl))
2471 {
2472 const char *sname = NULL;
2473 unsigned int flags = SECTION_WRITE;
2474 switch (categorize_decl_for_section (decl, reloc))
2475 {
2476 case SECCAT_DATA:
2477 sname = ".ldata";
2478 break;
2479 case SECCAT_DATA_REL:
2480 sname = ".ldata.rel";
2481 break;
2482 case SECCAT_DATA_REL_LOCAL:
2483 sname = ".ldata.rel.local";
2484 break;
2485 case SECCAT_DATA_REL_RO:
2486 sname = ".ldata.rel.ro";
2487 break;
2488 case SECCAT_DATA_REL_RO_LOCAL:
2489 sname = ".ldata.rel.ro.local";
2490 break;
2491 case SECCAT_BSS:
2492 sname = ".lbss";
2493 flags |= SECTION_BSS;
2494 break;
2495 case SECCAT_RODATA:
2496 case SECCAT_RODATA_MERGE_STR:
2497 case SECCAT_RODATA_MERGE_STR_INIT:
2498 case SECCAT_RODATA_MERGE_CONST:
2499 sname = ".lrodata";
2500 flags = 0;
2501 break;
2502 case SECCAT_SRODATA:
2503 case SECCAT_SDATA:
2504 case SECCAT_SBSS:
2505 gcc_unreachable ();
2506 case SECCAT_TEXT:
2507 case SECCAT_TDATA:
2508 case SECCAT_TBSS:
2509 /* We don't split these for the medium model. Place them into
2510 default sections and hope for the best. */
2511 break;
2512 }
2513 if (sname)
2514 {
2515 /* We might get called with string constants, but get_named_section
2516 doesn't like them as they are not DECLs. Also, we need to set
2517 flags in that case. */
2518 if (!DECL_P (decl))
2519 return get_section (sname, flags, NULL);
2520 return get_named_section (decl, sname, reloc);
2521 }
2522 }
2523 return default_elf_select_section (decl, reloc, align);
2524 }
2525
2526 /* Build up a unique section name, expressed as a
2527 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2528 RELOC indicates whether the initial value of EXP requires
2529 link-time relocations. */
2530
2531 static void ATTRIBUTE_UNUSED
2532 x86_64_elf_unique_section (tree decl, int reloc)
2533 {
2534 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2535 && ix86_in_large_data_p (decl))
2536 {
2537 const char *prefix = NULL;
2538 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2539 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2540
2541 switch (categorize_decl_for_section (decl, reloc))
2542 {
2543 case SECCAT_DATA:
2544 case SECCAT_DATA_REL:
2545 case SECCAT_DATA_REL_LOCAL:
2546 case SECCAT_DATA_REL_RO:
2547 case SECCAT_DATA_REL_RO_LOCAL:
2548 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2549 break;
2550 case SECCAT_BSS:
2551 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2552 break;
2553 case SECCAT_RODATA:
2554 case SECCAT_RODATA_MERGE_STR:
2555 case SECCAT_RODATA_MERGE_STR_INIT:
2556 case SECCAT_RODATA_MERGE_CONST:
2557 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2558 break;
2559 case SECCAT_SRODATA:
2560 case SECCAT_SDATA:
2561 case SECCAT_SBSS:
2562 gcc_unreachable ();
2563 case SECCAT_TEXT:
2564 case SECCAT_TDATA:
2565 case SECCAT_TBSS:
2566 /* We don't split these for the medium model. Place them into
2567 default sections and hope for the best. */
2568 break;
2569 }
2570 if (prefix)
2571 {
2572 const char *name;
2573 size_t nlen, plen;
2574 char *string;
2575 plen = strlen (prefix);
2576
2577 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2578 name = targetm.strip_name_encoding (name);
2579 nlen = strlen (name);
2580
2581 string = (char *) alloca (nlen + plen + 1);
2582 memcpy (string, prefix, plen);
2583 memcpy (string + plen, name, nlen + 1);
2584
2585 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2586 return;
2587 }
2588 }
2589 default_unique_section (decl, reloc);
2590 }
2591
2592 #ifdef COMMON_ASM_OP
2593 /* This says how to output assembler code to declare an
2594 uninitialized external linkage data object.
2595
2596 For the medium model on x86-64 we need to use the .largecomm directive for
2597 large objects. */
2598 void
2599 x86_elf_aligned_common (FILE *file,
2600 const char *name, unsigned HOST_WIDE_INT size,
2601 int align)
2602 {
2603 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2604 && size > (unsigned int)ix86_section_threshold)
2605 fprintf (file, ".largecomm\t");
2606 else
2607 fprintf (file, "%s", COMMON_ASM_OP);
2608 assemble_name (file, name);
2609 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2610 size, align / BITS_PER_UNIT);
2611 }
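
/* For instance, with -mcmodel=medium and the default 64K section threshold,
   a 100000-byte common object named "big" aligned to 32 bytes would be
   emitted as ".largecomm big,100000,32", while smaller objects keep using
   COMMON_ASM_OP (typically ".comm").  */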
2612 #endif
2613
2614 /* Utility function for targets to use in implementing
2615 ASM_OUTPUT_ALIGNED_BSS. */
2616
2617 void
2618 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2619 const char *name, unsigned HOST_WIDE_INT size,
2620 int align)
2621 {
2622 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2623 && size > (unsigned int)ix86_section_threshold)
2624 switch_to_section (get_named_section (decl, ".lbss", 0));
2625 else
2626 switch_to_section (bss_section);
2627 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2628 #ifdef ASM_DECLARE_OBJECT_NAME
2629 last_assemble_variable_decl = decl;
2630 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2631 #else
2632 /* The standard thing is to just output a label for the object. */
2633 ASM_OUTPUT_LABEL (file, name);
2634 #endif /* ASM_DECLARE_OBJECT_NAME */
2635 ASM_OUTPUT_SKIP (file, size ? size : 1);
2636 }
2637 \f
2638 void
2639 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2640 {
2641 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2642 make the problem with not enough registers even worse. */
2643 #ifdef INSN_SCHEDULING
2644 if (level > 1)
2645 flag_schedule_insns = 0;
2646 #endif
2647
2648 if (TARGET_MACHO)
2649 /* The Darwin libraries never set errno, so we might as well
2650 avoid calling them when that's the only reason we would. */
2651 flag_errno_math = 0;
2652
2653 /* The default values of these switches depend on TARGET_64BIT,
2654 which is not known at this point. Mark these values with 2 and
2655 let the user override them. If there is no command line option
2656 specifying them, we will set the defaults in override_options. */
2657 if (optimize >= 1)
2658 flag_omit_frame_pointer = 2;
2659 flag_pcc_struct_return = 2;
2660 flag_asynchronous_unwind_tables = 2;
2661 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2662 SUBTARGET_OPTIMIZATION_OPTIONS;
2663 #endif
2664 }
2665 \f
2666 /* Decide whether we can make a sibling call to a function. DECL is the
2667 declaration of the function being targeted by the call and EXP is the
2668 CALL_EXPR representing the call. */
2669
2670 static bool
2671 ix86_function_ok_for_sibcall (tree decl, tree exp)
2672 {
2673 tree func;
2674 rtx a, b;
2675
2676 /* If we are generating position-independent code, we cannot sibcall
2677 optimize any indirect call, or a direct call to a global function,
2678 as the PLT requires %ebx be live. */
2679 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2680 return false;
2681
2682 if (decl)
2683 func = decl;
2684 else
2685 {
2686 func = TREE_TYPE (CALL_EXPR_FN (exp));
2687 if (POINTER_TYPE_P (func))
2688 func = TREE_TYPE (func);
2689 }
2690
2691 /* Check that the return value locations are the same. For example,
2692 if we are returning floats on the 80387 register stack, we cannot
2693 make a sibcall from a function that doesn't return a float to a
2694 function that does or, conversely, from a function that does return
2695 a float to a function that doesn't; the necessary stack adjustment
2696 would not be executed. This is also the place we notice
2697 differences in the return value ABI. Note that it is ok for one
2698 of the functions to have void return type as long as the return
2699 value of the other is passed in a register. */
2700 a = ix86_function_value (TREE_TYPE (exp), func, false);
2701 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2702 cfun->decl, false);
2703 if (STACK_REG_P (a) || STACK_REG_P (b))
2704 {
2705 if (!rtx_equal_p (a, b))
2706 return false;
2707 }
2708 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2709 ;
2710 else if (!rtx_equal_p (a, b))
2711 return false;
2712
2713 /* If this call is indirect, we'll need to be able to use a call-clobbered
2714 register for the address of the target function. Make sure that all
2715 such registers are not used for passing parameters. */
2716 if (!decl && !TARGET_64BIT)
2717 {
2718 tree type;
2719
2720 /* We're looking at the CALL_EXPR, we need the type of the function. */
2721 type = CALL_EXPR_FN (exp); /* pointer expression */
2722 type = TREE_TYPE (type); /* pointer type */
2723 type = TREE_TYPE (type); /* function type */
2724
2725 if (ix86_function_regparm (type, NULL) >= 3)
2726 {
2727 /* ??? Need to count the actual number of registers to be used,
2728 not the possible number of registers. Fix later. */
2729 return false;
2730 }
2731 }
2732
2733 /* Dllimport'd functions are also called indirectly. */
2734 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2735 && decl && DECL_DLLIMPORT_P (decl)
2736 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2737 return false;
2738
2739 /* If we force-aligned the stack, then sibcalling would unalign the
2740 stack, which may break the called function. */
2741 if (cfun->machine->force_align_arg_pointer)
2742 return false;
2743
2744 /* Otherwise okay. That also includes certain types of indirect calls. */
2745 return true;
2746 }
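
/* For example, in 32-bit PIC code an indirect call through a function
   pointer, or a direct call to a non-local function, is rejected above
   because the PLT call sequence needs %ebx to hold the GOT pointer, and a
   sibcall cannot guarantee that.  */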
2747
2748 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2749 calling convention attributes;
2750 arguments as in struct attribute_spec.handler. */
2751
2752 static tree
2753 ix86_handle_cconv_attribute (tree *node, tree name,
2754 tree args,
2755 int flags ATTRIBUTE_UNUSED,
2756 bool *no_add_attrs)
2757 {
2758 if (TREE_CODE (*node) != FUNCTION_TYPE
2759 && TREE_CODE (*node) != METHOD_TYPE
2760 && TREE_CODE (*node) != FIELD_DECL
2761 && TREE_CODE (*node) != TYPE_DECL)
2762 {
2763 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2764 IDENTIFIER_POINTER (name));
2765 *no_add_attrs = true;
2766 return NULL_TREE;
2767 }
2768
2769 /* Can combine regparm with all attributes but fastcall. */
2770 if (is_attribute_p ("regparm", name))
2771 {
2772 tree cst;
2773
2774 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2775 {
2776 error ("fastcall and regparm attributes are not compatible");
2777 }
2778
2779 cst = TREE_VALUE (args);
2780 if (TREE_CODE (cst) != INTEGER_CST)
2781 {
2782 warning (OPT_Wattributes,
2783 "%qs attribute requires an integer constant argument",
2784 IDENTIFIER_POINTER (name));
2785 *no_add_attrs = true;
2786 }
2787 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2788 {
2789 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2790 IDENTIFIER_POINTER (name), REGPARM_MAX);
2791 *no_add_attrs = true;
2792 }
2793
2794 if (!TARGET_64BIT
2795 && lookup_attribute (ix86_force_align_arg_pointer_string,
2796 TYPE_ATTRIBUTES (*node))
2797 && compare_tree_int (cst, REGPARM_MAX-1))
2798 {
2799 error ("%s functions limited to %d register parameters",
2800 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2801 }
2802
2803 return NULL_TREE;
2804 }
2805
2806 if (TARGET_64BIT)
2807 {
2808 /* Do not warn when emulating the MS ABI. */
2809 if (!TARGET_64BIT_MS_ABI)
2810 warning (OPT_Wattributes, "%qs attribute ignored",
2811 IDENTIFIER_POINTER (name));
2812 *no_add_attrs = true;
2813 return NULL_TREE;
2814 }
2815
2816 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2817 if (is_attribute_p ("fastcall", name))
2818 {
2819 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2820 {
2821 error ("fastcall and cdecl attributes are not compatible");
2822 }
2823 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2824 {
2825 error ("fastcall and stdcall attributes are not compatible");
2826 }
2827 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2828 {
2829 error ("fastcall and regparm attributes are not compatible");
2830 }
2831 }
2832
2833 /* Can combine stdcall with fastcall (redundant), regparm and
2834 sseregparm. */
2835 else if (is_attribute_p ("stdcall", name))
2836 {
2837 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2838 {
2839 error ("stdcall and cdecl attributes are not compatible");
2840 }
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2842 {
2843 error ("stdcall and fastcall attributes are not compatible");
2844 }
2845 }
2846
2847 /* Can combine cdecl with regparm and sseregparm. */
2848 else if (is_attribute_p ("cdecl", name))
2849 {
2850 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2851 {
2852 error ("stdcall and cdecl attributes are not compatible");
2853 }
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2855 {
2856 error ("fastcall and cdecl attributes are not compatible");
2857 }
2858 }
2859
2860 /* Can combine sseregparm with all attributes. */
2861
2862 return NULL_TREE;
2863 }
2864
2865 /* Return 0 if the attributes for two types are incompatible, 1 if they
2866 are compatible, and 2 if they are nearly compatible (which causes a
2867 warning to be generated). */
2868
2869 static int
2870 ix86_comp_type_attributes (tree type1, tree type2)
2871 {
2872 /* Check for mismatch of non-default calling convention. */
2873 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2874
2875 if (TREE_CODE (type1) != FUNCTION_TYPE)
2876 return 1;
2877
2878 /* Check for mismatched fastcall/regparm types. */
2879 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2880 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2881 || (ix86_function_regparm (type1, NULL)
2882 != ix86_function_regparm (type2, NULL)))
2883 return 0;
2884
2885 /* Check for mismatched sseregparm types. */
2886 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2887 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2888 return 0;
2889
2890 /* Check for mismatched return types (cdecl vs stdcall). */
2891 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2892 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2893 return 0;
2894
2895 return 1;
2896 }
2897 \f
2898 /* Return the regparm value for a function with the indicated TYPE and DECL.
2899 DECL may be NULL when calling function indirectly
2900 or considering a libcall. */
2901
2902 static int
2903 ix86_function_regparm (tree type, tree decl)
2904 {
2905 tree attr;
2906 int regparm = ix86_regparm;
2907
2908 if (TARGET_64BIT)
2909 return regparm;
2910
2911 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2912 if (attr)
2913 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2914
2915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2916 return 2;
2917
2918 /* Use register calling convention for local functions when possible. */
2919 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2920 && flag_unit_at_a_time && !profile_flag)
2921 {
2922 struct cgraph_local_info *i = cgraph_local_info (decl);
2923 if (i && i->local)
2924 {
2925 int local_regparm, globals = 0, regno;
2926 struct function *f;
2927
2928 /* Make sure no regparm register is taken by a
2929 global register variable. */
2930 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2931 if (global_regs[local_regparm])
2932 break;
2933
2934 /* We can't use regparm(3) for nested functions, as these pass the
2935 static chain pointer in the third argument. */
2936 if (local_regparm == 3
2937 && (decl_function_context (decl)
2938 || ix86_force_align_arg_pointer)
2939 && !DECL_NO_STATIC_CHAIN (decl))
2940 local_regparm = 2;
2941
2942 /* If the function realigns its stack pointer, the prologue will
2943 clobber %ecx. If we've already generated code for the callee,
2944 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2945 scanning the attributes for the self-realigning property. */
2946 f = DECL_STRUCT_FUNCTION (decl);
2947 if (local_regparm == 3
2948 && (f ? !!f->machine->force_align_arg_pointer
2949 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2951 local_regparm = 2;
2952
2953 /* Each global register variable increases register pressure,
2954 so the more global register variables there are, the less useful
2955 the regparm optimization is, unless the user explicitly requests it. */
2956 for (regno = 0; regno < 6; regno++)
2957 if (global_regs[regno])
2958 globals++;
2959 local_regparm
2960 = globals < local_regparm ? local_regparm - globals : 0;
2961
2962 if (local_regparm > regparm)
2963 regparm = local_regparm;
2964 }
2965 }
2966
2967 return regparm;
2968 }
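
/* Illustrative example: for a declaration such as
       int __attribute__ ((regparm (2))) f (int a, int b);
   the attribute lookup above returns 2, while a static function local to
   the unit (with -funit-at-a-time and no profiling) may have its regparm
   value raised to 3 by the cgraph-based code, provided no global register
   variables or stack-realignment constraints get in the way.  */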
2969
2970 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2971 DFmode (2) arguments in SSE registers for a function with the
2972 indicated TYPE and DECL. DECL may be NULL when calling function
2973 indirectly or considering a libcall. Otherwise return 0. */
2974
2975 static int
2976 ix86_function_sseregparm (tree type, tree decl)
2977 {
2978 gcc_assert (!TARGET_64BIT);
2979
2980 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2981 by the sseregparm attribute. */
2982 if (TARGET_SSEREGPARM
2983 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2984 {
2985 if (!TARGET_SSE)
2986 {
2987 if (decl)
2988 error ("Calling %qD with attribute sseregparm without "
2989 "SSE/SSE2 enabled", decl);
2990 else
2991 error ("Calling %qT with attribute sseregparm without "
2992 "SSE/SSE2 enabled", type);
2993 return 0;
2994 }
2995
2996 return 2;
2997 }
2998
2999 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3000 (and DFmode for SSE2) arguments in SSE registers. */
3001 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3002 {
3003 struct cgraph_local_info *i = cgraph_local_info (decl);
3004 if (i && i->local)
3005 return TARGET_SSE2 ? 2 : 1;
3006 }
3007
3008 return 0;
3009 }
3010
3011 /* Return true if EAX is live at the start of the function. Used by
3012 ix86_expand_prologue to determine if we need special help before
3013 calling allocate_stack_worker. */
3014
3015 static bool
3016 ix86_eax_live_at_start_p (void)
3017 {
3018 /* Cheat. Don't bother working forward from ix86_function_regparm
3019 to the function type to whether an actual argument is located in
3020 eax. Instead just look at cfg info, which is still close enough
3021 to correct at this point. This gives false positives for broken
3022 functions that might use uninitialized data that happens to be
3023 allocated in eax, but who cares? */
3024 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3025 }
3026
3027 /* Return true if TYPE has a variable argument list. */
3028
3029 static bool
3030 type_has_variadic_args_p (tree type)
3031 {
3032 tree n, t = TYPE_ARG_TYPES (type);
3033
3034 if (t == NULL)
3035 return false;
3036
3037 while ((n = TREE_CHAIN (t)) != NULL)
3038 t = n;
3039
3040 return TREE_VALUE (t) != void_type_node;
3041 }
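
/* For example, for "int f (int, ...)" the TYPE_ARG_TYPES chain does not end
   with void_type_node, so this returns true; for the prototyped
   "int f (int)" the chain is terminated by void_type_node and the result is
   false; an unprototyped "int f ()" has a NULL chain and also yields false.  */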
3042
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3049
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3057
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
3059
3060 int
3061 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3062 {
3063 int rtd;
3064
3065 /* None of the 64-bit ABIs pop arguments. */
3066 if (TARGET_64BIT)
3067 return 0;
3068
3069 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3070
3071 /* Cdecl functions override -mrtd, and never pop the stack. */
3072 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3073 {
3074 /* Stdcall and fastcall functions will pop the stack if not
3075 variable args. */
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3077 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3078 rtd = 1;
3079
3080 if (rtd && ! type_has_variadic_args_p (funtype))
3081 return size;
3082 }
3083
3084 /* Lose any fake structure return argument if it is passed on the stack. */
3085 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3086 && !KEEP_AGGREGATE_RETURN_POINTER)
3087 {
3088 int nregs = ix86_function_regparm (funtype, fundecl);
3089 if (nregs == 0)
3090 return GET_MODE_SIZE (Pmode);
3091 }
3092
3093 return 0;
3094 }
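/* A worked example (hypothetical prototypes, not from this file): on ia32

     int __attribute__ ((stdcall)) f (int a, int b);
     int g (int a, int b);

   f pops its own 8 bytes of stack arguments (the callee returns with
   "ret $8"), so ix86_return_pops_args returns the full 8 bytes for it,
   while the cdecl function g yields 0 and leaves the popping to the
   caller.  */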
3095 \f
3096 /* Argument support functions. */
3097
3098 /* Return true when a register may be used to pass function parameters.  */
3099 bool
3100 ix86_function_arg_regno_p (int regno)
3101 {
3102 int i;
3103 const int *parm_regs;
3104
3105 if (!TARGET_64BIT)
3106 {
3107 if (TARGET_MACHO)
3108 return (regno < REGPARM_MAX
3109 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3110 else
3111 return (regno < REGPARM_MAX
3112 || (TARGET_MMX && MMX_REGNO_P (regno)
3113 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3114 || (TARGET_SSE && SSE_REGNO_P (regno)
3115 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3116 }
3117
3118 if (TARGET_MACHO)
3119 {
3120 if (SSE_REGNO_P (regno) && TARGET_SSE)
3121 return true;
3122 }
3123 else
3124 {
3125 if (TARGET_SSE && SSE_REGNO_P (regno)
3126 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3127 return true;
3128 }
3129
3130 /* RAX is used as a hidden argument to varargs functions.  */
3131 if (!TARGET_64BIT_MS_ABI && regno == 0)
3132 return true;
3133
3134 if (TARGET_64BIT_MS_ABI)
3135 parm_regs = x86_64_ms_abi_int_parameter_registers;
3136 else
3137 parm_regs = x86_64_int_parameter_registers;
3138 for (i = 0; i < REGPARM_MAX; i++)
3139 if (regno == parm_regs[i])
3140 return true;
3141 return false;
3142 }
3143
3144 /* Return true if we do not know how to pass TYPE solely in registers.  */
3145
3146 static bool
3147 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3148 {
3149 if (must_pass_in_stack_var_size_or_pad (mode, type))
3150 return true;
3151
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT && mode == TImode
3156 && type && TREE_CODE (type) != VECTOR_TYPE);
3157 }
3158
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0. */
3162
3163 void
3164 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3165 tree fntype, /* tree ptr for function decl */
3166 rtx libname, /* SYMBOL_REF of library name or 0 */
3167 tree fndecl)
3168 {
3169 memset (cum, 0, sizeof (*cum));
3170
3171 /* Set up the number of registers to use for passing arguments. */
3172 cum->nregs = ix86_regparm;
3173 if (TARGET_SSE)
3174 cum->sse_nregs = SSE_REGPARM_MAX;
3175 if (TARGET_MMX)
3176 cum->mmx_nregs = MMX_REGPARM_MAX;
3177 cum->warn_sse = true;
3178 cum->warn_mmx = true;
3179 cum->maybe_vaarg = (fntype
3180 ? (!TYPE_ARG_TYPES (fntype)
3181 || type_has_variadic_args_p (fntype))
3182 : !libname);
3183
3184 if (!TARGET_64BIT)
3185 {
3186 /* If there are variable arguments, then we won't pass anything
3187 in registers in 32-bit mode. */
3188 if (cum->maybe_vaarg)
3189 {
3190 cum->nregs = 0;
3191 cum->sse_nregs = 0;
3192 cum->mmx_nregs = 0;
3193 cum->warn_sse = 0;
3194 cum->warn_mmx = 0;
3195 return;
3196 }
3197
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3200 if (fntype)
3201 {
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3203 {
3204 cum->nregs = 2;
3205 cum->fastcall = 1;
3206 }
3207 else
3208 cum->nregs = ix86_function_regparm (fntype, fndecl);
3209 }
3210
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3214 }
3215 }
3216
3217 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3218 But in the case of vector types, it is some vector mode.
3219
3220 When we have only some of our vector isa extensions enabled, then there
3221 are some modes for which vector_mode_supported_p is false. For these
3222 modes, the generic vector support in gcc will choose some non-vector mode
3223 in order to implement the type. By computing the natural mode, we'll
3224 select the proper ABI location for the operand and not depend on whatever
3225 the middle-end decides to do with these vector types. */
3226
3227 static enum machine_mode
3228 type_natural_mode (tree type)
3229 {
3230 enum machine_mode mode = TYPE_MODE (type);
3231
3232 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3233 {
3234 HOST_WIDE_INT size = int_size_in_bytes (type);
3235 if ((size == 8 || size == 16)
3236 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3237 && TYPE_VECTOR_SUBPARTS (type) > 1)
3238 {
3239 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3240
3241 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3242 mode = MIN_MODE_VECTOR_FLOAT;
3243 else
3244 mode = MIN_MODE_VECTOR_INT;
3245
3246 /* Get the mode which has this inner mode and number of units. */
3247 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3248 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3249 && GET_MODE_INNER (mode) == innermode)
3250 return mode;
3251
3252 gcc_unreachable ();
3253 }
3254 }
3255
3256 return mode;
3257 }
3258
3259 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3260 this may not agree with the mode that the type system has chosen for the
3261 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3262 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3263
3264 static rtx
3265 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3266 unsigned int regno)
3267 {
3268 rtx tmp;
3269
3270 if (orig_mode != BLKmode)
3271 tmp = gen_rtx_REG (orig_mode, regno);
3272 else
3273 {
3274 tmp = gen_rtx_REG (mode, regno);
3275 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3276 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3277 }
3278
3279 return tmp;
3280 }
3281
3282 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
3283 The goal of this code is to classify each eightbyte of an incoming argument
3284 by register class and assign registers accordingly.  */
3285
3286 /* Return the union class of CLASS1 and CLASS2.
3287 See the x86-64 PS ABI for details. */
3288
3289 static enum x86_64_reg_class
3290 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3291 {
3292 /* Rule #1: If both classes are equal, this is the resulting class. */
3293 if (class1 == class2)
3294 return class1;
3295
3296 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3297 the other class. */
3298 if (class1 == X86_64_NO_CLASS)
3299 return class2;
3300 if (class2 == X86_64_NO_CLASS)
3301 return class1;
3302
3303 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3304 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3305 return X86_64_MEMORY_CLASS;
3306
3307 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3308 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3309 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3310 return X86_64_INTEGERSI_CLASS;
3311 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3312 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3313 return X86_64_INTEGER_CLASS;
3314
3315 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3316 MEMORY is used. */
3317 if (class1 == X86_64_X87_CLASS
3318 || class1 == X86_64_X87UP_CLASS
3319 || class1 == X86_64_COMPLEX_X87_CLASS
3320 || class2 == X86_64_X87_CLASS
3321 || class2 == X86_64_X87UP_CLASS
3322 || class2 == X86_64_COMPLEX_X87_CLASS)
3323 return X86_64_MEMORY_CLASS;
3324
3325 /* Rule #6: Otherwise class SSE is used. */
3326 return X86_64_SSE_CLASS;
3327 }
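/* A small example of the merging rules above (hypothetical type, for
   illustration only):

     struct s { int i; float f; };

   Both fields share the first eightbyte; the int contributes
   X86_64_INTEGERSI_CLASS and the float X86_64_SSE_CLASS, and rule #4
   merges them into X86_64_INTEGER_CLASS, so the whole struct is passed
   in a single general purpose register.  */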
3328
3329 /* Classify the argument of type TYPE and mode MODE.
3330 CLASSES will be filled by the register class used to pass each word
3331 of the operand. The number of words is returned. In case the parameter
3332 should be passed in memory, 0 is returned. As a special case for zero
3333 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3334
3335 BIT_OFFSET is used internally for handling records and specifies the
3336 offset of the field in bits, modulo 256, to avoid overflow cases.
3337
3338 See the x86-64 PS ABI for details.
3339 */
3340
3341 static int
3342 classify_argument (enum machine_mode mode, tree type,
3343 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3344 {
3345 HOST_WIDE_INT bytes =
3346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3347 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3348
3349 /* Variable sized entities are always passed/returned in memory. */
3350 if (bytes < 0)
3351 return 0;
3352
3353 if (mode != VOIDmode
3354 && targetm.calls.must_pass_in_stack (mode, type))
3355 return 0;
3356
3357 if (type && AGGREGATE_TYPE_P (type))
3358 {
3359 int i;
3360 tree field;
3361 enum x86_64_reg_class subclasses[MAX_CLASSES];
3362
3363 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3364 if (bytes > 16)
3365 return 0;
3366
3367 for (i = 0; i < words; i++)
3368 classes[i] = X86_64_NO_CLASS;
3369
3370 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3371 signal the memory class, so handle it as a special case.  */
3372 if (!words)
3373 {
3374 classes[0] = X86_64_NO_CLASS;
3375 return 1;
3376 }
3377
3378 /* Classify each field of record and merge classes. */
3379 switch (TREE_CODE (type))
3380 {
3381 case RECORD_TYPE:
3382 /* And now merge the fields of structure. */
3383 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3384 {
3385 if (TREE_CODE (field) == FIELD_DECL)
3386 {
3387 int num;
3388
3389 if (TREE_TYPE (field) == error_mark_node)
3390 continue;
3391
3392 /* Bitfields are always classified as integer. Handle them
3393 early, since later code would consider them to be
3394 misaligned integers. */
3395 if (DECL_BIT_FIELD (field))
3396 {
3397 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3398 i < ((int_bit_position (field) + (bit_offset % 64))
3399 + tree_low_cst (DECL_SIZE (field), 0)
3400 + 63) / 8 / 8; i++)
3401 classes[i] =
3402 merge_classes (X86_64_INTEGER_CLASS,
3403 classes[i]);
3404 }
3405 else
3406 {
3407 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3408 TREE_TYPE (field), subclasses,
3409 (int_bit_position (field)
3410 + bit_offset) % 256);
3411 if (!num)
3412 return 0;
3413 for (i = 0; i < num; i++)
3414 {
3415 int pos =
3416 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3417 classes[i + pos] =
3418 merge_classes (subclasses[i], classes[i + pos]);
3419 }
3420 }
3421 }
3422 }
3423 break;
3424
3425 case ARRAY_TYPE:
3426 /* Arrays are handled as small records. */
3427 {
3428 int num;
3429 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3430 TREE_TYPE (type), subclasses, bit_offset);
3431 if (!num)
3432 return 0;
3433
3434 /* The partial classes are now full classes. */
3435 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3436 subclasses[0] = X86_64_SSE_CLASS;
3437 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3438 subclasses[0] = X86_64_INTEGER_CLASS;
3439
3440 for (i = 0; i < words; i++)
3441 classes[i] = subclasses[i % num];
3442
3443 break;
3444 }
3445 case UNION_TYPE:
3446 case QUAL_UNION_TYPE:
3447 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3448 
3449 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3450 {
3451 if (TREE_CODE (field) == FIELD_DECL)
3452 {
3453 int num;
3454
3455 if (TREE_TYPE (field) == error_mark_node)
3456 continue;
3457
3458 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3459 TREE_TYPE (field), subclasses,
3460 bit_offset);
3461 if (!num)
3462 return 0;
3463 for (i = 0; i < num; i++)
3464 classes[i] = merge_classes (subclasses[i], classes[i]);
3465 }
3466 }
3467 break;
3468
3469 default:
3470 gcc_unreachable ();
3471 }
3472
3473 /* Final merger cleanup. */
3474 for (i = 0; i < words; i++)
3475 {
3476 /* If one class is MEMORY, everything should be passed in
3477 memory. */
3478 if (classes[i] == X86_64_MEMORY_CLASS)
3479 return 0;
3480
3481 /* The X86_64_SSEUP_CLASS should always be preceded by
3482 X86_64_SSE_CLASS. */
3483 if (classes[i] == X86_64_SSEUP_CLASS
3484 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3485 classes[i] = X86_64_SSE_CLASS;
3486
3487 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3488 if (classes[i] == X86_64_X87UP_CLASS
3489 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3490 classes[i] = X86_64_SSE_CLASS;
3491 }
3492 return words;
3493 }
3494
3495 /* Compute alignment needed. We align all types to natural boundaries with
3496 the exception of XFmode, which is aligned to 64 bits.  */
3497 if (mode != VOIDmode && mode != BLKmode)
3498 {
3499 int mode_alignment = GET_MODE_BITSIZE (mode);
3500
3501 if (mode == XFmode)
3502 mode_alignment = 128;
3503 else if (mode == XCmode)
3504 mode_alignment = 256;
3505 if (COMPLEX_MODE_P (mode))
3506 mode_alignment /= 2;
3507 /* Misaligned fields are always returned in memory. */
3508 if (bit_offset % mode_alignment)
3509 return 0;
3510 }
3511
3512 /* For V1xx modes, just use the base mode.  */
3513 if (VECTOR_MODE_P (mode)
3514 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3515 mode = GET_MODE_INNER (mode);
3516
3517 /* Classification of atomic types. */
3518 switch (mode)
3519 {
3520 case SDmode:
3521 case DDmode:
3522 classes[0] = X86_64_SSE_CLASS;
3523 return 1;
3524 case TDmode:
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3527 return 2;
3528 case DImode:
3529 case SImode:
3530 case HImode:
3531 case QImode:
3532 case CSImode:
3533 case CHImode:
3534 case CQImode:
3535 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3536 classes[0] = X86_64_INTEGERSI_CLASS;
3537 else
3538 classes[0] = X86_64_INTEGER_CLASS;
3539 return 1;
3540 case CDImode:
3541 case TImode:
3542 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3543 return 2;
3544 case CTImode:
3545 return 0;
3546 case SFmode:
3547 if (!(bit_offset % 64))
3548 classes[0] = X86_64_SSESF_CLASS;
3549 else
3550 classes[0] = X86_64_SSE_CLASS;
3551 return 1;
3552 case DFmode:
3553 classes[0] = X86_64_SSEDF_CLASS;
3554 return 1;
3555 case XFmode:
3556 classes[0] = X86_64_X87_CLASS;
3557 classes[1] = X86_64_X87UP_CLASS;
3558 return 2;
3559 case TFmode:
3560 classes[0] = X86_64_SSE_CLASS;
3561 classes[1] = X86_64_SSEUP_CLASS;
3562 return 2;
3563 case SCmode:
3564 classes[0] = X86_64_SSE_CLASS;
3565 return 1;
3566 case DCmode:
3567 classes[0] = X86_64_SSEDF_CLASS;
3568 classes[1] = X86_64_SSEDF_CLASS;
3569 return 2;
3570 case XCmode:
3571 classes[0] = X86_64_COMPLEX_X87_CLASS;
3572 return 1;
3573 case TCmode:
3574 /* This mode is larger than 16 bytes.  */
3575 return 0;
3576 case V4SFmode:
3577 case V4SImode:
3578 case V16QImode:
3579 case V8HImode:
3580 case V2DFmode:
3581 case V2DImode:
3582 classes[0] = X86_64_SSE_CLASS;
3583 classes[1] = X86_64_SSEUP_CLASS;
3584 return 2;
3585 case V2SFmode:
3586 case V2SImode:
3587 case V4HImode:
3588 case V8QImode:
3589 classes[0] = X86_64_SSE_CLASS;
3590 return 1;
3591 case BLKmode:
3592 case VOIDmode:
3593 return 0;
3594 default:
3595 gcc_assert (VECTOR_MODE_P (mode));
3596
3597 if (bytes > 16)
3598 return 0;
3599
3600 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3601
3602 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3603 classes[0] = X86_64_INTEGERSI_CLASS;
3604 else
3605 classes[0] = X86_64_INTEGER_CLASS;
3606 classes[1] = X86_64_INTEGER_CLASS;
3607 return 1 + (bytes > 8);
3608 }
3609 }
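/* For illustration (hypothetical types, not used here):

     struct p { double x; double y; };        two eightbytes, SSEDF + SSEDF
     struct q { long a; long b; long c; };    24 bytes, classified as memory

   P is passed in %xmm0 and %xmm1, while Q exceeds the 16 byte limit above,
   so classify_argument returns 0 and the argument goes on the stack.  */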
3610
3611 /* Examine the argument and set the number of registers required in each
3612 class.  Return 0 iff the parameter should be passed in memory.  */
3613 static int
3614 examine_argument (enum machine_mode mode, tree type, int in_return,
3615 int *int_nregs, int *sse_nregs)
3616 {
3617 enum x86_64_reg_class regclass[MAX_CLASSES];
3618 int n = classify_argument (mode, type, regclass, 0);
3619
3620 *int_nregs = 0;
3621 *sse_nregs = 0;
3622 if (!n)
3623 return 0;
3624 for (n--; n >= 0; n--)
3625 switch (regclass[n])
3626 {
3627 case X86_64_INTEGER_CLASS:
3628 case X86_64_INTEGERSI_CLASS:
3629 (*int_nregs)++;
3630 break;
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3634 (*sse_nregs)++;
3635 break;
3636 case X86_64_NO_CLASS:
3637 case X86_64_SSEUP_CLASS:
3638 break;
3639 case X86_64_X87_CLASS:
3640 case X86_64_X87UP_CLASS:
3641 if (!in_return)
3642 return 0;
3643 break;
3644 case X86_64_COMPLEX_X87_CLASS:
3645 return in_return ? 2 : 0;
3646 case X86_64_MEMORY_CLASS:
3647 gcc_unreachable ();
3648 }
3649 return 1;
3650 }
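/* Worked example (hypothetical struct): for

     struct s { int a; double d; };

   classify_argument yields { INTEGERSI, SSEDF }, so the counts come out
   as *int_nregs == 1 and *sse_nregs == 1; the struct therefore needs one
   general purpose and one SSE register, e.g. %rdi and %xmm0 when it is
   the first argument of a call.  */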
3651
3652 /* Construct container for the argument used by GCC interface. See
3653 FUNCTION_ARG for the detailed description. */
3654
3655 static rtx
3656 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3657 tree type, int in_return, int nintregs, int nsseregs,
3658 const int *intreg, int sse_regno)
3659 {
3660 /* The following variables hold the static issued_error state. */
3661 static bool issued_sse_arg_error;
3662 static bool issued_sse_ret_error;
3663 static bool issued_x87_ret_error;
3664
3665 enum machine_mode tmpmode;
3666 int bytes =
3667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3668 enum x86_64_reg_class regclass[MAX_CLASSES];
3669 int n;
3670 int i;
3671 int nexps = 0;
3672 int needed_sseregs, needed_intregs;
3673 rtx exp[MAX_CLASSES];
3674 rtx ret;
3675
3676 n = classify_argument (mode, type, regclass, 0);
3677 if (!n)
3678 return NULL;
3679 if (!examine_argument (mode, type, in_return, &needed_intregs,
3680 &needed_sseregs))
3681 return NULL;
3682 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3683 return NULL;
3684
3685 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3686 some less clueful developer tries to use floating-point anyway. */
3687 if (needed_sseregs && !TARGET_SSE)
3688 {
3689 if (in_return)
3690 {
3691 if (!issued_sse_ret_error)
3692 {
3693 error ("SSE register return with SSE disabled");
3694 issued_sse_ret_error = true;
3695 }
3696 }
3697 else if (!issued_sse_arg_error)
3698 {
3699 error ("SSE register argument with SSE disabled");
3700 issued_sse_arg_error = true;
3701 }
3702 return NULL;
3703 }
3704
3705 /* Likewise, error if the ABI requires us to return values in the
3706 x87 registers and the user specified -mno-80387. */
3707 if (!TARGET_80387 && in_return)
3708 for (i = 0; i < n; i++)
3709 if (regclass[i] == X86_64_X87_CLASS
3710 || regclass[i] == X86_64_X87UP_CLASS
3711 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3712 {
3713 if (!issued_x87_ret_error)
3714 {
3715 error ("x87 register return with x87 disabled");
3716 issued_x87_ret_error = true;
3717 }
3718 return NULL;
3719 }
3720
3721 /* First construct simple cases. Avoid SCmode, since we want to use
3722 a single register to pass this type.  */
3723 if (n == 1 && mode != SCmode)
3724 switch (regclass[0])
3725 {
3726 case X86_64_INTEGER_CLASS:
3727 case X86_64_INTEGERSI_CLASS:
3728 return gen_rtx_REG (mode, intreg[0]);
3729 case X86_64_SSE_CLASS:
3730 case X86_64_SSESF_CLASS:
3731 case X86_64_SSEDF_CLASS:
3732 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3733 case X86_64_X87_CLASS:
3734 case X86_64_COMPLEX_X87_CLASS:
3735 return gen_rtx_REG (mode, FIRST_STACK_REG);
3736 case X86_64_NO_CLASS:
3737 /* Zero sized array, struct or class. */
3738 return NULL;
3739 default:
3740 gcc_unreachable ();
3741 }
3742 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3743 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3744 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3745
3746 if (n == 2
3747 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3748 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3749 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3750 && regclass[1] == X86_64_INTEGER_CLASS
3751 && (mode == CDImode || mode == TImode || mode == TFmode)
3752 && intreg[0] + 1 == intreg[1])
3753 return gen_rtx_REG (mode, intreg[0]);
3754
3755 /* Otherwise figure out the entries of the PARALLEL. */
3756 for (i = 0; i < n; i++)
3757 {
3758 switch (regclass[i])
3759 {
3760 case X86_64_NO_CLASS:
3761 break;
3762 case X86_64_INTEGER_CLASS:
3763 case X86_64_INTEGERSI_CLASS:
3764 /* Merge TImodes on aligned occasions here too. */
3765 if (i * 8 + 8 > bytes)
3766 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3767 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3768 tmpmode = SImode;
3769 else
3770 tmpmode = DImode;
3771 /* We've requested a size for which there is no integer mode (e.g. 24 bits).  Use DImode.  */
3772 if (tmpmode == BLKmode)
3773 tmpmode = DImode;
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (tmpmode, *intreg),
3776 GEN_INT (i*8));
3777 intreg++;
3778 break;
3779 case X86_64_SSESF_CLASS:
3780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (SFmode,
3782 SSE_REGNO (sse_regno)),
3783 GEN_INT (i*8));
3784 sse_regno++;
3785 break;
3786 case X86_64_SSEDF_CLASS:
3787 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3788 gen_rtx_REG (DFmode,
3789 SSE_REGNO (sse_regno)),
3790 GEN_INT (i*8));
3791 sse_regno++;
3792 break;
3793 case X86_64_SSE_CLASS:
3794 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3795 tmpmode = TImode;
3796 else
3797 tmpmode = DImode;
3798 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3799 gen_rtx_REG (tmpmode,
3800 SSE_REGNO (sse_regno)),
3801 GEN_INT (i*8));
3802 if (tmpmode == TImode)
3803 i++;
3804 sse_regno++;
3805 break;
3806 default:
3807 gcc_unreachable ();
3808 }
3809 }
3810
3811 /* Empty aligned struct, union or class. */
3812 if (nexps == 0)
3813 return NULL;
3814
3815 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3816 for (i = 0; i < nexps; i++)
3817 XVECEXP (ret, 0, i) = exp [i];
3818 return ret;
3819 }
3820
3821 /* Update the data in CUM to advance over an argument of mode MODE
3822 and data type TYPE. (TYPE is null for libcalls where that information
3823 may not be available.) */
3824
3825 static void
3826 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3827 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3828 {
3829 switch (mode)
3830 {
3831 default:
3832 break;
3833
3834 case BLKmode:
3835 if (bytes < 0)
3836 break;
3837 /* FALLTHRU */
3838
3839 case DImode:
3840 case SImode:
3841 case HImode:
3842 case QImode:
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3846
3847 if (cum->nregs <= 0)
3848 {
3849 cum->nregs = 0;
3850 cum->regno = 0;
3851 }
3852 break;
3853
3854 case DFmode:
3855 if (cum->float_in_sse < 2)
3856 break;
3857 case SFmode:
3858 if (cum->float_in_sse < 1)
3859 break;
3860 /* FALLTHRU */
3861
3862 case TImode:
3863 case V16QImode:
3864 case V8HImode:
3865 case V4SImode:
3866 case V2DImode:
3867 case V4SFmode:
3868 case V2DFmode:
3869 if (!type || !AGGREGATE_TYPE_P (type))
3870 {
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3875 {
3876 cum->sse_nregs = 0;
3877 cum->sse_regno = 0;
3878 }
3879 }
3880 break;
3881
3882 case V8QImode:
3883 case V4HImode:
3884 case V2SImode:
3885 case V2SFmode:
3886 if (!type || !AGGREGATE_TYPE_P (type))
3887 {
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3892 {
3893 cum->mmx_nregs = 0;
3894 cum->mmx_regno = 0;
3895 }
3896 }
3897 break;
3898 }
3899 }
3900
3901 static void
3902 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3903 tree type, HOST_WIDE_INT words)
3904 {
3905 int int_nregs, sse_nregs;
3906
3907 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3908 cum->words += words;
3909 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3910 {
3911 cum->nregs -= int_nregs;
3912 cum->sse_nregs -= sse_nregs;
3913 cum->regno += int_nregs;
3914 cum->sse_regno += sse_nregs;
3915 }
3916 else
3917 cum->words += words;
3918 }
3919
3920 static void
3921 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3922 HOST_WIDE_INT words)
3923 {
3924 /* Otherwise, this should be passed indirect. */
3925 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3926
3927 cum->words += words;
3928 if (cum->nregs > 0)
3929 {
3930 cum->nregs -= 1;
3931 cum->regno += 1;
3932 }
3933 }
3934
3935 void
3936 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3937 tree type, int named ATTRIBUTE_UNUSED)
3938 {
3939 HOST_WIDE_INT bytes, words;
3940
3941 if (mode == BLKmode)
3942 bytes = int_size_in_bytes (type);
3943 else
3944 bytes = GET_MODE_SIZE (mode);
3945 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3946
3947 if (type)
3948 mode = type_natural_mode (type);
3949
3950 if (TARGET_64BIT_MS_ABI)
3951 function_arg_advance_ms_64 (cum, bytes, words);
3952 else if (TARGET_64BIT)
3953 function_arg_advance_64 (cum, mode, type, words);
3954 else
3955 function_arg_advance_32 (cum, mode, type, bytes, words);
3956 }
3957
3958 /* Define where to put the arguments to a function.
3959 Value is zero to push the argument on the stack,
3960 or a hard register in which to store the argument.
3961
3962 MODE is the argument's machine mode.
3963 TYPE is the data type of the argument (as a tree).
3964 This is null for libcalls where that information may
3965 not be available.
3966 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3967 the preceding args and about the function being called.
3968 NAMED is nonzero if this argument is a named parameter
3969 (otherwise it is an extra parameter matching an ellipsis). */
3970
3971 static rtx
3972 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3973 enum machine_mode orig_mode, tree type,
3974 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3975 {
3976 static bool warnedsse, warnedmmx;
3977
3978 /* Avoid the AL settings for the Unix64 ABI. */
3979 if (mode == VOIDmode)
3980 return constm1_rtx;
3981
3982 switch (mode)
3983 {
3984 default:
3985 break;
3986
3987 case BLKmode:
3988 if (bytes < 0)
3989 break;
3990 /* FALLTHRU */
3991 case DImode:
3992 case SImode:
3993 case HImode:
3994 case QImode:
3995 if (words <= cum->nregs)
3996 {
3997 int regno = cum->regno;
3998
3999 /* Fastcall allocates the first two DWORD (SImode) or
4000 smaller arguments to ECX and EDX. */
4001 if (cum->fastcall)
4002 {
4003 if (mode == BLKmode || mode == DImode)
4004 break;
4005
4006 /* ECX, not EAX, is the first allocated register.  */
4007 if (regno == 0)
4008 regno = 2;
4009 }
4010 return gen_rtx_REG (mode, regno);
4011 }
4012 break;
4013
4014 case DFmode:
4015 if (cum->float_in_sse < 2)
4016 break;
4017 case SFmode:
4018 if (cum->float_in_sse < 1)
4019 break;
4020 /* FALLTHRU */
4021 case TImode:
4022 case V16QImode:
4023 case V8HImode:
4024 case V4SImode:
4025 case V2DImode:
4026 case V4SFmode:
4027 case V2DFmode:
4028 if (!type || !AGGREGATE_TYPE_P (type))
4029 {
4030 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4031 {
4032 warnedsse = true;
4033 warning (0, "SSE vector argument without SSE enabled "
4034 "changes the ABI");
4035 }
4036 if (cum->sse_nregs)
4037 return gen_reg_or_parallel (mode, orig_mode,
4038 cum->sse_regno + FIRST_SSE_REG);
4039 }
4040 break;
4041
4042 case V8QImode:
4043 case V4HImode:
4044 case V2SImode:
4045 case V2SFmode:
4046 if (!type || !AGGREGATE_TYPE_P (type))
4047 {
4048 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4049 {
4050 warnedmmx = true;
4051 warning (0, "MMX vector argument without MMX enabled "
4052 "changes the ABI");
4053 }
4054 if (cum->mmx_nregs)
4055 return gen_reg_or_parallel (mode, orig_mode,
4056 cum->mmx_regno + FIRST_MMX_REG);
4057 }
4058 break;
4059 }
4060
4061 return NULL_RTX;
4062 }
4063
4064 static rtx
4065 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4066 enum machine_mode orig_mode, tree type)
4067 {
4068 /* Handle a hidden AL argument containing the number of SSE registers
4069 used when calling a varargs x86-64 function.  */
4070 if (mode == VOIDmode)
4071 return GEN_INT (cum->maybe_vaarg
4072 ? (cum->sse_nregs < 0
4073 ? SSE_REGPARM_MAX
4074 : cum->sse_regno)
4075 : -1);
4076
4077 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4078 cum->sse_nregs,
4079 &x86_64_int_parameter_registers [cum->regno],
4080 cum->sse_regno);
4081 }
4082
4083 static rtx
4084 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4085 enum machine_mode orig_mode, int named)
4086 {
4087 unsigned int regno;
4088
4089 /* Avoid the AL settings for the Unix64 ABI. */
4090 if (mode == VOIDmode)
4091 return constm1_rtx;
4092
4093 /* If we've run out of registers, it goes on the stack. */
4094 if (cum->nregs == 0)
4095 return NULL_RTX;
4096
4097 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4098
4099 /* Only floating point modes are passed in anything but integer regs. */
4100 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4101 {
4102 if (named)
4103 regno = cum->regno + FIRST_SSE_REG;
4104 else
4105 {
4106 rtx t1, t2;
4107
4108 /* Unnamed floating parameters are passed in both the
4109 SSE and integer registers. */
4110 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4111 t2 = gen_rtx_REG (mode, regno);
4112 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4113 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4114 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4115 }
4116 }
4117
4118 return gen_reg_or_parallel (mode, orig_mode, regno);
4119 }
4120
4121 rtx
4122 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4123 tree type, int named)
4124 {
4125 enum machine_mode mode = omode;
4126 HOST_WIDE_INT bytes, words;
4127
4128 if (mode == BLKmode)
4129 bytes = int_size_in_bytes (type);
4130 else
4131 bytes = GET_MODE_SIZE (mode);
4132 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4133
4134 /* To simplify the code below, represent vector types with a vector mode
4135 even if MMX/SSE are not active. */
4136 if (type && TREE_CODE (type) == VECTOR_TYPE)
4137 mode = type_natural_mode (type);
4138
4139 if (TARGET_64BIT_MS_ABI)
4140 return function_arg_ms_64 (cum, mode, omode, named);
4141 else if (TARGET_64BIT)
4142 return function_arg_64 (cum, mode, omode, type);
4143 else
4144 return function_arg_32 (cum, mode, omode, type, bytes, words);
4145 }
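/* For illustration (hypothetical ia32 declaration):

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   function_arg_32 hands back %ecx for A and %edx for B, and NULL_RTX for
   C, which is then pushed on the stack; without fastcall or regparm every
   integer argument ends up on the stack.  */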
4146
4147 /* A C expression that indicates when an argument must be passed by
4148 reference. If nonzero for an argument, a copy of that argument is
4149 made in memory and a pointer to the argument is passed instead of
4150 the argument itself. The pointer is passed in whatever way is
4151 appropriate for passing a pointer to that type. */
4152
4153 static bool
4154 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4155 enum machine_mode mode ATTRIBUTE_UNUSED,
4156 tree type, bool named ATTRIBUTE_UNUSED)
4157 {
4158 if (TARGET_64BIT_MS_ABI)
4159 {
4160 if (type)
4161 {
4162 /* Arrays are passed by reference. */
4163 if (TREE_CODE (type) == ARRAY_TYPE)
4164 return true;
4165
4166 if (AGGREGATE_TYPE_P (type))
4167 {
4168 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4169 are passed by reference. */
4170 int el2 = exact_log2 (int_size_in_bytes (type));
4171 return !(el2 >= 0 && el2 <= 3);
4172 }
4173 }
4174
4175 /* __m128 is passed by reference. */
4176 /* ??? How to handle complex? For now treat them as structs,
4177 and pass them by reference if they're too large. */
4178 if (GET_MODE_SIZE (mode) > 8)
4179 return true;
4180 }
4181 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4182 return 1;
4183
4184 return 0;
4185 }
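/* Example of the MS 64-bit rule above (hypothetical types): a 12 byte

     struct s { int a, b, c; };

   has no size in {1, 2, 4, 8}, so it is passed by reference, while the
   8 byte struct { int a, b; } travels by value in one integer register.  */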
4186
4187 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4188 ABI. Only called if TARGET_SSE. */
4189 static bool
4190 contains_128bit_aligned_vector_p (tree type)
4191 {
4192 enum machine_mode mode = TYPE_MODE (type);
4193 if (SSE_REG_MODE_P (mode)
4194 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4195 return true;
4196 if (TYPE_ALIGN (type) < 128)
4197 return false;
4198
4199 if (AGGREGATE_TYPE_P (type))
4200 {
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type))
4203 {
4204 case RECORD_TYPE:
4205 case UNION_TYPE:
4206 case QUAL_UNION_TYPE:
4207 {
4208 tree field;
4209
4210 /* Walk all the structure fields. */
4211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4212 {
4213 if (TREE_CODE (field) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4215 return true;
4216 }
4217 break;
4218 }
4219
4220 case ARRAY_TYPE:
4221 /* Just for use if some languages pass arrays by value.  */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4223 return true;
4224 break;
4225
4226 default:
4227 gcc_unreachable ();
4228 }
4229 }
4230 return false;
4231 }
4232
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type. */
4235
4236 int
4237 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4238 {
4239 int align;
4240 if (type)
4241 align = TYPE_ALIGN (type);
4242 else
4243 align = GET_MODE_ALIGNMENT (mode);
4244 if (align < PARM_BOUNDARY)
4245 align = PARM_BOUNDARY;
4246 if (!TARGET_64BIT)
4247 {
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
4250 alignment.
4251
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
4255 if (!TARGET_SSE)
4256 align = PARM_BOUNDARY;
4257 else if (!type)
4258 {
4259 if (!SSE_REG_MODE_P (mode))
4260 align = PARM_BOUNDARY;
4261 }
4262 else
4263 {
4264 if (!contains_128bit_aligned_vector_p (type))
4265 align = PARM_BOUNDARY;
4266 }
4267 }
4268 if (align > 128)
4269 align = 128;
4270 return align;
4271 }
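/* Illustrative outcomes (assuming a default ia32 target with SSE): an
   __m128 argument such as

     void f (__m128 v);

   keeps its 128 bit boundary, a plain double falls back to PARM_BOUNDARY
   (32 bits on ia32), and on x86-64 any larger request is capped at 128.  */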
4272
4273 /* Return true if REGNO is a possible register number for a function value.  */
4274
4275 bool
4276 ix86_function_value_regno_p (int regno)
4277 {
4278 switch (regno)
4279 {
4280 case 0:
4281 return true;
4282
4283 case FIRST_FLOAT_REG:
4284 if (TARGET_64BIT_MS_ABI)
4285 return false;
4286 return TARGET_FLOAT_RETURNS_IN_80387;
4287
4288 case FIRST_SSE_REG:
4289 return TARGET_SSE;
4290
4291 case FIRST_MMX_REG:
4292 if (TARGET_MACHO || TARGET_64BIT)
4293 return false;
4294 return TARGET_MMX;
4295 }
4296
4297 return false;
4298 }
4299
4300 /* Define how to find the value returned by a function.
4301 VALTYPE is the data type of the value (as a tree).
4302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4303 otherwise, FUNC is 0. */
4304
4305 static rtx
4306 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4307 tree fntype, tree fn)
4308 {
4309 unsigned int regno;
4310
4311 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4312 we normally prevent this case when mmx is not available. However
4313 some ABIs may require the result to be returned like DImode. */
4314 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4315 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4316
4317 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4318 we prevent this case when sse is not available. However some ABIs
4319 may require the result to be returned like integer TImode. */
4320 else if (mode == TImode
4321 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4322 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4323
4324 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4325 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4326 regno = FIRST_FLOAT_REG;
4327 else
4328 /* Most things go in %eax. */
4329 regno = 0;
4330
4331 /* Override FP return register with %xmm0 for local functions when
4332 SSE math is enabled or for functions with the sseregparm attribute.  */
4333 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4334 {
4335 int sse_level = ix86_function_sseregparm (fntype, fn);
4336 if ((sse_level >= 1 && mode == SFmode)
4337 || (sse_level == 2 && mode == DFmode))
4338 regno = FIRST_SSE_REG;
4339 }
4340
4341 return gen_rtx_REG (orig_mode, regno);
4342 }
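/* Illustrative ia32 cases (hypothetical declarations):

     int         a (void);   returned in %eax
     long double b (void);   returned in %st(0)
     __m128      c (void);   returned in %xmm0 when SSE is enabled

   which matches the regno selection above.  */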
4343
4344 static rtx
4345 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4346 tree valtype)
4347 {
4348 rtx ret;
4349
4350 /* Handle libcalls, which don't provide a type node. */
4351 if (valtype == NULL)
4352 {
4353 switch (mode)
4354 {
4355 case SFmode:
4356 case SCmode:
4357 case DFmode:
4358 case DCmode:
4359 case TFmode:
4360 case SDmode:
4361 case DDmode:
4362 case TDmode:
4363 return gen_rtx_REG (mode, FIRST_SSE_REG);
4364 case XFmode:
4365 case XCmode:
4366 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4367 case TCmode:
4368 return NULL;
4369 default:
4370 return gen_rtx_REG (mode, 0);
4371 }
4372 }
4373
4374 ret = construct_container (mode, orig_mode, valtype, 1,
4375 REGPARM_MAX, SSE_REGPARM_MAX,
4376 x86_64_int_return_registers, 0);
4377
4378 /* For zero sized structures, construct_container returns NULL, but we
4379 need to keep the rest of the compiler happy by returning a meaningful value.  */
4380 if (!ret)
4381 ret = gen_rtx_REG (orig_mode, 0);
4382
4383 return ret;
4384 }
4385
4386 static rtx
4387 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4388 {
4389 unsigned int regno = 0;
4390
4391 if (TARGET_SSE)
4392 {
4393 if (mode == SFmode || mode == DFmode)
4394 regno = FIRST_SSE_REG;
4395 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4396 regno = FIRST_SSE_REG;
4397 }
4398
4399 return gen_rtx_REG (orig_mode, regno);
4400 }
4401
4402 static rtx
4403 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4404 enum machine_mode orig_mode, enum machine_mode mode)
4405 {
4406 tree fn, fntype;
4407
4408 fn = NULL_TREE;
4409 if (fntype_or_decl && DECL_P (fntype_or_decl))
4410 fn = fntype_or_decl;
4411 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4412
4413 if (TARGET_64BIT_MS_ABI)
4414 return function_value_ms_64 (orig_mode, mode);
4415 else if (TARGET_64BIT)
4416 return function_value_64 (orig_mode, mode, valtype);
4417 else
4418 return function_value_32 (orig_mode, mode, fntype, fn);
4419 }
4420
4421 static rtx
4422 ix86_function_value (tree valtype, tree fntype_or_decl,
4423 bool outgoing ATTRIBUTE_UNUSED)
4424 {
4425 enum machine_mode mode, orig_mode;
4426
4427 orig_mode = TYPE_MODE (valtype);
4428 mode = type_natural_mode (valtype);
4429 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4430 }
4431
4432 rtx
4433 ix86_libcall_value (enum machine_mode mode)
4434 {
4435 return ix86_function_value_1 (NULL, NULL, mode, mode);
4436 }
4437
4438 /* Return true iff type is returned in memory. */
4439
4440 static int
4441 return_in_memory_32 (tree type, enum machine_mode mode)
4442 {
4443 HOST_WIDE_INT size;
4444
4445 if (mode == BLKmode)
4446 return 1;
4447
4448 size = int_size_in_bytes (type);
4449
4450 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4451 return 0;
4452
4453 if (VECTOR_MODE_P (mode) || mode == TImode)
4454 {
4455 /* User-created vectors small enough to fit in EAX. */
4456 if (size < 8)
4457 return 0;
4458
4459 /* MMX/3dNow values are returned in MM0,
4460 except when it doesn't exist.  */
4461 if (size == 8)
4462 return (TARGET_MMX ? 0 : 1);
4463
4464 /* SSE values are returned in XMM0, except when it doesn't exist. */
4465 if (size == 16)
4466 return (TARGET_SSE ? 0 : 1);
4467 }
4468
4469 if (mode == XFmode)
4470 return 0;
4471
4472 if (mode == TDmode)
4473 return 1;
4474
4475 if (size > 12)
4476 return 1;
4477 return 0;
4478 }
4479
4480 static int
4481 return_in_memory_64 (tree type, enum machine_mode mode)
4482 {
4483 int needed_intregs, needed_sseregs;
4484 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4485 }
4486
4487 static int
4488 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4489 {
4490 HOST_WIDE_INT size = int_size_in_bytes (type);
4491
4492 /* __m128 and friends are returned in xmm0. */
4493 if (size == 16 && VECTOR_MODE_P (mode))
4494 return 0;
4495
4496 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
4497 return (size != 1 && size != 2 && size != 4 && size != 8);
4498 }
4499
4500 int
4501 ix86_return_in_memory (tree type)
4502 {
4503 enum machine_mode mode = type_natural_mode (type);
4504
4505 if (TARGET_64BIT_MS_ABI)
4506 return return_in_memory_ms_64 (type, mode);
4507 else if (TARGET_64BIT)
4508 return return_in_memory_64 (type, mode);
4509 else
4510 return return_in_memory_32 (type, mode);
4511 }
4512
4513 /* Return true iff TYPE is returned in memory.  This version is used
4514 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4515 but differs notably in that when MMX is available, 8-byte vectors
4516 are returned in memory, rather than in MMX registers. */
4517
4518 int
4519 ix86_sol10_return_in_memory (tree type)
4520 {
4521 int size;
4522 enum machine_mode mode = type_natural_mode (type);
4523
4524 if (TARGET_64BIT)
4525 return return_in_memory_64 (type, mode);
4526
4527 if (mode == BLKmode)
4528 return 1;
4529
4530 size = int_size_in_bytes (type);
4531
4532 if (VECTOR_MODE_P (mode))
4533 {
4534 /* Return in memory only if MMX registers *are* available. This
4535 seems backwards, but it is consistent with the existing
4536 Solaris x86 ABI. */
4537 if (size == 8)
4538 return TARGET_MMX;
4539 if (size == 16)
4540 return !TARGET_SSE;
4541 }
4542 else if (mode == TImode)
4543 return !TARGET_SSE;
4544 else if (mode == XFmode)
4545 return 0;
4546
4547 return size > 12;
4548 }
4549
4550 /* When returning SSE vector types, we have a choice of either
4551 (1) being ABI incompatible with a -march switch, or
4552 (2) generating an error.
4553 Given no good solution, I think the safest thing is one warning.
4554 The user won't be able to use -Werror, but....
4555
4556 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4557 called in response to actually generating a caller or callee that
4558 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4559 via aggregate_value_p for general type probing from tree-ssa. */
4560
4561 static rtx
4562 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4563 {
4564 static bool warnedsse, warnedmmx;
4565
4566 if (!TARGET_64BIT && type)
4567 {
4568 /* Look at the return type of the function, not the function type. */
4569 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4570
4571 if (!TARGET_SSE && !warnedsse)
4572 {
4573 if (mode == TImode
4574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4575 {
4576 warnedsse = true;
4577 warning (0, "SSE vector return without SSE enabled "
4578 "changes the ABI");
4579 }
4580 }
4581
4582 if (!TARGET_MMX && !warnedmmx)
4583 {
4584 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4585 {
4586 warnedmmx = true;
4587 warning (0, "MMX vector return without MMX enabled "
4588 "changes the ABI");
4589 }
4590 }
4591 }
4592
4593 return NULL;
4594 }
4595
4596 \f
4597 /* Create the va_list data type. */
4598
4599 static tree
4600 ix86_build_builtin_va_list (void)
4601 {
4602 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4603
4604 /* For i386 we use a plain pointer to the argument area.  */
4605 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4606 return build_pointer_type (char_type_node);
4607
4608 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4609 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4610
4611 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4612 unsigned_type_node);
4613 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4614 unsigned_type_node);
4615 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4616 ptr_type_node);
4617 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4618 ptr_type_node);
4619
4620 va_list_gpr_counter_field = f_gpr;
4621 va_list_fpr_counter_field = f_fpr;
4622
4623 DECL_FIELD_CONTEXT (f_gpr) = record;
4624 DECL_FIELD_CONTEXT (f_fpr) = record;
4625 DECL_FIELD_CONTEXT (f_ovf) = record;
4626 DECL_FIELD_CONTEXT (f_sav) = record;
4627
4628 TREE_CHAIN (record) = type_decl;
4629 TYPE_NAME (record) = type_decl;
4630 TYPE_FIELDS (record) = f_gpr;
4631 TREE_CHAIN (f_gpr) = f_fpr;
4632 TREE_CHAIN (f_fpr) = f_ovf;
4633 TREE_CHAIN (f_ovf) = f_sav;
4634
4635 layout_type (record);
4636
4637 /* The correct type is an array type of one element. */
4638 return build_array_type (record, build_index_type (size_zero_node));
4639 }
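/* The record built above corresponds to the familiar x86-64 va_list
   layout; in plain C it would look roughly like (illustrative only):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];  */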
4640
4641 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4642
4643 static void
4644 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4645 {
4646 rtx save_area, mem;
4647 rtx label;
4648 rtx label_ref;
4649 rtx tmp_reg;
4650 rtx nsse_reg;
4651 int set;
4652 int i;
4653
4654 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4655 return;
4656
4657 /* Indicate to allocate space on the stack for varargs save area. */
4658 ix86_save_varrargs_registers = 1;
4659 cfun->stack_alignment_needed = 128;
4660
4661 save_area = frame_pointer_rtx;
4662 set = get_varargs_alias_set ();
4663
4664 for (i = cum->regno;
4665 i < ix86_regparm
4666 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4667 i++)
4668 {
4669 mem = gen_rtx_MEM (Pmode,
4670 plus_constant (save_area, i * UNITS_PER_WORD));
4671 MEM_NOTRAP_P (mem) = 1;
4672 set_mem_alias_set (mem, set);
4673 emit_move_insn (mem, gen_rtx_REG (Pmode,
4674 x86_64_int_parameter_registers[i]));
4675 }
4676
4677 if (cum->sse_nregs && cfun->va_list_fpr_size)
4678 {
4679 /* Now emit code to save the SSE registers.  The AX parameter contains the
4680 number of SSE parameter registers used to call this function.  We use the
4681 sse_prologue_save insn template, which produces a computed jump across
4682 the SSE saves.  We need some preparation work to get this working.  */
4683
4684 label = gen_label_rtx ();
4685 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4686
4687 /* Compute address to jump to :
4688 label - 5*eax + nnamed_sse_arguments*5 */
4689 tmp_reg = gen_reg_rtx (Pmode);
4690 nsse_reg = gen_reg_rtx (Pmode);
4691 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4692 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4693 gen_rtx_MULT (Pmode, nsse_reg,
4694 GEN_INT (4))));
4695 if (cum->sse_regno)
4696 emit_move_insn
4697 (nsse_reg,
4698 gen_rtx_CONST (DImode,
4699 gen_rtx_PLUS (DImode,
4700 label_ref,
4701 GEN_INT (cum->sse_regno * 4))));
4702 else
4703 emit_move_insn (nsse_reg, label_ref);
4704 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4705
4706 /* Compute the address of the memory block we save into.  We always use a
4707 pointer pointing 127 bytes after the first byte to store - this is needed
4708 to keep the instruction size limited to 4 bytes.  */
4709 tmp_reg = gen_reg_rtx (Pmode);
4710 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4711 plus_constant (save_area,
4712 8 * REGPARM_MAX + 127)));
4713 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4714 MEM_NOTRAP_P (mem) = 1;
4715 set_mem_alias_set (mem, set);
4716 set_mem_align (mem, BITS_PER_WORD);
4717
4718 /* And finally do the dirty job! */
4719 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4720 GEN_INT (cum->sse_regno), label));
4721 }
4722 }
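/* The register save area laid out above has the following shape on
   x86-64 (offsets from reg_save_area, for illustration):

     bytes   0 ..  47   the six integer argument registers, 8 bytes each
     bytes  48 .. 175   the eight SSE argument registers, 16 bytes each

   gp_offset and fp_offset in the va_list index into exactly this block.  */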
4723
4724 static void
4725 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4726 {
4727 int set = get_varargs_alias_set ();
4728 int i;
4729
4730 for (i = cum->regno; i < REGPARM_MAX; i++)
4731 {
4732 rtx reg, mem;
4733
4734 mem = gen_rtx_MEM (Pmode,
4735 plus_constant (virtual_incoming_args_rtx,
4736 i * UNITS_PER_WORD));
4737 MEM_NOTRAP_P (mem) = 1;
4738 set_mem_alias_set (mem, set);
4739
4740 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4741 emit_move_insn (mem, reg);
4742 }
4743 }
4744
4745 static void
4746 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4747 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4748 int no_rtl)
4749 {
4750 CUMULATIVE_ARGS next_cum;
4751 tree fntype;
4752 int stdarg_p;
4753
4754 /* This argument doesn't appear to be used anymore. Which is good,
4755 because the old code here didn't suppress rtl generation. */
4756 gcc_assert (!no_rtl);
4757
4758 if (!TARGET_64BIT)
4759 return;
4760
4761 fntype = TREE_TYPE (current_function_decl);
4762 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4763 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4764 != void_type_node));
4765
4766 /* For varargs, we do not want to skip the dummy va_dcl argument.
4767 For stdargs, we do want to skip the last named argument. */
4768 next_cum = *cum;
4769 if (stdarg_p)
4770 function_arg_advance (&next_cum, mode, type, 1);
4771
4772 if (TARGET_64BIT_MS_ABI)
4773 setup_incoming_varargs_ms_64 (&next_cum);
4774 else
4775 setup_incoming_varargs_64 (&next_cum);
4776 }
4777
4778 /* Implement va_start. */
4779
4780 void
4781 ix86_va_start (tree valist, rtx nextarg)
4782 {
4783 HOST_WIDE_INT words, n_gpr, n_fpr;
4784 tree f_gpr, f_fpr, f_ovf, f_sav;
4785 tree gpr, fpr, ovf, sav, t;
4786 tree type;
4787
4788 /* Only 64bit target needs something special. */
4789 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4790 {
4791 std_expand_builtin_va_start (valist, nextarg);
4792 return;
4793 }
4794
4795 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4796 f_fpr = TREE_CHAIN (f_gpr);
4797 f_ovf = TREE_CHAIN (f_fpr);
4798 f_sav = TREE_CHAIN (f_ovf);
4799
4800 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4801 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4802 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4803 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4804 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4805
4806 /* Count number of gp and fp argument registers used. */
4807 words = current_function_args_info.words;
4808 n_gpr = current_function_args_info.regno;
4809 n_fpr = current_function_args_info.sse_regno;
4810
4811 if (cfun->va_list_gpr_size)
4812 {
4813 type = TREE_TYPE (gpr);
4814 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4815 build_int_cst (type, n_gpr * 8));
4816 TREE_SIDE_EFFECTS (t) = 1;
4817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4818 }
4819
4820 if (cfun->va_list_fpr_size)
4821 {
4822 type = TREE_TYPE (fpr);
4823 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4824 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4825 TREE_SIDE_EFFECTS (t) = 1;
4826 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4827 }
4828
4829 /* Find the overflow area. */
4830 type = TREE_TYPE (ovf);
4831 t = make_tree (type, virtual_incoming_args_rtx);
4832 if (words != 0)
4833 t = build2 (POINTER_PLUS_EXPR, type, t,
4834 size_int (words * UNITS_PER_WORD));
4835 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4836 TREE_SIDE_EFFECTS (t) = 1;
4837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4838
4839 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4840 {
4841 /* Find the register save area.
4842 The prologue of the function saves it right above the stack frame.  */
4843 type = TREE_TYPE (sav);
4844 t = make_tree (type, frame_pointer_rtx);
4845 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4846 TREE_SIDE_EFFECTS (t) = 1;
4847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4848 }
4849 }
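/* Worked example (hypothetical prototype): for

     void f (int a, double d, ...);

   one integer and one SSE register are consumed by the named arguments,
   so the code above sets gp_offset to 1 * 8 = 8 and fp_offset to
   8 * REGPARM_MAX + 1 * 16 = 64, and overflow_arg_area starts right
   after any named words that spilled to the stack.  */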
4850
4851 /* Implement va_arg. */
4852
4853 static tree
4854 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4855 {
4856 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4857 tree f_gpr, f_fpr, f_ovf, f_sav;
4858 tree gpr, fpr, ovf, sav, t;
4859 int size, rsize;
4860 tree lab_false, lab_over = NULL_TREE;
4861 tree addr, t2;
4862 rtx container;
4863 int indirect_p = 0;
4864 tree ptrtype;
4865 enum machine_mode nat_mode;
4866
4867 /* Only 64bit target needs something special. */
4868 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4869 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4870
4871 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4872 f_fpr = TREE_CHAIN (f_gpr);
4873 f_ovf = TREE_CHAIN (f_fpr);
4874 f_sav = TREE_CHAIN (f_ovf);
4875
4876 valist = build_va_arg_indirect_ref (valist);
4877 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4878 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4879 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4880 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4881
4882 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4883 if (indirect_p)
4884 type = build_pointer_type (type);
4885 size = int_size_in_bytes (type);
4886 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4887
4888 nat_mode = type_natural_mode (type);
4889 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4890 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4891
4892 /* Pull the value out of the saved registers. */
4893
4894 addr = create_tmp_var (ptr_type_node, "addr");
4895 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4896
4897 if (container)
4898 {
4899 int needed_intregs, needed_sseregs;
4900 bool need_temp;
4901 tree int_addr, sse_addr;
4902
4903 lab_false = create_artificial_label ();
4904 lab_over = create_artificial_label ();
4905
4906 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4907
4908 need_temp = (!REG_P (container)
4909 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4910 || TYPE_ALIGN (type) > 128));
4911
4912 /* In case we are passing a structure, verify that it is a consecutive block
4913 on the register save area.  If not, we need to do moves.  */
4914 if (!need_temp && !REG_P (container))
4915 {
4916 /* Verify that all registers are strictly consecutive.  */
4917 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4918 {
4919 int i;
4920
4921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4922 {
4923 rtx slot = XVECEXP (container, 0, i);
4924 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4925 || INTVAL (XEXP (slot, 1)) != i * 16)
4926 need_temp = 1;
4927 }
4928 }
4929 else
4930 {
4931 int i;
4932
4933 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4934 {
4935 rtx slot = XVECEXP (container, 0, i);
4936 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4937 || INTVAL (XEXP (slot, 1)) != i * 8)
4938 need_temp = 1;
4939 }
4940 }
4941 }
4942 if (!need_temp)
4943 {
4944 int_addr = addr;
4945 sse_addr = addr;
4946 }
4947 else
4948 {
4949 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4950 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4951 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4952 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4953 }
4954
4955 /* First ensure that we fit completely in registers. */
4956 if (needed_intregs)
4957 {
4958 t = build_int_cst (TREE_TYPE (gpr),
4959 (REGPARM_MAX - needed_intregs + 1) * 8);
4960 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4963 gimplify_and_add (t, pre_p);
4964 }
4965 if (needed_sseregs)
4966 {
4967 t = build_int_cst (TREE_TYPE (fpr),
4968 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4969 + REGPARM_MAX * 8);
4970 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4973 gimplify_and_add (t, pre_p);
4974 }
4975
4976 /* Compute index to start of area used for integer regs. */
4977 if (needed_intregs)
4978 {
4979 /* int_addr = gpr + sav; */
4980 t = fold_convert (sizetype, gpr);
4981 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4982 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4983 gimplify_and_add (t, pre_p);
4984 }
4985 if (needed_sseregs)
4986 {
4987 /* sse_addr = fpr + sav; */
4988 t = fold_convert (sizetype, fpr);
4989 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4990 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4991 gimplify_and_add (t, pre_p);
4992 }
4993 if (need_temp)
4994 {
4995 int i;
4996 tree temp = create_tmp_var (type, "va_arg_tmp");
4997
4998 /* addr = &temp; */
4999 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5000 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5001 gimplify_and_add (t, pre_p);
5002
5003 for (i = 0; i < XVECLEN (container, 0); i++)
5004 {
5005 rtx slot = XVECEXP (container, 0, i);
5006 rtx reg = XEXP (slot, 0);
5007 enum machine_mode mode = GET_MODE (reg);
5008 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5009 tree addr_type = build_pointer_type (piece_type);
5010 tree src_addr, src;
5011 int src_offset;
5012 tree dest_addr, dest;
5013
5014 if (SSE_REGNO_P (REGNO (reg)))
5015 {
5016 src_addr = sse_addr;
5017 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5018 }
5019 else
5020 {
5021 src_addr = int_addr;
5022 src_offset = REGNO (reg) * 8;
5023 }
5024 src_addr = fold_convert (addr_type, src_addr);
5025 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5026 size_int (src_offset));
5027 src = build_va_arg_indirect_ref (src_addr);
5028
5029 dest_addr = fold_convert (addr_type, addr);
5030 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5031 size_int (INTVAL (XEXP (slot, 1))));
5032 dest = build_va_arg_indirect_ref (dest_addr);
5033
5034 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5035 gimplify_and_add (t, pre_p);
5036 }
5037 }
5038
5039 if (needed_intregs)
5040 {
5041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5043 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5044 gimplify_and_add (t, pre_p);
5045 }
5046 if (needed_sseregs)
5047 {
5048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5050 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5051 gimplify_and_add (t, pre_p);
5052 }
5053
5054 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5055 gimplify_and_add (t, pre_p);
5056
5057 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5058 append_to_statement_list (t, pre_p);
5059 }
5060
5061 /* ... otherwise out of the overflow area. */
5062
5063 /* Care for on-stack alignment if needed. */
5064 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5065 || integer_zerop (TYPE_SIZE (type)))
5066 t = ovf;
5067 else
5068 {
5069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5070 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5071 size_int (align - 1));
5072 t = fold_convert (sizetype, t);
5073 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5074 size_int (-align));
5075 t = fold_convert (TREE_TYPE (ovf), t);
5076 }
5077 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5078
5079 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5080 gimplify_and_add (t2, pre_p);
5081
5082 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5083 size_int (rsize * UNITS_PER_WORD));
5084 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5085 gimplify_and_add (t, pre_p);
5086
5087 if (container)
5088 {
5089 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5090 append_to_statement_list (t, pre_p);
5091 }
5092
5093 ptrtype = build_pointer_type (type);
5094 addr = fold_convert (ptrtype, addr);
5095
5096 if (indirect_p)
5097 addr = build_va_arg_indirect_ref (addr);
5098 return build_va_arg_indirect_ref (addr);
5099 }
5100 \f
5101 /* Return nonzero if OPNUM's MEM should be matched
5102 in movabs* patterns. */
5103
5104 int
5105 ix86_check_movabs (rtx insn, int opnum)
5106 {
5107 rtx set, mem;
5108
5109 set = PATTERN (insn);
5110 if (GET_CODE (set) == PARALLEL)
5111 set = XVECEXP (set, 0, 0);
5112 gcc_assert (GET_CODE (set) == SET);
5113 mem = XEXP (set, opnum);
5114 while (GET_CODE (mem) == SUBREG)
5115 mem = SUBREG_REG (mem);
5116 gcc_assert (MEM_P (mem));
5117 return (volatile_ok || !MEM_VOLATILE_P (mem));
5118 }
5119 \f
5120 /* Initialize the table of extra 80387 mathematical constants. */
5121
5122 static void
5123 init_ext_80387_constants (void)
5124 {
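  /* These are, in order, log10(2), ln(2), log2(e), log2(10) and pi, i.e. the
     values loaded by the fldlg2, fldln2, fldl2e, fldl2t and fldpi
     instructions.  */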
5125 static const char * cst[5] =
5126 {
5127 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5128 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5129 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5130 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5131 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5132 };
5133 int i;
5134
5135 for (i = 0; i < 5; i++)
5136 {
5137 real_from_string (&ext_80387_constants_table[i], cst[i]);
5138 /* Ensure each constant is rounded to XFmode precision. */
5139 real_convert (&ext_80387_constants_table[i],
5140 XFmode, &ext_80387_constants_table[i]);
5141 }
5142
5143 ext_80387_constants_init = 1;
5144 }
5145
5146 /* Return a nonzero code identifying the special instruction that can load
5147 the constant X, 0 if there is none, or -1 if X is not an x87 floating-point constant at all. */
5148
5149 int
5150 standard_80387_constant_p (rtx x)
5151 {
5152 enum machine_mode mode = GET_MODE (x);
5153
5154 REAL_VALUE_TYPE r;
5155
5156 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5157 return -1;
5158
5159 if (x == CONST0_RTX (mode))
5160 return 1;
5161 if (x == CONST1_RTX (mode))
5162 return 2;
5163
5164 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5165
5166 /* For XFmode constants, try to find a special 80387 instruction when
5167 optimizing for size or on those CPUs that benefit from them. */
5168 if (mode == XFmode
5169 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5170 {
5171 int i;
5172
5173 if (! ext_80387_constants_init)
5174 init_ext_80387_constants ();
5175
5176 for (i = 0; i < 5; i++)
5177 if (real_identical (&r, &ext_80387_constants_table[i]))
5178 return i + 3;
5179 }
5180
5181 /* A load of the constant -0.0 or -1.0 will be split into an
5182 fldz;fchs or fld1;fchs sequence. */
5183 if (real_isnegzero (&r))
5184 return 8;
5185 if (real_identical (&r, &dconstm1))
5186 return 9;
5187
5188 return 0;
5189 }
5190
5191 /* Return the opcode of the special instruction to be used to load
5192 the constant X. */
5193
5194 const char *
5195 standard_80387_constant_opcode (rtx x)
5196 {
5197 switch (standard_80387_constant_p (x))
5198 {
5199 case 1:
5200 return "fldz";
5201 case 2:
5202 return "fld1";
5203 case 3:
5204 return "fldlg2";
5205 case 4:
5206 return "fldln2";
5207 case 5:
5208 return "fldl2e";
5209 case 6:
5210 return "fldl2t";
5211 case 7:
5212 return "fldpi";
5213 case 8:
5214 case 9:
5215 return "#";
5216 default:
5217 gcc_unreachable ();
5218 }
5219 }
5220
5221 /* Return the CONST_DOUBLE representing the 80387 constant that is
5222 loaded by the specified special instruction. The argument IDX
5223 matches the return value from standard_80387_constant_p. */
5224
5225 rtx
5226 standard_80387_constant_rtx (int idx)
5227 {
5228 int i;
5229
5230 if (! ext_80387_constants_init)
5231 init_ext_80387_constants ();
5232
5233 switch (idx)
5234 {
5235 case 3:
5236 case 4:
5237 case 5:
5238 case 6:
5239 case 7:
5240 i = idx - 3;
5241 break;
5242
5243 default:
5244 gcc_unreachable ();
5245 }
5246
5247 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5248 XFmode);
5249 }
5250
5251 /* Return 1 if MODE is a valid SSE vector mode. */
5252 static int
5253 standard_sse_mode_p (enum machine_mode mode)
5254 {
5255 switch (mode)
5256 {
5257 case V16QImode:
5258 case V8HImode:
5259 case V4SImode:
5260 case V2DImode:
5261 case V4SFmode:
5262 case V2DFmode:
5263 return 1;
5264
5265 default:
5266 return 0;
5267 }
5268 }
5269
5270 /* Return nonzero if X is an FP constant that we can load into an SSE register
5271 without using memory. */
5272 int
5273 standard_sse_constant_p (rtx x)
5274 {
5275 enum machine_mode mode = GET_MODE (x);
5276
5277 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5278 return 1;
5279 if (vector_all_ones_operand (x, mode)
5280 && standard_sse_mode_p (mode))
5281 return TARGET_SSE2 ? 2 : -1;
5282
5283 return 0;
5284 }
5285
5286 /* Return the opcode of the special instruction to be used to load
5287 the constant X. */
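/* An all-zeros constant is generated by xoring the destination register with
   itself (xorps/xorpd/pxor) and an all-ones constant by comparing the register
   with itself for equality (pcmpeqd), so neither needs a load from memory.  */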
5288
5289 const char *
5290 standard_sse_constant_opcode (rtx insn, rtx x)
5291 {
5292 switch (standard_sse_constant_p (x))
5293 {
5294 case 1:
5295 if (get_attr_mode (insn) == MODE_V4SF)
5296 return "xorps\t%0, %0";
5297 else if (get_attr_mode (insn) == MODE_V2DF)
5298 return "xorpd\t%0, %0";
5299 else
5300 return "pxor\t%0, %0";
5301 case 2:
5302 return "pcmpeqd\t%0, %0";
5303 }
5304 gcc_unreachable ();
5305 }
5306
5307 /* Return 1 if OP contains a symbol reference. */
5308
5309 int
5310 symbolic_reference_mentioned_p (rtx op)
5311 {
5312 const char *fmt;
5313 int i;
5314
5315 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5316 return 1;
5317
5318 fmt = GET_RTX_FORMAT (GET_CODE (op));
5319 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5320 {
5321 if (fmt[i] == 'E')
5322 {
5323 int j;
5324
5325 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5326 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5327 return 1;
5328 }
5329
5330 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5331 return 1;
5332 }
5333
5334 return 0;
5335 }
5336
5337 /* Return 1 if it is appropriate to emit `ret' instructions in the
5338 body of a function. Do this only if the epilogue is simple, needing a
5339 couple of insns. Prior to reloading, we can't tell how many registers
5340 must be saved, so return 0 then. Return 0 if there is no frame
5341 marker to de-allocate. */
5342
5343 int
5344 ix86_can_use_return_insn_p (void)
5345 {
5346 struct ix86_frame frame;
5347
5348 if (! reload_completed || frame_pointer_needed)
5349 return 0;
5350
5351 /* Don't allow more than 32768 bytes of arguments to be popped, since
5352 that's all we can do with one instruction. */
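/* (The `ret' instruction encodes its pop count as a 16-bit immediate; the
   check below conservatively stays within the signed half of that range.) */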
5353 if (current_function_pops_args
5354 && current_function_args_size >= 32768)
5355 return 0;
5356
5357 ix86_compute_frame_layout (&frame);
5358 return frame.to_allocate == 0 && frame.nregs == 0;
5359 }
5360 \f
5361 /* Value should be nonzero if functions must have frame pointers.
5362 Zero means the frame pointer need not be set up (and parms may
5363 be accessed via the stack pointer) in functions that seem suitable. */
5364
5365 int
5366 ix86_frame_pointer_required (void)
5367 {
5368 /* If we accessed previous frames, then the generated code expects
5369 to be able to access the saved ebp value in our frame. */
5370 if (cfun->machine->accesses_prev_frame)
5371 return 1;
5372
5373 /* Several x86 OSes need a frame pointer for other reasons,
5374 usually pertaining to setjmp. */
5375 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5376 return 1;
5377
5378 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5379 the frame pointer by default. Turn it back on now if we've not
5380 got a leaf function. */
5381 if (TARGET_OMIT_LEAF_FRAME_POINTER
5382 && (!current_function_is_leaf
5383 || ix86_current_function_calls_tls_descriptor))
5384 return 1;
5385
5386 if (current_function_profile)
5387 return 1;
5388
5389 return 0;
5390 }
5391
5392 /* Record that the current function accesses previous call frames. */
5393
5394 void
5395 ix86_setup_frame_addresses (void)
5396 {
5397 cfun->machine->accesses_prev_frame = 1;
5398 }
5399 \f
5400 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5401 # define USE_HIDDEN_LINKONCE 1
5402 #else
5403 # define USE_HIDDEN_LINKONCE 0
5404 #endif
5405
5406 static int pic_labels_used;
5407
5408 /* Fills in the label name that should be used for a pc thunk for
5409 the given register. */
5410
5411 static void
5412 get_pc_thunk_name (char name[32], unsigned int regno)
5413 {
5414 gcc_assert (!TARGET_64BIT);
5415
5416 if (USE_HIDDEN_LINKONCE)
5417 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5418 else
5419 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5420 }
5421
5422
5423 /* This function generates the -fpic pc thunks: for each register that needs
5424 one, code that loads the register with the caller's return address and then returns. */
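/* For example, with hidden linkonce support the thunk for %ebx is emitted as
   something like

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret  */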
5425
5426 void
5427 ix86_file_end (void)
5428 {
5429 rtx xops[2];
5430 int regno;
5431
5432 for (regno = 0; regno < 8; ++regno)
5433 {
5434 char name[32];
5435
5436 if (! ((pic_labels_used >> regno) & 1))
5437 continue;
5438
5439 get_pc_thunk_name (name, regno);
5440
5441 #if TARGET_MACHO
5442 if (TARGET_MACHO)
5443 {
5444 switch_to_section (darwin_sections[text_coal_section]);
5445 fputs ("\t.weak_definition\t", asm_out_file);
5446 assemble_name (asm_out_file, name);
5447 fputs ("\n\t.private_extern\t", asm_out_file);
5448 assemble_name (asm_out_file, name);
5449 fputs ("\n", asm_out_file);
5450 ASM_OUTPUT_LABEL (asm_out_file, name);
5451 }
5452 else
5453 #endif
5454 if (USE_HIDDEN_LINKONCE)
5455 {
5456 tree decl;
5457
5458 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5459 error_mark_node);
5460 TREE_PUBLIC (decl) = 1;
5461 TREE_STATIC (decl) = 1;
5462 DECL_ONE_ONLY (decl) = 1;
5463
5464 (*targetm.asm_out.unique_section) (decl, 0);
5465 switch_to_section (get_named_section (decl, NULL, 0));
5466
5467 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5468 fputs ("\t.hidden\t", asm_out_file);
5469 assemble_name (asm_out_file, name);
5470 fputc ('\n', asm_out_file);
5471 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5472 }
5473 else
5474 {
5475 switch_to_section (text_section);
5476 ASM_OUTPUT_LABEL (asm_out_file, name);
5477 }
5478
5479 xops[0] = gen_rtx_REG (SImode, regno);
5480 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5481 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5482 output_asm_insn ("ret", xops);
5483 }
5484
5485 if (NEED_INDICATE_EXEC_STACK)
5486 file_end_indicate_exec_stack ();
5487 }
5488
5489 /* Emit code for the SET_GOT patterns. */
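/* With %ebx as the destination this typically expands to something like

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   when the pc thunk is used, and otherwise to

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */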
5490
5491 const char *
5492 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5493 {
5494 rtx xops[3];
5495
5496 xops[0] = dest;
5497
5498 if (TARGET_VXWORKS_RTP && flag_pic)
5499 {
5500 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5501 xops[2] = gen_rtx_MEM (Pmode,
5502 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5503 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5504
5505 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5506 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5507 an unadorned address. */
5508 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5509 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5510 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5511 return "";
5512 }
5513
5514 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5515
5516 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5517 {
5518 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5519
5520 if (!flag_pic)
5521 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5522 else
5523 output_asm_insn ("call\t%a2", xops);
5524
5525 #if TARGET_MACHO
5526 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5527 is what will be referenced by the Mach-O PIC subsystem. */
5528 if (!label)
5529 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5530 #endif
5531
5532 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5533 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5534
5535 if (flag_pic)
5536 output_asm_insn ("pop{l}\t%0", xops);
5537 }
5538 else
5539 {
5540 char name[32];
5541 get_pc_thunk_name (name, REGNO (dest));
5542 pic_labels_used |= 1 << REGNO (dest);
5543
5544 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5545 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5546 output_asm_insn ("call\t%X2", xops);
5547 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5548 is what will be referenced by the Mach-O PIC subsystem. */
5549 #if TARGET_MACHO
5550 if (!label)
5551 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5552 else
5553 targetm.asm_out.internal_label (asm_out_file, "L",
5554 CODE_LABEL_NUMBER (label));
5555 #endif
5556 }
5557
5558 if (TARGET_MACHO)
5559 return "";
5560
5561 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5562 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5563 else
5564 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5565
5566 return "";
5567 }
5568
5569 /* Generate a "push" pattern for input ARG. */
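/* That is, (set (mem (pre_dec (reg sp))) ARG), with the memory access and the
   pre-decrement both in Pmode.  */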
5570
5571 static rtx
5572 gen_push (rtx arg)
5573 {
5574 return gen_rtx_SET (VOIDmode,
5575 gen_rtx_MEM (Pmode,
5576 gen_rtx_PRE_DEC (Pmode,
5577 stack_pointer_rtx)),
5578 arg);
5579 }
5580
5581 /* Return the number of an unused call-clobbered register if one is
5582 available for the entire function, otherwise return INVALID_REGNUM. */
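/* The candidates tried below are hard registers 2, 1 and 0, i.e. %ecx, %edx
   and %eax.  */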
5583
5584 static unsigned int
5585 ix86_select_alt_pic_regnum (void)
5586 {
5587 if (current_function_is_leaf && !current_function_profile
5588 && !ix86_current_function_calls_tls_descriptor)
5589 {
5590 int i;
5591 for (i = 2; i >= 0; --i)
5592 if (!df_regs_ever_live_p (i))
5593 return i;
5594 }
5595
5596 return INVALID_REGNUM;
5597 }
5598
5599 /* Return 1 if we need to save REGNO. */
5600 static int
5601 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5602 {
5603 if (pic_offset_table_rtx
5604 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5605 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5606 || current_function_profile
5607 || current_function_calls_eh_return
5608 || current_function_uses_const_pool))
5609 {
5610 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5611 return 0;
5612 return 1;
5613 }
5614
5615 if (current_function_calls_eh_return && maybe_eh_return)
5616 {
5617 unsigned i;
5618 for (i = 0; ; i++)
5619 {
5620 unsigned test = EH_RETURN_DATA_REGNO (i);
5621 if (test == INVALID_REGNUM)
5622 break;
5623 if (test == regno)
5624 return 1;
5625 }
5626 }
5627
5628 if (cfun->machine->force_align_arg_pointer
5629 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5630 return 1;
5631
5632 return (df_regs_ever_live_p (regno)
5633 && !call_used_regs[regno]
5634 && !fixed_regs[regno]
5635 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5636 }
5637
5638 /* Return number of registers to be saved on the stack. */
5639
5640 static int
5641 ix86_nsaved_regs (void)
5642 {
5643 int nregs = 0;
5644 int regno;
5645
5646 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5647 if (ix86_save_reg (regno, true))
5648 nregs++;
5649 return nregs;
5650 }
5651
5652 /* Return the offset between two registers, one to be eliminated, and the other
5653 its replacement, at the start of a routine. */
5654
5655 HOST_WIDE_INT
5656 ix86_initial_elimination_offset (int from, int to)
5657 {
5658 struct ix86_frame frame;
5659 ix86_compute_frame_layout (&frame);
5660
5661 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5662 return frame.hard_frame_pointer_offset;
5663 else if (from == FRAME_POINTER_REGNUM
5664 && to == HARD_FRAME_POINTER_REGNUM)
5665 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5666 else
5667 {
5668 gcc_assert (to == STACK_POINTER_REGNUM);
5669
5670 if (from == ARG_POINTER_REGNUM)
5671 return frame.stack_pointer_offset;
5672
5673 gcc_assert (from == FRAME_POINTER_REGNUM);
5674 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5675 }
5676 }
5677
5678 /* Fill the ix86_frame structure with information about the frame of the current function. */
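/* From higher to lower addresses the frame computed below consists of the
   return address, the saved frame pointer (if any), the register save area,
   the va-arg save area, alignment padding, the local variables, the outgoing
   argument area and a final chunk of alignment padding; the offset
   computations below follow that order.  */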
5679
5680 static void
5681 ix86_compute_frame_layout (struct ix86_frame *frame)
5682 {
5683 HOST_WIDE_INT total_size;
5684 unsigned int stack_alignment_needed;
5685 HOST_WIDE_INT offset;
5686 unsigned int preferred_alignment;
5687 HOST_WIDE_INT size = get_frame_size ();
5688
5689 frame->nregs = ix86_nsaved_regs ();
5690 total_size = size;
5691
5692 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5693 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5694
5695 /* During reload iterations the number of registers saved can change.
5696 Recompute the value as needed. Do not recompute when the number of registers
5697 didn't change, as reload calls this function multiple times and does not
5698 expect the decision to change within a single iteration. */
5699 if (!optimize_size
5700 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5701 {
5702 int count = frame->nregs;
5703
5704 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5705 /* The fast prologue uses moves instead of pushes to save registers. This
5706 is significantly longer, but also executes faster, as modern hardware
5707 can execute the moves in parallel but cannot do so for push/pop.
5708
5709 Be careful about choosing which prologue to emit: when the function takes
5710 many instructions to execute, we may as well use the slow version, and
5711 likewise when the function is known to be outside a hot spot (known only
5712 with profile feedback). Weight the size of the function by the number of
5713 registers to save, as it is cheap to use one or two push instructions but
5714 very slow to use many of them. */
5715 if (count)
5716 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5717 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5718 || (flag_branch_probabilities
5719 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5720 cfun->machine->use_fast_prologue_epilogue = false;
5721 else
5722 cfun->machine->use_fast_prologue_epilogue
5723 = !expensive_function_p (count);
5724 }
5725 if (TARGET_PROLOGUE_USING_MOVE
5726 && cfun->machine->use_fast_prologue_epilogue)
5727 frame->save_regs_using_mov = true;
5728 else
5729 frame->save_regs_using_mov = false;
5730
5731
5732 /* Skip return address and saved base pointer. */
5733 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5734
5735 frame->hard_frame_pointer_offset = offset;
5736
5737 /* Do some sanity checking of stack_alignment_needed and
5738 preferred_alignment, since the i386 port is the only one using these
5739 features, and they may break easily. */
5740
5741 gcc_assert (!size || stack_alignment_needed);
5742 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5743 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5744 gcc_assert (stack_alignment_needed
5745 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5746
5747 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5748 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5749
5750 /* Register save area */
5751 offset += frame->nregs * UNITS_PER_WORD;
5752
5753 /* Va-arg area */
5754 if (ix86_save_varrargs_registers)
5755 {
5756 offset += X86_64_VARARGS_SIZE;
5757 frame->va_arg_size = X86_64_VARARGS_SIZE;
5758 }
5759 else
5760 frame->va_arg_size = 0;
5761
5762 /* Align start of frame for local function. */
5763 frame->padding1 = ((offset + stack_alignment_needed - 1)
5764 & -stack_alignment_needed) - offset;
5765
5766 offset += frame->padding1;
5767
5768 /* Frame pointer points here. */
5769 frame->frame_pointer_offset = offset;
5770
5771 offset += size;
5772
5773 /* Add the outgoing arguments area. It can be skipped if we eliminated
5774 all the function calls as dead code.
5775 Skipping is however impossible when the function calls alloca: the alloca
5776 expander assumes that the last current_function_outgoing_args_size bytes
5777 of the stack frame are unused. */
5778 if (ACCUMULATE_OUTGOING_ARGS
5779 && (!current_function_is_leaf || current_function_calls_alloca
5780 || ix86_current_function_calls_tls_descriptor))
5781 {
5782 offset += current_function_outgoing_args_size;
5783 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5784 }
5785 else
5786 frame->outgoing_arguments_size = 0;
5787
5788 /* Align stack boundary. Only needed if we're calling another function
5789 or using alloca. */
5790 if (!current_function_is_leaf || current_function_calls_alloca
5791 || ix86_current_function_calls_tls_descriptor)
5792 frame->padding2 = ((offset + preferred_alignment - 1)
5793 & -preferred_alignment) - offset;
5794 else
5795 frame->padding2 = 0;
5796
5797 offset += frame->padding2;
5798
5799 /* We've reached end of stack frame. */
5800 frame->stack_pointer_offset = offset;
5801
5802 /* The size the prologue needs to allocate. */
5803 frame->to_allocate =
5804 (size + frame->padding1 + frame->padding2
5805 + frame->outgoing_arguments_size + frame->va_arg_size);
5806
5807 if ((!frame->to_allocate && frame->nregs <= 1)
5808 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5809 frame->save_regs_using_mov = false;
5810
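  /* The red zone is the 128-byte area below the stack pointer that the x86-64
     ABI guarantees will not be clobbered by signal or interrupt handlers, so a
     leaf function may use it without adjusting the stack pointer.  */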
5811 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5812 && current_function_is_leaf
5813 && !ix86_current_function_calls_tls_descriptor)
5814 {
5815 frame->red_zone_size = frame->to_allocate;
5816 if (frame->save_regs_using_mov)
5817 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5818 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5819 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5820 }
5821 else
5822 frame->red_zone_size = 0;
5823 frame->to_allocate -= frame->red_zone_size;
5824 frame->stack_pointer_offset -= frame->red_zone_size;
5825 #if 0
5826 fprintf (stderr, "\n");
5827 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5828 fprintf (stderr, "size: %ld\n", (long)size);
5829 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5830 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5831 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5832 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5833 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5834 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5835 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5836 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5837 (long)frame->hard_frame_pointer_offset);
5838 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5839 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5840 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5841 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5842 #endif
5843 }
5844
5845 /* Emit code to save registers in the prologue. */
5846
5847 static void
5848 ix86_emit_save_regs (void)
5849 {
5850 unsigned int regno;
5851 rtx insn;
5852
5853 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5854 if (ix86_save_reg (regno, true))
5855 {
5856 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5857 RTX_FRAME_RELATED_P (insn) = 1;
5858 }
5859 }
5860
5861 /* Emit code to save registers using MOV insns. The first register
5862 is stored at POINTER + OFFSET. */
5863 static void
5864 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5865 {
5866 unsigned int regno;
5867 rtx insn;
5868
5869 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5870 if (ix86_save_reg (regno, true))
5871 {
5872 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5873 Pmode, offset),
5874 gen_rtx_REG (Pmode, regno));
5875 RTX_FRAME_RELATED_P (insn) = 1;
5876 offset += UNITS_PER_WORD;
5877 }
5878 }
5879
5880 /* Expand a prologue or epilogue stack adjustment.
5881 The pattern exists to put a dependency on all ebp-based memory accesses.
5882 STYLE should be negative if instructions should be marked as frame related,
5883 zero if the %r11 register is live and cannot be freely used, and positive
5884 otherwise. */
5885
5886 static void
5887 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5888 {
5889 rtx insn;
5890
5891 if (! TARGET_64BIT)
5892 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5893 else if (x86_64_immediate_operand (offset, DImode))
5894 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5895 else
5896 {
5897 rtx r11;
5898 /* r11 is used by indirect sibcall return as well, set before the
5899 epilogue and used after the epilogue. At the moment an indirect sibcall
5900 shouldn't be used together with huge frame sizes in one
5901 function because of the frame_size check in sibcall.c. */
5902 gcc_assert (style);
5903 r11 = gen_rtx_REG (DImode, R11_REG);
5904 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5905 if (style < 0)
5906 RTX_FRAME_RELATED_P (insn) = 1;
5907 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5908 offset));
5909 }
5910 if (style < 0)
5911 RTX_FRAME_RELATED_P (insn) = 1;
5912 }
5913
5914 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5915
5916 static rtx
5917 ix86_internal_arg_pointer (void)
5918 {
5919 bool has_force_align_arg_pointer =
5920 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5921 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5922 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5923 && DECL_NAME (current_function_decl)
5924 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5925 && DECL_FILE_SCOPE_P (current_function_decl))
5926 || ix86_force_align_arg_pointer
5927 || has_force_align_arg_pointer)
5928 {
5929 /* Nested functions can't realign the stack due to a register
5930 conflict. */
5931 if (DECL_CONTEXT (current_function_decl)
5932 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5933 {
5934 if (ix86_force_align_arg_pointer)
5935 warning (0, "-mstackrealign ignored for nested functions");
5936 if (has_force_align_arg_pointer)
5937 error ("%s not supported for nested functions",
5938 ix86_force_align_arg_pointer_string);
5939 return virtual_incoming_args_rtx;
5940 }
5941 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5942 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5943 }
5944 else
5945 return virtual_incoming_args_rtx;
5946 }
5947
5948 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5949 This is called from dwarf2out.c to emit call frame instructions
5950 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5951 static void
5952 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5953 {
5954 rtx unspec = SET_SRC (pattern);
5955 gcc_assert (GET_CODE (unspec) == UNSPEC);
5956
5957 switch (index)
5958 {
5959 case UNSPEC_REG_SAVE:
5960 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5961 SET_DEST (pattern));
5962 break;
5963 case UNSPEC_DEF_CFA:
5964 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5965 INTVAL (XVECEXP (unspec, 0, 0)));
5966 break;
5967 default:
5968 gcc_unreachable ();
5969 }
5970 }
5971
5972 /* Expand the prologue into a bunch of separate insns. */
5973
5974 void
5975 ix86_expand_prologue (void)
5976 {
5977 rtx insn;
5978 bool pic_reg_used;
5979 struct ix86_frame frame;
5980 HOST_WIDE_INT allocate;
5981
5982 ix86_compute_frame_layout (&frame);
5983
5984 if (cfun->machine->force_align_arg_pointer)
5985 {
5986 rtx x, y;
5987
5988 /* Grab the argument pointer. */
5989 x = plus_constant (stack_pointer_rtx, 4);
5990 y = cfun->machine->force_align_arg_pointer;
5991 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5992 RTX_FRAME_RELATED_P (insn) = 1;
5993
5994 /* The unwind info consists of two parts: install the fafp as the cfa,
5995 and record the fafp as the "save register" of the stack pointer.
5996 The latter is there so that the unwinder can see where it
5997 should restore the stack pointer across the `and' insn. */
5998 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5999 x = gen_rtx_SET (VOIDmode, y, x);
6000 RTX_FRAME_RELATED_P (x) = 1;
6001 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6002 UNSPEC_REG_SAVE);
6003 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6004 RTX_FRAME_RELATED_P (y) = 1;
6005 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6006 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6007 REG_NOTES (insn) = x;
6008
6009 /* Align the stack. */
6010 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6011 GEN_INT (-16)));
6012
6013 /* And here we cheat like madmen with the unwind info. We force the
6014 cfa register back to sp+4, which is exactly what it was at the
6015 start of the function. Re-pushing the return address results in
6016 the return at the same spot relative to the cfa, and thus is
6017 correct wrt the unwind info. */
6018 x = cfun->machine->force_align_arg_pointer;
6019 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6020 insn = emit_insn (gen_push (x));
6021 RTX_FRAME_RELATED_P (insn) = 1;
6022
6023 x = GEN_INT (4);
6024 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6025 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6026 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6027 REG_NOTES (insn) = x;
6028 }
6029
6030 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6031 slower on all targets. Also sdb doesn't like it. */
6032
6033 if (frame_pointer_needed)
6034 {
6035 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6036 RTX_FRAME_RELATED_P (insn) = 1;
6037
6038 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6039 RTX_FRAME_RELATED_P (insn) = 1;
6040 }
6041
6042 allocate = frame.to_allocate;
6043
6044 if (!frame.save_regs_using_mov)
6045 ix86_emit_save_regs ();
6046 else
6047 allocate += frame.nregs * UNITS_PER_WORD;
6048
6049 /* When using the red zone we may start register saving before allocating
6050 the stack frame, saving one cycle of the prologue. */
6051 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6052 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6053 : stack_pointer_rtx,
6054 -frame.nregs * UNITS_PER_WORD);
6055
6056 if (allocate == 0)
6057 ;
6058 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6059 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6060 GEN_INT (-allocate), -1);
6061 else
6062 {
6063 /* Stack probing this way is only valid for Win32 and the 64-bit MS ABI. */
6064 rtx eax = gen_rtx_REG (Pmode, 0);
6065 bool eax_live;
6066 rtx t;
6067
6068 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6069
6070 if (TARGET_64BIT_MS_ABI)
6071 eax_live = false;
6072 else
6073 eax_live = ix86_eax_live_at_start_p ();
6074
6075 if (eax_live)
6076 {
6077 emit_insn (gen_push (eax));
6078 allocate -= UNITS_PER_WORD;
6079 }
6080
6081 emit_move_insn (eax, GEN_INT (allocate));
6082
6083 if (TARGET_64BIT)
6084 insn = gen_allocate_stack_worker_64 (eax);
6085 else
6086 insn = gen_allocate_stack_worker_32 (eax);
6087 insn = emit_insn (insn);
6088 RTX_FRAME_RELATED_P (insn) = 1;
6089 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6090 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6091 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6092 t, REG_NOTES (insn));
6093
6094 if (eax_live)
6095 {
6096 if (frame_pointer_needed)
6097 t = plus_constant (hard_frame_pointer_rtx,
6098 allocate
6099 - frame.to_allocate
6100 - frame.nregs * UNITS_PER_WORD);
6101 else
6102 t = plus_constant (stack_pointer_rtx, allocate);
6103 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6104 }
6105 }
6106
6107 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6108 {
6109 if (!frame_pointer_needed || !frame.to_allocate)
6110 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6111 else
6112 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6113 -frame.nregs * UNITS_PER_WORD);
6114 }
6115
6116 pic_reg_used = false;
6117 if (pic_offset_table_rtx
6118 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6119 || current_function_profile))
6120 {
6121 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6122
6123 if (alt_pic_reg_used != INVALID_REGNUM)
6124 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6125
6126 pic_reg_used = true;
6127 }
6128
6129 if (pic_reg_used)
6130 {
6131 if (TARGET_64BIT)
6132 {
6133 if (ix86_cmodel == CM_LARGE_PIC)
6134 {
6135 rtx tmp_reg = gen_rtx_REG (DImode,
6136 FIRST_REX_INT_REG + 3 /* R11 */);
6137 rtx label = gen_label_rtx ();
6138 emit_label (label);
6139 LABEL_PRESERVE_P (label) = 1;
6140 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6141 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6142 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6143 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6144 pic_offset_table_rtx, tmp_reg));
6145 }
6146 else
6147 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6148 }
6149 else
6150 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6151 }
6152
6153 /* Prevent function calls from being scheduled before the call to mcount.
6154 In the pic_reg_used case, make sure that the got load isn't deleted. */
6155 if (current_function_profile)
6156 {
6157 if (pic_reg_used)
6158 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6159 emit_insn (gen_blockage ());
6160 }
6161 }
6162
6163 /* Emit code to restore saved registers using MOV insns. First register
6164 is restored from POINTER + OFFSET. */
6165 static void
6166 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6167 int maybe_eh_return)
6168 {
6169 int regno;
6170 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6171
6172 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6173 if (ix86_save_reg (regno, maybe_eh_return))
6174 {
6175 /* Ensure that adjust_address won't be forced to produce a pointer
6176 outside of the range allowed by the x86-64 instruction set. */
6177 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6178 {
6179 rtx r11;
6180
6181 r11 = gen_rtx_REG (DImode, R11_REG);
6182 emit_move_insn (r11, GEN_INT (offset));
6183 emit_insn (gen_adddi3 (r11, r11, pointer));
6184 base_address = gen_rtx_MEM (Pmode, r11);
6185 offset = 0;
6186 }
6187 emit_move_insn (gen_rtx_REG (Pmode, regno),
6188 adjust_address (base_address, Pmode, offset));
6189 offset += UNITS_PER_WORD;
6190 }
6191 }
6192
6193 /* Restore function stack, frame, and registers. */
6194
6195 void
6196 ix86_expand_epilogue (int style)
6197 {
6198 int regno;
6199 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6200 struct ix86_frame frame;
6201 HOST_WIDE_INT offset;
6202
6203 ix86_compute_frame_layout (&frame);
6204
6205 /* Calculate start of saved registers relative to ebp. Special care
6206 must be taken for the normal return case of a function using
6207 eh_return: the eax and edx registers are marked as saved, but not
6208 restored along this path. */
6209 offset = frame.nregs;
6210 if (current_function_calls_eh_return && style != 2)
6211 offset -= 2;
6212 offset *= -UNITS_PER_WORD;
6213
6214 /* If we're only restoring one register and sp is not valid, then
6215 use a move instruction to restore the register, since it's
6216 less work than reloading sp and popping the register.
6217
6218 The default code results in a stack adjustment using an add/lea instruction,
6219 while this code results in a LEAVE instruction (or discrete equivalent),
6220 so it is profitable in some other cases as well, especially when there
6221 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6222 and there is exactly one register to pop. This heuristic may need some
6223 tuning in the future. */
6224 if ((!sp_valid && frame.nregs <= 1)
6225 || (TARGET_EPILOGUE_USING_MOVE
6226 && cfun->machine->use_fast_prologue_epilogue
6227 && (frame.nregs > 1 || frame.to_allocate))
6228 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6229 || (frame_pointer_needed && TARGET_USE_LEAVE
6230 && cfun->machine->use_fast_prologue_epilogue
6231 && frame.nregs == 1)
6232 || current_function_calls_eh_return)
6233 {
6234 /* Restore registers. We can use ebp or esp to address the memory
6235 locations. If both are available, default to ebp, since offsets
6236 are known to be small. The only exception is when esp points directly to
6237 the end of the block of saved registers, where we may simplify the
6238 addressing mode. */
6239
6240 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6241 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6242 frame.to_allocate, style == 2);
6243 else
6244 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6245 offset, style == 2);
6246
6247 /* eh_return epilogues need %ecx added to the stack pointer. */
6248 if (style == 2)
6249 {
6250 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6251
6252 if (frame_pointer_needed)
6253 {
6254 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6255 tmp = plus_constant (tmp, UNITS_PER_WORD);
6256 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6257
6258 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6259 emit_move_insn (hard_frame_pointer_rtx, tmp);
6260
6261 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6262 const0_rtx, style);
6263 }
6264 else
6265 {
6266 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6267 tmp = plus_constant (tmp, (frame.to_allocate
6268 + frame.nregs * UNITS_PER_WORD));
6269 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6270 }
6271 }
6272 else if (!frame_pointer_needed)
6273 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6274 GEN_INT (frame.to_allocate
6275 + frame.nregs * UNITS_PER_WORD),
6276 style);
6277 /* If not an i386, mov & pop is faster than "leave". */
6278 else if (TARGET_USE_LEAVE || optimize_size
6279 || !cfun->machine->use_fast_prologue_epilogue)
6280 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6281 else
6282 {
6283 pro_epilogue_adjust_stack (stack_pointer_rtx,
6284 hard_frame_pointer_rtx,
6285 const0_rtx, style);
6286 if (TARGET_64BIT)
6287 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6288 else
6289 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6290 }
6291 }
6292 else
6293 {
6294 /* First step is to deallocate the stack frame so that we can
6295 pop the registers. */
6296 if (!sp_valid)
6297 {
6298 gcc_assert (frame_pointer_needed);
6299 pro_epilogue_adjust_stack (stack_pointer_rtx,
6300 hard_frame_pointer_rtx,
6301 GEN_INT (offset), style);
6302 }
6303 else if (frame.to_allocate)
6304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6305 GEN_INT (frame.to_allocate), style);
6306
6307 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6308 if (ix86_save_reg (regno, false))
6309 {
6310 if (TARGET_64BIT)
6311 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6312 else
6313 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6314 }
6315 if (frame_pointer_needed)
6316 {
6317 /* Leave results in shorter dependency chains on CPUs that are
6318 able to grok it fast. */
6319 if (TARGET_USE_LEAVE)
6320 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6321 else if (TARGET_64BIT)
6322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6323 else
6324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6325 }
6326 }
6327
6328 if (cfun->machine->force_align_arg_pointer)
6329 {
6330 emit_insn (gen_addsi3 (stack_pointer_rtx,
6331 cfun->machine->force_align_arg_pointer,
6332 GEN_INT (-4)));
6333 }
6334
6335 /* Sibcall epilogues don't want a return instruction. */
6336 if (style == 0)
6337 return;
6338
6339 if (current_function_pops_args && current_function_args_size)
6340 {
6341 rtx popc = GEN_INT (current_function_pops_args);
6342
6343 /* i386 can only pop 64K bytes. If asked to pop more, pop
6344 return address, do explicit add, and jump indirectly to the
6345 caller. */
6346
6347 if (current_function_pops_args >= 65536)
6348 {
6349 rtx ecx = gen_rtx_REG (SImode, 2);
6350
6351 /* There is no "pascal" calling convention in any 64bit ABI. */
6352 gcc_assert (!TARGET_64BIT);
6353
6354 emit_insn (gen_popsi1 (ecx));
6355 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6356 emit_jump_insn (gen_return_indirect_internal (ecx));
6357 }
6358 else
6359 emit_jump_insn (gen_return_pop_internal (popc));
6360 }
6361 else
6362 emit_jump_insn (gen_return_internal ());
6363 }
6364
6365 /* Reset state, such as the hard register backing the PIC pseudo, that compiling the function may have modified. */
6366
6367 static void
6368 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6369 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6370 {
6371 if (pic_offset_table_rtx)
6372 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6373 #if TARGET_MACHO
6374 /* Mach-O doesn't support labels at the end of objects, so if
6375 it looks like we might want one, insert a NOP. */
6376 {
6377 rtx insn = get_last_insn ();
6378 while (insn
6379 && NOTE_P (insn)
6380 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6381 insn = PREV_INSN (insn);
6382 if (insn
6383 && (LABEL_P (insn)
6384 || (NOTE_P (insn)
6385 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6386 fputs ("\tnop\n", file);
6387 }
6388 #endif
6389
6390 }
6391 \f
6392 /* Extract the parts of an RTL expression that is a valid memory address
6393 for an instruction. Return 0 if the structure of the address is
6394 grossly off. Return -1 if the address contains ASHIFT, so it is not
6395 strictly valid but is still used for computing the length of lea instructions. */
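/* For example, the SImode address
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 8)),
   i.e. the operand 8(%ebx,%eax,4), decomposes into base = %ebx,
   index = %eax, scale = 4 and disp = 8.  */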
6396
6397 int
6398 ix86_decompose_address (rtx addr, struct ix86_address *out)
6399 {
6400 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6401 rtx base_reg, index_reg;
6402 HOST_WIDE_INT scale = 1;
6403 rtx scale_rtx = NULL_RTX;
6404 int retval = 1;
6405 enum ix86_address_seg seg = SEG_DEFAULT;
6406
6407 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6408 base = addr;
6409 else if (GET_CODE (addr) == PLUS)
6410 {
6411 rtx addends[4], op;
6412 int n = 0, i;
6413
6414 op = addr;
6415 do
6416 {
6417 if (n >= 4)
6418 return 0;
6419 addends[n++] = XEXP (op, 1);
6420 op = XEXP (op, 0);
6421 }
6422 while (GET_CODE (op) == PLUS);
6423 if (n >= 4)
6424 return 0;
6425 addends[n] = op;
6426
6427 for (i = n; i >= 0; --i)
6428 {
6429 op = addends[i];
6430 switch (GET_CODE (op))
6431 {
6432 case MULT:
6433 if (index)
6434 return 0;
6435 index = XEXP (op, 0);
6436 scale_rtx = XEXP (op, 1);
6437 break;
6438
6439 case UNSPEC:
6440 if (XINT (op, 1) == UNSPEC_TP
6441 && TARGET_TLS_DIRECT_SEG_REFS
6442 && seg == SEG_DEFAULT)
6443 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6444 else
6445 return 0;
6446 break;
6447
6448 case REG:
6449 case SUBREG:
6450 if (!base)
6451 base = op;
6452 else if (!index)
6453 index = op;
6454 else
6455 return 0;
6456 break;
6457
6458 case CONST:
6459 case CONST_INT:
6460 case SYMBOL_REF:
6461 case LABEL_REF:
6462 if (disp)
6463 return 0;
6464 disp = op;
6465 break;
6466
6467 default:
6468 return 0;
6469 }
6470 }
6471 }
6472 else if (GET_CODE (addr) == MULT)
6473 {
6474 index = XEXP (addr, 0); /* index*scale */
6475 scale_rtx = XEXP (addr, 1);
6476 }
6477 else if (GET_CODE (addr) == ASHIFT)
6478 {
6479 rtx tmp;
6480
6481 /* We're called for lea too, which implements ashift on occasion. */
6482 index = XEXP (addr, 0);
6483 tmp = XEXP (addr, 1);
6484 if (!CONST_INT_P (tmp))
6485 return 0;
6486 scale = INTVAL (tmp);
6487 if ((unsigned HOST_WIDE_INT) scale > 3)
6488 return 0;
6489 scale = 1 << scale;
6490 retval = -1;
6491 }
6492 else
6493 disp = addr; /* displacement */
6494
6495 /* Extract the integral value of scale. */
6496 if (scale_rtx)
6497 {
6498 if (!CONST_INT_P (scale_rtx))
6499 return 0;
6500 scale = INTVAL (scale_rtx);
6501 }
6502
6503 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6504 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6505
6506 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6507 if (base_reg && index_reg && scale == 1
6508 && (index_reg == arg_pointer_rtx
6509 || index_reg == frame_pointer_rtx
6510 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6511 {
6512 rtx tmp;
6513 tmp = base, base = index, index = tmp;
6514 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6515 }
6516
6517 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6518 if ((base_reg == hard_frame_pointer_rtx
6519 || base_reg == frame_pointer_rtx
6520 || base_reg == arg_pointer_rtx) && !disp)
6521 disp = const0_rtx;
6522
6523 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6524 Avoid this by transforming to [%esi+0]. */
6525 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6526 && base_reg && !index_reg && !disp
6527 && REG_P (base_reg)
6528 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6529 disp = const0_rtx;
6530
6531 /* Special case: encode reg+reg instead of reg*2. */
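  /* [%eax+%eax] needs no displacement, whereas an index without a base such as
     [%eax*2] would also require a 32-bit zero displacement (see the next
     special case).  */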
6532 if (!base && index && scale && scale == 2)
6533 base = index, base_reg = index_reg, scale = 1;
6534
6535 /* Special case: scaling cannot be encoded without base or displacement. */
6536 if (!base && !disp && index && scale != 1)
6537 disp = const0_rtx;
6538
6539 out->base = base;
6540 out->index = index;
6541 out->disp = disp;
6542 out->scale = scale;
6543 out->seg = seg;
6544
6545 return retval;
6546 }
6547 \f
6548 /* Return the cost of the memory address X.
6549 For i386, it is better to use a complex address than let gcc copy
6550 the address into a reg and make a new pseudo. But not if the address
6551 requires two regs - that would mean more pseudos with longer
6552 lifetimes. */
6553 static int
6554 ix86_address_cost (rtx x)
6555 {
6556 struct ix86_address parts;
6557 int cost = 1;
6558 int ok = ix86_decompose_address (x, &parts);
6559
6560 gcc_assert (ok);
6561
6562 if (parts.base && GET_CODE (parts.base) == SUBREG)
6563 parts.base = SUBREG_REG (parts.base);
6564 if (parts.index && GET_CODE (parts.index) == SUBREG)
6565 parts.index = SUBREG_REG (parts.index);
6566
6567 /* Attempt to minimize number of registers in the address. */
6568 if ((parts.base
6569 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6570 || (parts.index
6571 && (!REG_P (parts.index)
6572 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6573 cost++;
6574
6575 if (parts.base
6576 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6577 && parts.index
6578 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6579 && parts.base != parts.index)
6580 cost++;
6581
6582 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6583 since its predecode logic can't detect the length of such instructions
6584 and decoding degenerates to the vector decoder. Increase the cost of such
6585 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6586 to split such addresses or even refuse them entirely.
6587
6588 The following addressing modes are affected:
6589 [base+scale*index]
6590 [scale*index+disp]
6591 [base+index]
6592
6593 The first and last cases may be avoidable by explicitly coding a zero
6594 displacement in the memory address, but I don't have an AMD K6 machine
6595 handy to check this theory. */
6596
6597 if (TARGET_K6
6598 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6599 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6600 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6601 cost += 10;
6602
6603 return cost;
6604 }
6605 \f
6606 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6607 this is used to form addresses of local data when -fPIC is in
6608 use. */
6609
6610 static bool
6611 darwin_local_data_pic (rtx disp)
6612 {
6613 if (GET_CODE (disp) == MINUS)
6614 {
6615 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6616 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6617 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6618 {
6619 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6620 if (! strcmp (sym_name, "<pic base>"))
6621 return true;
6622 }
6623 }
6624
6625 return false;
6626 }
6627
6628 /* Determine if a given RTX is a valid constant. We already know this
6629 satisfies CONSTANT_P. */
6630
6631 bool
6632 legitimate_constant_p (rtx x)
6633 {
6634 switch (GET_CODE (x))
6635 {
6636 case CONST:
6637 x = XEXP (x, 0);
6638
6639 if (GET_CODE (x) == PLUS)
6640 {
6641 if (!CONST_INT_P (XEXP (x, 1)))
6642 return false;
6643 x = XEXP (x, 0);
6644 }
6645
6646 if (TARGET_MACHO && darwin_local_data_pic (x))
6647 return true;
6648
6649 /* Only some unspecs are valid as "constants". */
6650 if (GET_CODE (x) == UNSPEC)
6651 switch (XINT (x, 1))
6652 {
6653 case UNSPEC_GOT:
6654 case UNSPEC_GOTOFF:
6655 case UNSPEC_PLTOFF:
6656 return TARGET_64BIT;
6657 case UNSPEC_TPOFF:
6658 case UNSPEC_NTPOFF:
6659 x = XVECEXP (x, 0, 0);
6660 return (GET_CODE (x) == SYMBOL_REF
6661 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6662 case UNSPEC_DTPOFF:
6663 x = XVECEXP (x, 0, 0);
6664 return (GET_CODE (x) == SYMBOL_REF
6665 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6666 default:
6667 return false;
6668 }
6669
6670 /* We must have drilled down to a symbol. */
6671 if (GET_CODE (x) == LABEL_REF)
6672 return true;
6673 if (GET_CODE (x) != SYMBOL_REF)
6674 return false;
6675 /* FALLTHRU */
6676
6677 case SYMBOL_REF:
6678 /* TLS symbols are never valid. */
6679 if (SYMBOL_REF_TLS_MODEL (x))
6680 return false;
6681
6682 /* DLLIMPORT symbols are never valid. */
6683 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6684 && SYMBOL_REF_DLLIMPORT_P (x))
6685 return false;
6686 break;
6687
6688 case CONST_DOUBLE:
6689 if (GET_MODE (x) == TImode
6690 && x != CONST0_RTX (TImode)
6691 && !TARGET_64BIT)
6692 return false;
6693 break;
6694
6695 case CONST_VECTOR:
6696 if (x == CONST0_RTX (GET_MODE (x)))
6697 return true;
6698 return false;
6699
6700 default:
6701 break;
6702 }
6703
6704 /* Otherwise we handle everything else in the move patterns. */
6705 return true;
6706 }
6707
6708 /* Determine if it's legal to put X into the constant pool. This
6709 is not possible for the address of thread-local symbols, which
6710 is checked above. */
6711
6712 static bool
6713 ix86_cannot_force_const_mem (rtx x)
6714 {
6715 /* We can always put integral constants and vectors in memory. */
6716 switch (GET_CODE (x))
6717 {
6718 case CONST_INT:
6719 case CONST_DOUBLE:
6720 case CONST_VECTOR:
6721 return false;
6722
6723 default:
6724 break;
6725 }
6726 return !legitimate_constant_p (x);
6727 }
6728
6729 /* Determine if a given RTX is a valid constant address. */
6730
6731 bool
6732 constant_address_p (rtx x)
6733 {
6734 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6735 }
6736
6737 /* Nonzero if the constant value X is a legitimate general operand
6738 when generating PIC code. It is given that flag_pic is on and
6739 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6740
6741 bool
6742 legitimate_pic_operand_p (rtx x)
6743 {
6744 rtx inner;
6745
6746 switch (GET_CODE (x))
6747 {
6748 case CONST:
6749 inner = XEXP (x, 0);
6750 if (GET_CODE (inner) == PLUS
6751 && CONST_INT_P (XEXP (inner, 1)))
6752 inner = XEXP (inner, 0);
6753
6754 /* Only some unspecs are valid as "constants". */
6755 if (GET_CODE (inner) == UNSPEC)
6756 switch (XINT (inner, 1))
6757 {
6758 case UNSPEC_GOT:
6759 case UNSPEC_GOTOFF:
6760 case UNSPEC_PLTOFF:
6761 return TARGET_64BIT;
6762 case UNSPEC_TPOFF:
6763 x = XVECEXP (inner, 0, 0);
6764 return (GET_CODE (x) == SYMBOL_REF
6765 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6766 default:
6767 return false;
6768 }
6769 /* FALLTHRU */
6770
6771 case SYMBOL_REF:
6772 case LABEL_REF:
6773 return legitimate_pic_address_disp_p (x);
6774
6775 default:
6776 return true;
6777 }
6778 }
6779
6780 /* Determine if a given CONST RTX is a valid memory displacement
6781 in PIC mode. */
6782
6783 int
6784 legitimate_pic_address_disp_p (rtx disp)
6785 {
6786 bool saw_plus;
6787
6788 /* In 64bit mode we can allow direct addresses of symbols and labels
6789 when they are not dynamic symbols. */
6790 if (TARGET_64BIT)
6791 {
6792 rtx op0 = disp, op1;
6793
6794 switch (GET_CODE (disp))
6795 {
6796 case LABEL_REF:
6797 return true;
6798
6799 case CONST:
6800 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6801 break;
6802 op0 = XEXP (XEXP (disp, 0), 0);
6803 op1 = XEXP (XEXP (disp, 0), 1);
6804 if (!CONST_INT_P (op1)
6805 || INTVAL (op1) >= 16*1024*1024
6806 || INTVAL (op1) < -16*1024*1024)
6807 break;
6808 if (GET_CODE (op0) == LABEL_REF)
6809 return true;
6810 if (GET_CODE (op0) != SYMBOL_REF)
6811 break;
6812 /* FALLTHRU */
6813
6814 case SYMBOL_REF:
6815 /* TLS references should always be enclosed in UNSPEC. */
6816 if (SYMBOL_REF_TLS_MODEL (op0))
6817 return false;
6818 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6819 && ix86_cmodel != CM_LARGE_PIC)
6820 return true;
6821 break;
6822
6823 default:
6824 break;
6825 }
6826 }
6827 if (GET_CODE (disp) != CONST)
6828 return 0;
6829 disp = XEXP (disp, 0);
6830
6831 if (TARGET_64BIT)
6832 {
6833 /* It is unsafe to allow PLUS expressions here; that would limit the allowed
6834 distance of GOT table references. We should not need these anyway. */
6835 if (GET_CODE (disp) != UNSPEC
6836 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6837 && XINT (disp, 1) != UNSPEC_GOTOFF
6838 && XINT (disp, 1) != UNSPEC_PLTOFF))
6839 return 0;
6840
6841 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6842 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6843 return 0;
6844 return 1;
6845 }
6846
6847 saw_plus = false;
6848 if (GET_CODE (disp) == PLUS)
6849 {
6850 if (!CONST_INT_P (XEXP (disp, 1)))
6851 return 0;
6852 disp = XEXP (disp, 0);
6853 saw_plus = true;
6854 }
6855
6856 if (TARGET_MACHO && darwin_local_data_pic (disp))
6857 return 1;
6858
6859 if (GET_CODE (disp) != UNSPEC)
6860 return 0;
6861
6862 switch (XINT (disp, 1))
6863 {
6864 case UNSPEC_GOT:
6865 if (saw_plus)
6866 return false;
6867 /* We need to check for both symbols and labels because VxWorks loads
6868 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6869 details. */
6870 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6871 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6872 case UNSPEC_GOTOFF:
6873 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6874 While the ABI also specifies a 32bit relocation, we don't produce it in
6875 the small PIC model at all. */
6876 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6877 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6878 && !TARGET_64BIT)
6879 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6880 return false;
6881 case UNSPEC_GOTTPOFF:
6882 case UNSPEC_GOTNTPOFF:
6883 case UNSPEC_INDNTPOFF:
6884 if (saw_plus)
6885 return false;
6886 disp = XVECEXP (disp, 0, 0);
6887 return (GET_CODE (disp) == SYMBOL_REF
6888 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6889 case UNSPEC_NTPOFF:
6890 disp = XVECEXP (disp, 0, 0);
6891 return (GET_CODE (disp) == SYMBOL_REF
6892 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6893 case UNSPEC_DTPOFF:
6894 disp = XVECEXP (disp, 0, 0);
6895 return (GET_CODE (disp) == SYMBOL_REF
6896 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6897 }
6898
6899 return 0;
6900 }
6901
6902 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6903 memory address for an instruction. The MODE argument is the machine mode
6904 for the MEM expression that wants to use this address.
6905
6906 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6907 convert common non-canonical forms to canonical form so that they will
6908 be recognized. */
6909
6910 int
6911 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6912 rtx addr, int strict)
6913 {
6914 struct ix86_address parts;
6915 rtx base, index, disp;
6916 HOST_WIDE_INT scale;
6917 const char *reason = NULL;
6918 rtx reason_rtx = NULL_RTX;
6919
6920 if (ix86_decompose_address (addr, &parts) <= 0)
6921 {
6922 reason = "decomposition failed";
6923 goto report_error;
6924 }
6925
6926 base = parts.base;
6927 index = parts.index;
6928 disp = parts.disp;
6929 scale = parts.scale;
6930
6931 /* Validate base register.
6932
6933 Don't allow SUBREGs that span more than a word here. It can lead to spill
6934 failures when the base is one word out of a two-word structure, which is
6935 represented internally as a DImode int.
6936
6937 if (base)
6938 {
6939 rtx reg;
6940 reason_rtx = base;
6941
6942 if (REG_P (base))
6943 reg = base;
6944 else if (GET_CODE (base) == SUBREG
6945 && REG_P (SUBREG_REG (base))
6946 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6947 <= UNITS_PER_WORD)
6948 reg = SUBREG_REG (base);
6949 else
6950 {
6951 reason = "base is not a register";
6952 goto report_error;
6953 }
6954
6955 if (GET_MODE (base) != Pmode)
6956 {
6957 reason = "base is not in Pmode";
6958 goto report_error;
6959 }
6960
6961 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6962 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6963 {
6964 reason = "base is not valid";
6965 goto report_error;
6966 }
6967 }
6968
6969 /* Validate index register.
6970
6971 Don't allow SUBREGs that span more than a word here -- same as above. */
6972
6973 if (index)
6974 {
6975 rtx reg;
6976 reason_rtx = index;
6977
6978 if (REG_P (index))
6979 reg = index;
6980 else if (GET_CODE (index) == SUBREG
6981 && REG_P (SUBREG_REG (index))
6982 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6983 <= UNITS_PER_WORD)
6984 reg = SUBREG_REG (index);
6985 else
6986 {
6987 reason = "index is not a register";
6988 goto report_error;
6989 }
6990
6991 if (GET_MODE (index) != Pmode)
6992 {
6993 reason = "index is not in Pmode";
6994 goto report_error;
6995 }
6996
6997 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6998 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6999 {
7000 reason = "index is not valid";
7001 goto report_error;
7002 }
7003 }
7004
7005 /* Validate scale factor. */
7006 if (scale != 1)
7007 {
7008 reason_rtx = GEN_INT (scale);
7009 if (!index)
7010 {
7011 reason = "scale without index";
7012 goto report_error;
7013 }
7014
7015 if (scale != 2 && scale != 4 && scale != 8)
7016 {
7017 reason = "scale is not a valid multiplier";
7018 goto report_error;
7019 }
7020 }
7021
7022 /* Validate displacement. */
7023 if (disp)
7024 {
7025 reason_rtx = disp;
7026
7027 if (GET_CODE (disp) == CONST
7028 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7029 switch (XINT (XEXP (disp, 0), 1))
7030 {
7031 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7032 used. While the ABI also specifies 32bit relocations, we don't produce
7033 them at all and use IP-relative addressing instead. */
7034 case UNSPEC_GOT:
7035 case UNSPEC_GOTOFF:
7036 gcc_assert (flag_pic);
7037 if (!TARGET_64BIT)
7038 goto is_legitimate_pic;
7039 reason = "64bit address unspec";
7040 goto report_error;
7041
7042 case UNSPEC_GOTPCREL:
7043 gcc_assert (flag_pic);
7044 goto is_legitimate_pic;
7045
7046 case UNSPEC_GOTTPOFF:
7047 case UNSPEC_GOTNTPOFF:
7048 case UNSPEC_INDNTPOFF:
7049 case UNSPEC_NTPOFF:
7050 case UNSPEC_DTPOFF:
7051 break;
7052
7053 default:
7054 reason = "invalid address unspec";
7055 goto report_error;
7056 }
7057
7058 else if (SYMBOLIC_CONST (disp)
7059 && (flag_pic
7060 || (TARGET_MACHO
7061 #if TARGET_MACHO
7062 && MACHOPIC_INDIRECT
7063 && !machopic_operand_p (disp)
7064 #endif
7065 )))
7066 {
7067
7068 is_legitimate_pic:
7069 if (TARGET_64BIT && (index || base))
7070 {
7071 /* foo@dtpoff(%rX) is ok. */
7072 if (GET_CODE (disp) != CONST
7073 || GET_CODE (XEXP (disp, 0)) != PLUS
7074 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7075 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7076 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7077 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7078 {
7079 reason = "non-constant pic memory reference";
7080 goto report_error;
7081 }
7082 }
7083 else if (! legitimate_pic_address_disp_p (disp))
7084 {
7085 reason = "displacement is an invalid pic construct";
7086 goto report_error;
7087 }
7088
7089 /* This code used to verify that a symbolic pic displacement
7090 includes the pic_offset_table_rtx register.
7091
7092 While this is a good idea, unfortunately these constructs may
7093 be created by the "adds using lea" optimization for incorrect
7094 code like:
7095
7096 int a;
7097 int foo(int i)
7098 {
7099 return *(&a+i);
7100 }
7101
7102 This code is nonsensical, but results in addressing the
7103 GOT table with a pic_offset_table_rtx base. We can't
7104 easily refuse it, since it gets matched by the
7105 "addsi3" pattern, which later gets split into an lea when
7106 the output register differs from the input. While this
7107 could be handled by a separate addsi pattern for this case
7108 that never results in an lea, disabling this test seems to
7109 be the easier and correct fix for the crash. */
7110 }
7111 else if (GET_CODE (disp) != LABEL_REF
7112 && !CONST_INT_P (disp)
7113 && (GET_CODE (disp) != CONST
7114 || !legitimate_constant_p (disp))
7115 && (GET_CODE (disp) != SYMBOL_REF
7116 || !legitimate_constant_p (disp)))
7117 {
7118 reason = "displacement is not constant";
7119 goto report_error;
7120 }
7121 else if (TARGET_64BIT
7122 && !x86_64_immediate_operand (disp, VOIDmode))
7123 {
7124 reason = "displacement is out of range";
7125 goto report_error;
7126 }
7127 }
7128
7129 /* Everything looks valid. */
7130 return TRUE;
7131
7132 report_error:
7133 return FALSE;
7134 }
7135 \f
7136 /* Return a unique alias set for the GOT. */
7137
7138 static HOST_WIDE_INT
7139 ix86_GOT_alias_set (void)
7140 {
7141 static HOST_WIDE_INT set = -1;
7142 if (set == -1)
7143 set = new_alias_set ();
7144 return set;
7145 }
7146
7147 /* Return a legitimate reference for ORIG (an address) using the
7148 register REG. If REG is 0, a new pseudo is generated.
7149
7150 There are two types of references that must be handled:
7151
7152 1. Global data references must load the address from the GOT, via
7153 the PIC reg. An insn is emitted to do this load, and the reg is
7154 returned.
7155
7156 2. Static data references, constant pool addresses, and code labels
7157 compute the address as an offset from the GOT, whose base is in
7158 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7159 differentiate them from global data objects. The returned
7160 address is the PIC reg + an unspec constant.
7161
7162 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7163 reg also appears in the address. */
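/* For example, in 32-bit PIC code a global variable is typically loaded
via movl x@GOT(%ebx), %reg, while a local/static one is addressed
directly as y@GOTOFF(%ebx). */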
7164
7165 static rtx
7166 legitimize_pic_address (rtx orig, rtx reg)
7167 {
7168 rtx addr = orig;
7169 rtx new_rtx = orig;
7170 rtx base;
7171
7172 #if TARGET_MACHO
7173 if (TARGET_MACHO && !TARGET_64BIT)
7174 {
7175 if (reg == 0)
7176 reg = gen_reg_rtx (Pmode);
7177 /* Use the generic Mach-O PIC machinery. */
7178 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7179 }
7180 #endif
7181
7182 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7183 new_rtx = addr;
7184 else if (TARGET_64BIT
7185 && ix86_cmodel != CM_SMALL_PIC
7186 && gotoff_operand (addr, Pmode))
7187 {
7188 rtx tmpreg;
7189 /* This symbol may be referenced via a displacement from the PIC
7190 base address (@GOTOFF). */
7191
7192 if (reload_in_progress)
7193 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7194 if (GET_CODE (addr) == CONST)
7195 addr = XEXP (addr, 0);
7196 if (GET_CODE (addr) == PLUS)
7197 {
7198 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7199 UNSPEC_GOTOFF);
7200 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7201 }
7202 else
7203 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7204 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7205 if (!reg)
7206 tmpreg = gen_reg_rtx (Pmode);
7207 else
7208 tmpreg = reg;
7209 emit_move_insn (tmpreg, new_rtx);
7210
7211 if (reg != 0)
7212 {
7213 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7214 tmpreg, 1, OPTAB_DIRECT);
7215 new_rtx = reg;
7216 }
7217 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7218 }
7219 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7220 {
7221 /* This symbol may be referenced via a displacement from the PIC
7222 base address (@GOTOFF). */
7223
7224 if (reload_in_progress)
7225 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7226 if (GET_CODE (addr) == CONST)
7227 addr = XEXP (addr, 0);
7228 if (GET_CODE (addr) == PLUS)
7229 {
7230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7231 UNSPEC_GOTOFF);
7232 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7233 }
7234 else
7235 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7236 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7237 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7238
7239 if (reg != 0)
7240 {
7241 emit_move_insn (reg, new_rtx);
7242 new_rtx = reg;
7243 }
7244 }
7245 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7246 /* We can't use @GOTOFF for text labels on VxWorks;
7247 see gotoff_operand. */
7248 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7249 {
7250 /* Given that we've already handled dllimport variables separately
7251 in legitimize_address, and all other variables should satisfy
7252 legitimate_pic_address_disp_p, we should never arrive here. */
7253 gcc_assert (!TARGET_64BIT_MS_ABI);
7254
7255 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7256 {
7257 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7258 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7259 new_rtx = gen_const_mem (Pmode, new_rtx);
7260 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7261
7262 if (reg == 0)
7263 reg = gen_reg_rtx (Pmode);
7264 /* Use gen_movsi directly, otherwise the address is loaded
7265 into a register for CSE. We don't want to CSE these addresses;
7266 instead we CSE addresses from the GOT table, so skip this. */
7267 emit_insn (gen_movsi (reg, new_rtx));
7268 new_rtx = reg;
7269 }
7270 else
7271 {
7272 /* This symbol must be referenced via a load from the
7273 Global Offset Table (@GOT). */
7274
7275 if (reload_in_progress)
7276 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7277 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7278 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7279 if (TARGET_64BIT)
7280 new_rtx = force_reg (Pmode, new_rtx);
7281 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7282 new_rtx = gen_const_mem (Pmode, new_rtx);
7283 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7284
7285 if (reg == 0)
7286 reg = gen_reg_rtx (Pmode);
7287 emit_move_insn (reg, new_rtx);
7288 new_rtx = reg;
7289 }
7290 }
7291 else
7292 {
7293 if (CONST_INT_P (addr)
7294 && !x86_64_immediate_operand (addr, VOIDmode))
7295 {
7296 if (reg)
7297 {
7298 emit_move_insn (reg, addr);
7299 new_rtx = reg;
7300 }
7301 else
7302 new_rtx = force_reg (Pmode, addr);
7303 }
7304 else if (GET_CODE (addr) == CONST)
7305 {
7306 addr = XEXP (addr, 0);
7307
7308 /* We must match what we generated before. Assume the only
7309 unspecs that can get here are ours. Not that we could do
7310 anything with them anyway.... */
7311 if (GET_CODE (addr) == UNSPEC
7312 || (GET_CODE (addr) == PLUS
7313 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7314 return orig;
7315 gcc_assert (GET_CODE (addr) == PLUS);
7316 }
7317 if (GET_CODE (addr) == PLUS)
7318 {
7319 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7320
7321 /* Check first to see if this is a constant offset from a @GOTOFF
7322 symbol reference. */
7323 if (gotoff_operand (op0, Pmode)
7324 && CONST_INT_P (op1))
7325 {
7326 if (!TARGET_64BIT)
7327 {
7328 if (reload_in_progress)
7329 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7330 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7331 UNSPEC_GOTOFF);
7332 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7333 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7334 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7335
7336 if (reg != 0)
7337 {
7338 emit_move_insn (reg, new_rtx);
7339 new_rtx = reg;
7340 }
7341 }
7342 else
7343 {
7344 if (INTVAL (op1) < -16*1024*1024
7345 || INTVAL (op1) >= 16*1024*1024)
7346 {
7347 if (!x86_64_immediate_operand (op1, Pmode))
7348 op1 = force_reg (Pmode, op1);
7349 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7350 }
7351 }
7352 }
7353 else
7354 {
7355 base = legitimize_pic_address (XEXP (addr, 0), reg);
7356 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7357 base == reg ? NULL_RTX : reg);
7358
7359 if (CONST_INT_P (new_rtx))
7360 new_rtx = plus_constant (base, INTVAL (new_rtx));
7361 else
7362 {
7363 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7364 {
7365 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7366 new_rtx = XEXP (new_rtx, 1);
7367 }
7368 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7369 }
7370 }
7371 }
7372 }
7373 return new_rtx;
7374 }
7375 \f
7376 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7377
7378 static rtx
7379 get_thread_pointer (int to_reg)
7380 {
7381 rtx tp, reg, insn;
7382
7383 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7384 if (!to_reg)
7385 return tp;
7386
7387 reg = gen_reg_rtx (Pmode);
7388 insn = gen_rtx_SET (VOIDmode, reg, tp);
7389 insn = emit_insn (insn);
7390
7391 return reg;
7392 }
7393
7394 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7395 false if we expect this to be used for a memory address and true if
7396 we expect to load the address into a register. */
7397
7398 static rtx
7399 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7400 {
7401 rtx dest, base, off, pic, tp;
7402 int type;
7403
7404 switch (model)
7405 {
7406 case TLS_MODEL_GLOBAL_DYNAMIC:
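/* Global-dynamic: the address of X is computed at run time, classically
via a call to __tls_get_addr, or through a TLS descriptor when
TARGET_GNU2_TLS (-mtls-dialect=gnu2) is in effect. */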
7407 dest = gen_reg_rtx (Pmode);
7408 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7409
7410 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7411 {
7412 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7413
7414 start_sequence ();
7415 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7416 insns = get_insns ();
7417 end_sequence ();
7418
7419 CONST_OR_PURE_CALL_P (insns) = 1;
7420 emit_libcall_block (insns, dest, rax, x);
7421 }
7422 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7423 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7424 else
7425 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7426
7427 if (TARGET_GNU2_TLS)
7428 {
7429 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7430
7431 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7432 }
7433 break;
7434
7435 case TLS_MODEL_LOCAL_DYNAMIC:
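/* Local-dynamic: fetch the base of this module's TLS block once and
then add X's @DTPOFF offset to it. */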
7436 base = gen_reg_rtx (Pmode);
7437 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7438
7439 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7440 {
7441 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7442
7443 start_sequence ();
7444 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7445 insns = get_insns ();
7446 end_sequence ();
7447
7448 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7449 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7450 CONST_OR_PURE_CALL_P (insns) = 1;
7451 emit_libcall_block (insns, base, rax, note);
7452 }
7453 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7454 emit_insn (gen_tls_local_dynamic_base_64 (base));
7455 else
7456 emit_insn (gen_tls_local_dynamic_base_32 (base));
7457
7458 if (TARGET_GNU2_TLS)
7459 {
7460 rtx x = ix86_tls_module_base ();
7461
7462 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7463 gen_rtx_MINUS (Pmode, x, tp));
7464 }
7465
7466 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7467 off = gen_rtx_CONST (Pmode, off);
7468
7469 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7470
7471 if (TARGET_GNU2_TLS)
7472 {
7473 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7474
7475 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7476 }
7477
7478 break;
7479
7480 case TLS_MODEL_INITIAL_EXEC:
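/* Initial-exec: load X's offset from the thread pointer out of the GOT
(via the @gottpoff-style relocation chosen below) and then combine it
with the thread pointer. */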
7481 if (TARGET_64BIT)
7482 {
7483 pic = NULL;
7484 type = UNSPEC_GOTNTPOFF;
7485 }
7486 else if (flag_pic)
7487 {
7488 if (reload_in_progress)
7489 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7490 pic = pic_offset_table_rtx;
7491 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7492 }
7493 else if (!TARGET_ANY_GNU_TLS)
7494 {
7495 pic = gen_reg_rtx (Pmode);
7496 emit_insn (gen_set_got (pic));
7497 type = UNSPEC_GOTTPOFF;
7498 }
7499 else
7500 {
7501 pic = NULL;
7502 type = UNSPEC_INDNTPOFF;
7503 }
7504
7505 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7506 off = gen_rtx_CONST (Pmode, off);
7507 if (pic)
7508 off = gen_rtx_PLUS (Pmode, pic, off);
7509 off = gen_const_mem (Pmode, off);
7510 set_mem_alias_set (off, ix86_GOT_alias_set ());
7511
7512 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7513 {
7514 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7515 off = force_reg (Pmode, off);
7516 return gen_rtx_PLUS (Pmode, base, off);
7517 }
7518 else
7519 {
7520 base = get_thread_pointer (true);
7521 dest = gen_reg_rtx (Pmode);
7522 emit_insn (gen_subsi3 (dest, base, off));
7523 }
7524 break;
7525
7526 case TLS_MODEL_LOCAL_EXEC:
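/* Local-exec: the offset of X from the thread pointer is a link-time
constant (@TPOFF / @NTPOFF), so no GOT access is needed. */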
7527 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7528 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7529 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7530 off = gen_rtx_CONST (Pmode, off);
7531
7532 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7533 {
7534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7535 return gen_rtx_PLUS (Pmode, base, off);
7536 }
7537 else
7538 {
7539 base = get_thread_pointer (true);
7540 dest = gen_reg_rtx (Pmode);
7541 emit_insn (gen_subsi3 (dest, base, off));
7542 }
7543 break;
7544
7545 default:
7546 gcc_unreachable ();
7547 }
7548
7549 return dest;
7550 }
7551
7552 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7553 to symbol DECL. */
7554
7555 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7556 htab_t dllimport_map;
7557
7558 static tree
7559 get_dllimport_decl (tree decl)
7560 {
7561 struct tree_map *h, in;
7562 void **loc;
7563 const char *name;
7564 const char *prefix;
7565 size_t namelen, prefixlen;
7566 char *imp_name;
7567 tree to;
7568 rtx rtl;
7569
7570 if (!dllimport_map)
7571 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7572
7573 in.hash = htab_hash_pointer (decl);
7574 in.base.from = decl;
7575 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7576 h = (struct tree_map *) *loc;
7577 if (h)
7578 return h->to;
7579
7580 *loc = h = GGC_NEW (struct tree_map);
7581 h->hash = in.hash;
7582 h->base.from = decl;
7583 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7584 DECL_ARTIFICIAL (to) = 1;
7585 DECL_IGNORED_P (to) = 1;
7586 DECL_EXTERNAL (to) = 1;
7587 TREE_READONLY (to) = 1;
7588
7589 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7590 name = targetm.strip_name_encoding (name);
7591 if (name[0] == FASTCALL_PREFIX)
7592 {
7593 name++;
7594 prefix = "*__imp_";
7595 }
7596 else
7597 prefix = "*__imp__";
7598
7599 namelen = strlen (name);
7600 prefixlen = strlen (prefix);
7601 imp_name = (char *) alloca (namelen + prefixlen + 1);
7602 memcpy (imp_name, prefix, prefixlen);
7603 memcpy (imp_name + prefixlen, name, namelen + 1);
7604
7605 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7606 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7607 SET_SYMBOL_REF_DECL (rtl, to);
7608 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7609
7610 rtl = gen_const_mem (Pmode, rtl);
7611 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7612
7613 SET_DECL_RTL (to, rtl);
7614
7615 return to;
7616 }
7617
7618 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7619 true if we require the result be a register. */
7620
7621 static rtx
7622 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7623 {
7624 tree imp_decl;
7625 rtx x;
7626
7627 gcc_assert (SYMBOL_REF_DECL (symbol));
7628 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7629
7630 x = DECL_RTL (imp_decl);
7631 if (want_reg)
7632 x = force_reg (Pmode, x);
7633 return x;
7634 }
7635
7636 /* Try machine-dependent ways of modifying an illegitimate address
7637 to be legitimate. If we find one, return the new, valid address.
7638 This macro is used in only one place: `memory_address' in explow.c.
7639
7640 OLDX is the address as it was before break_out_memory_refs was called.
7641 In some cases it is useful to look at this to decide what needs to be done.
7642
7643 MODE and WIN are passed so that this macro can use
7644 GO_IF_LEGITIMATE_ADDRESS.
7645
7646 It is always safe for this macro to do nothing. It exists to recognize
7647 opportunities to optimize the output.
7648
7649 For the 80386, we handle X+REG by loading X into a register R and
7650 using R+REG. R will go in a general reg and indexing will be used.
7651 However, if REG is a broken-out memory address or multiplication,
7652 nothing needs to be done because REG can certainly go in a general reg.
7653
7654 When -fpic is used, special handling is needed for symbolic references.
7655 See comments by legitimize_pic_address in i386.c for details. */
7656
7657 rtx
7658 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7659 {
7660 int changed = 0;
7661 unsigned log;
7662
7663 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7664 if (log)
7665 return legitimize_tls_address (x, (enum tls_model) log, false);
7666 if (GET_CODE (x) == CONST
7667 && GET_CODE (XEXP (x, 0)) == PLUS
7668 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7669 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7670 {
7671 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7672 (enum tls_model) log, false);
7673 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7674 }
7675
7676 if (flag_pic && SYMBOLIC_CONST (x))
7677 return legitimize_pic_address (x, 0);
7678
7679 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7680 {
7681 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7682 return legitimize_dllimport_symbol (x, true);
7683 if (GET_CODE (x) == CONST
7684 && GET_CODE (XEXP (x, 0)) == PLUS
7685 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7686 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7687 {
7688 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7689 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7690 }
7691 }
7692
7693 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
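/* E.g. (ashift reg 3) becomes (mult reg 8), matching the scale factors
1, 2, 4 and 8 available in x86 addressing. */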
7694 if (GET_CODE (x) == ASHIFT
7695 && CONST_INT_P (XEXP (x, 1))
7696 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7697 {
7698 changed = 1;
7699 log = INTVAL (XEXP (x, 1));
7700 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7701 GEN_INT (1 << log));
7702 }
7703
7704 if (GET_CODE (x) == PLUS)
7705 {
7706 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7707
7708 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7709 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7710 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7711 {
7712 changed = 1;
7713 log = INTVAL (XEXP (XEXP (x, 0), 1));
7714 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7715 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7716 GEN_INT (1 << log));
7717 }
7718
7719 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7720 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7721 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7722 {
7723 changed = 1;
7724 log = INTVAL (XEXP (XEXP (x, 1), 1));
7725 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7726 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7727 GEN_INT (1 << log));
7728 }
7729
7730 /* Put multiply first if it isn't already. */
7731 if (GET_CODE (XEXP (x, 1)) == MULT)
7732 {
7733 rtx tmp = XEXP (x, 0);
7734 XEXP (x, 0) = XEXP (x, 1);
7735 XEXP (x, 1) = tmp;
7736 changed = 1;
7737 }
7738
7739 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7740 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7741 created by virtual register instantiation, register elimination, and
7742 similar optimizations. */
7743 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7744 {
7745 changed = 1;
7746 x = gen_rtx_PLUS (Pmode,
7747 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7748 XEXP (XEXP (x, 1), 0)),
7749 XEXP (XEXP (x, 1), 1));
7750 }
7751
7752 /* Canonicalize
7753 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7754 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7755 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7756 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7757 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7758 && CONSTANT_P (XEXP (x, 1)))
7759 {
7760 rtx constant;
7761 rtx other = NULL_RTX;
7762
7763 if (CONST_INT_P (XEXP (x, 1)))
7764 {
7765 constant = XEXP (x, 1);
7766 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7767 }
7768 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7769 {
7770 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7771 other = XEXP (x, 1);
7772 }
7773 else
7774 constant = 0;
7775
7776 if (constant)
7777 {
7778 changed = 1;
7779 x = gen_rtx_PLUS (Pmode,
7780 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7781 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7782 plus_constant (other, INTVAL (constant)));
7783 }
7784 }
7785
7786 if (changed && legitimate_address_p (mode, x, FALSE))
7787 return x;
7788
7789 if (GET_CODE (XEXP (x, 0)) == MULT)
7790 {
7791 changed = 1;
7792 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7793 }
7794
7795 if (GET_CODE (XEXP (x, 1)) == MULT)
7796 {
7797 changed = 1;
7798 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7799 }
7800
7801 if (changed
7802 && REG_P (XEXP (x, 1))
7803 && REG_P (XEXP (x, 0)))
7804 return x;
7805
7806 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7807 {
7808 changed = 1;
7809 x = legitimize_pic_address (x, 0);
7810 }
7811
7812 if (changed && legitimate_address_p (mode, x, FALSE))
7813 return x;
7814
7815 if (REG_P (XEXP (x, 0)))
7816 {
7817 rtx temp = gen_reg_rtx (Pmode);
7818 rtx val = force_operand (XEXP (x, 1), temp);
7819 if (val != temp)
7820 emit_move_insn (temp, val);
7821
7822 XEXP (x, 1) = temp;
7823 return x;
7824 }
7825
7826 else if (REG_P (XEXP (x, 1)))
7827 {
7828 rtx temp = gen_reg_rtx (Pmode);
7829 rtx val = force_operand (XEXP (x, 0), temp);
7830 if (val != temp)
7831 emit_move_insn (temp, val);
7832
7833 XEXP (x, 0) = temp;
7834 return x;
7835 }
7836 }
7837
7838 return x;
7839 }
7840 \f
7841 /* Print an integer constant expression in assembler syntax. Addition
7842 and subtraction are the only arithmetic that may appear in these
7843 expressions. FILE is the stdio stream to write to, X is the rtx, and
7844 CODE is the operand print code from the output string. */
7845
7846 static void
7847 output_pic_addr_const (FILE *file, rtx x, int code)
7848 {
7849 char buf[256];
7850
7851 switch (GET_CODE (x))
7852 {
7853 case PC:
7854 gcc_assert (flag_pic);
7855 putc ('.', file);
7856 break;
7857
7858 case SYMBOL_REF:
7859 if (! TARGET_MACHO || TARGET_64BIT)
7860 output_addr_const (file, x);
7861 else
7862 {
7863 const char *name = XSTR (x, 0);
7864
7865 /* Mark the decl as referenced so that cgraph will
7866 output the function. */
7867 if (SYMBOL_REF_DECL (x))
7868 mark_decl_referenced (SYMBOL_REF_DECL (x));
7869
7870 #if TARGET_MACHO
7871 if (MACHOPIC_INDIRECT
7872 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7873 name = machopic_indirection_name (x, /*stub_p=*/true);
7874 #endif
7875 assemble_name (file, name);
7876 }
7877 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7878 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7879 fputs ("@PLT", file);
7880 break;
7881
7882 case LABEL_REF:
7883 x = XEXP (x, 0);
7884 /* FALLTHRU */
7885 case CODE_LABEL:
7886 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7887 assemble_name (asm_out_file, buf);
7888 break;
7889
7890 case CONST_INT:
7891 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7892 break;
7893
7894 case CONST:
7895 /* This used to output parentheses around the expression,
7896 but that does not work on the 386 (either ATT or BSD assembler). */
7897 output_pic_addr_const (file, XEXP (x, 0), code);
7898 break;
7899
7900 case CONST_DOUBLE:
7901 if (GET_MODE (x) == VOIDmode)
7902 {
7903 /* We can use %d if the number is <32 bits and positive. */
7904 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7905 fprintf (file, "0x%lx%08lx",
7906 (unsigned long) CONST_DOUBLE_HIGH (x),
7907 (unsigned long) CONST_DOUBLE_LOW (x));
7908 else
7909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7910 }
7911 else
7912 /* We can't handle floating point constants;
7913 PRINT_OPERAND must handle them. */
7914 output_operand_lossage ("floating constant misused");
7915 break;
7916
7917 case PLUS:
7918 /* Some assemblers need integer constants to appear first. */
7919 if (CONST_INT_P (XEXP (x, 0)))
7920 {
7921 output_pic_addr_const (file, XEXP (x, 0), code);
7922 putc ('+', file);
7923 output_pic_addr_const (file, XEXP (x, 1), code);
7924 }
7925 else
7926 {
7927 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7928 output_pic_addr_const (file, XEXP (x, 1), code);
7929 putc ('+', file);
7930 output_pic_addr_const (file, XEXP (x, 0), code);
7931 }
7932 break;
7933
7934 case MINUS:
7935 if (!TARGET_MACHO)
7936 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7937 output_pic_addr_const (file, XEXP (x, 0), code);
7938 putc ('-', file);
7939 output_pic_addr_const (file, XEXP (x, 1), code);
7940 if (!TARGET_MACHO)
7941 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7942 break;
7943
7944 case UNSPEC:
7945 gcc_assert (XVECLEN (x, 0) == 1);
7946 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7947 switch (XINT (x, 1))
7948 {
7949 case UNSPEC_GOT:
7950 fputs ("@GOT", file);
7951 break;
7952 case UNSPEC_GOTOFF:
7953 fputs ("@GOTOFF", file);
7954 break;
7955 case UNSPEC_PLTOFF:
7956 fputs ("@PLTOFF", file);
7957 break;
7958 case UNSPEC_GOTPCREL:
7959 fputs ("@GOTPCREL(%rip)", file);
7960 break;
7961 case UNSPEC_GOTTPOFF:
7962 /* FIXME: This might be @TPOFF in Sun ld too. */
7963 fputs ("@GOTTPOFF", file);
7964 break;
7965 case UNSPEC_TPOFF:
7966 fputs ("@TPOFF", file);
7967 break;
7968 case UNSPEC_NTPOFF:
7969 if (TARGET_64BIT)
7970 fputs ("@TPOFF", file);
7971 else
7972 fputs ("@NTPOFF", file);
7973 break;
7974 case UNSPEC_DTPOFF:
7975 fputs ("@DTPOFF", file);
7976 break;
7977 case UNSPEC_GOTNTPOFF:
7978 if (TARGET_64BIT)
7979 fputs ("@GOTTPOFF(%rip)", file);
7980 else
7981 fputs ("@GOTNTPOFF", file);
7982 break;
7983 case UNSPEC_INDNTPOFF:
7984 fputs ("@INDNTPOFF", file);
7985 break;
7986 default:
7987 output_operand_lossage ("invalid UNSPEC as operand");
7988 break;
7989 }
7990 break;
7991
7992 default:
7993 output_operand_lossage ("invalid expression as operand");
7994 }
7995 }
7996
7997 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7998 We need to emit DTP-relative relocations. */
7999
8000 static void ATTRIBUTE_UNUSED
8001 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8002 {
8003 fputs (ASM_LONG, file);
8004 output_addr_const (file, x);
8005 fputs ("@DTPOFF", file);
8006 switch (size)
8007 {
8008 case 4:
8009 break;
8010 case 8:
8011 fputs (", 0", file);
8012 break;
8013 default:
8014 gcc_unreachable ();
8015 }
8016 }
8017
8018 /* In the name of slightly smaller debug output, and to cater to
8019 general assembler lossage, recognize PIC+GOTOFF and turn it back
8020 into a direct symbol reference.
8021
8022 On Darwin, this is necessary to avoid a crash, because Darwin
8023 has a different PIC label for each routine but the DWARF debugging
8024 information is not associated with any particular routine, so it's
8025 necessary to remove references to the PIC label from RTL stored by
8026 the DWARF output code. */
8027
8028 static rtx
8029 ix86_delegitimize_address (rtx orig_x)
8030 {
8031 rtx x = orig_x;
8032 /* reg_addend is NULL or a multiple of some register. */
8033 rtx reg_addend = NULL_RTX;
8034 /* const_addend is NULL or a const_int. */
8035 rtx const_addend = NULL_RTX;
8036 /* This is the result, or NULL. */
8037 rtx result = NULL_RTX;
8038
8039 if (MEM_P (x))
8040 x = XEXP (x, 0);
8041
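/* In 64bit mode the only PIC form we undo is a memory load of
sym@GOTPCREL(%rip); anything else is returned unchanged. */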
8042 if (TARGET_64BIT)
8043 {
8044 if (GET_CODE (x) != CONST
8045 || GET_CODE (XEXP (x, 0)) != UNSPEC
8046 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8047 || !MEM_P (orig_x))
8048 return orig_x;
8049 return XVECEXP (XEXP (x, 0), 0, 0);
8050 }
8051
8052 if (GET_CODE (x) != PLUS
8053 || GET_CODE (XEXP (x, 1)) != CONST)
8054 return orig_x;
8055
8056 if (REG_P (XEXP (x, 0))
8057 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8058 /* %ebx + GOT/GOTOFF */
8059 ;
8060 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8061 {
8062 /* %ebx + %reg * scale + GOT/GOTOFF */
8063 reg_addend = XEXP (x, 0);
8064 if (REG_P (XEXP (reg_addend, 0))
8065 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8066 reg_addend = XEXP (reg_addend, 1);
8067 else if (REG_P (XEXP (reg_addend, 1))
8068 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8069 reg_addend = XEXP (reg_addend, 0);
8070 else
8071 return orig_x;
8072 if (!REG_P (reg_addend)
8073 && GET_CODE (reg_addend) != MULT
8074 && GET_CODE (reg_addend) != ASHIFT)
8075 return orig_x;
8076 }
8077 else
8078 return orig_x;
8079
8080 x = XEXP (XEXP (x, 1), 0);
8081 if (GET_CODE (x) == PLUS
8082 && CONST_INT_P (XEXP (x, 1)))
8083 {
8084 const_addend = XEXP (x, 1);
8085 x = XEXP (x, 0);
8086 }
8087
8088 if (GET_CODE (x) == UNSPEC
8089 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8090 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8091 result = XVECEXP (x, 0, 0);
8092
8093 if (TARGET_MACHO && darwin_local_data_pic (x)
8094 && !MEM_P (orig_x))
8095 result = XEXP (x, 0);
8096
8097 if (! result)
8098 return orig_x;
8099
8100 if (const_addend)
8101 result = gen_rtx_PLUS (Pmode, result, const_addend);
8102 if (reg_addend)
8103 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8104 return result;
8105 }
8106
8107 /* If X is a machine specific address (i.e. a symbol or label being
8108 referenced as a displacement from the GOT implemented using an
8109 UNSPEC), then return the base term. Otherwise return X. */
8110
8111 rtx
8112 ix86_find_base_term (rtx x)
8113 {
8114 rtx term;
8115
8116 if (TARGET_64BIT)
8117 {
8118 if (GET_CODE (x) != CONST)
8119 return x;
8120 term = XEXP (x, 0);
8121 if (GET_CODE (term) == PLUS
8122 && (CONST_INT_P (XEXP (term, 1))
8123 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8124 term = XEXP (term, 0);
8125 if (GET_CODE (term) != UNSPEC
8126 || XINT (term, 1) != UNSPEC_GOTPCREL)
8127 return x;
8128
8129 term = XVECEXP (term, 0, 0);
8130
8131 if (GET_CODE (term) != SYMBOL_REF
8132 && GET_CODE (term) != LABEL_REF)
8133 return x;
8134
8135 return term;
8136 }
8137
8138 term = ix86_delegitimize_address (x);
8139
8140 if (GET_CODE (term) != SYMBOL_REF
8141 && GET_CODE (term) != LABEL_REF)
8142 return x;
8143
8144 return term;
8145 }
8146 \f
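/* Print to FILE the condition code suffix (e.g. "e", "ne", "g", "b")
for comparison CODE in mode MODE. REVERSE prints the reversed
condition; FP selects the alternate spellings needed for fcmov on
some assemblers. */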
8147 static void
8148 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8149 int fp, FILE *file)
8150 {
8151 const char *suffix;
8152
8153 if (mode == CCFPmode || mode == CCFPUmode)
8154 {
8155 enum rtx_code second_code, bypass_code;
8156 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8157 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8158 code = ix86_fp_compare_code_to_integer (code);
8159 mode = CCmode;
8160 }
8161 if (reverse)
8162 code = reverse_condition (code);
8163
8164 switch (code)
8165 {
8166 case EQ:
8167 switch (mode)
8168 {
8169 case CCAmode:
8170 suffix = "a";
8171 break;
8172
8173 case CCCmode:
8174 suffix = "c";
8175 break;
8176
8177 case CCOmode:
8178 suffix = "o";
8179 break;
8180
8181 case CCSmode:
8182 suffix = "s";
8183 break;
8184
8185 default:
8186 suffix = "e";
8187 }
8188 break;
8189 case NE:
8190 switch (mode)
8191 {
8192 case CCAmode:
8193 suffix = "na";
8194 break;
8195
8196 case CCCmode:
8197 suffix = "nc";
8198 break;
8199
8200 case CCOmode:
8201 suffix = "no";
8202 break;
8203
8204 case CCSmode:
8205 suffix = "ns";
8206 break;
8207
8208 default:
8209 suffix = "ne";
8210 }
8211 break;
8212 case GT:
8213 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8214 suffix = "g";
8215 break;
8216 case GTU:
8217 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8218 Those same assemblers have the same but opposite lossage on cmov. */
8219 gcc_assert (mode == CCmode);
8220 suffix = fp ? "nbe" : "a";
8221 break;
8222 case LT:
8223 switch (mode)
8224 {
8225 case CCNOmode:
8226 case CCGOCmode:
8227 suffix = "s";
8228 break;
8229
8230 case CCmode:
8231 case CCGCmode:
8232 suffix = "l";
8233 break;
8234
8235 default:
8236 gcc_unreachable ();
8237 }
8238 break;
8239 case LTU:
8240 gcc_assert (mode == CCmode);
8241 suffix = "b";
8242 break;
8243 case GE:
8244 switch (mode)
8245 {
8246 case CCNOmode:
8247 case CCGOCmode:
8248 suffix = "ns";
8249 break;
8250
8251 case CCmode:
8252 case CCGCmode:
8253 suffix = "ge";
8254 break;
8255
8256 default:
8257 gcc_unreachable ();
8258 }
8259 break;
8260 case GEU:
8261 /* ??? As above. */
8262 gcc_assert (mode == CCmode);
8263 suffix = fp ? "nb" : "ae";
8264 break;
8265 case LE:
8266 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8267 suffix = "le";
8268 break;
8269 case LEU:
8270 gcc_assert (mode == CCmode);
8271 suffix = "be";
8272 break;
8273 case UNORDERED:
8274 suffix = fp ? "u" : "p";
8275 break;
8276 case ORDERED:
8277 suffix = fp ? "nu" : "np";
8278 break;
8279 default:
8280 gcc_unreachable ();
8281 }
8282 fputs (suffix, file);
8283 }
8284
8285 /* Print the name of register X to FILE based on its machine mode and number.
8286 If CODE is 'w', pretend the mode is HImode.
8287 If CODE is 'b', pretend the mode is QImode.
8288 If CODE is 'k', pretend the mode is SImode.
8289 If CODE is 'q', pretend the mode is DImode.
8290 If CODE is 'h', pretend the reg is the 'high' byte register.
8291 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8292
8293 void
8294 print_reg (rtx x, int code, FILE *file)
8295 {
8296 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8297 && REGNO (x) != FRAME_POINTER_REGNUM
8298 && REGNO (x) != FLAGS_REG
8299 && REGNO (x) != FPSR_REG
8300 && REGNO (x) != FPCR_REG);
8301
8302 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8303 putc ('%', file);
8304
8305 if (code == 'w' || MMX_REG_P (x))
8306 code = 2;
8307 else if (code == 'b')
8308 code = 1;
8309 else if (code == 'k')
8310 code = 4;
8311 else if (code == 'q')
8312 code = 8;
8313 else if (code == 'y')
8314 code = 3;
8315 else if (code == 'h')
8316 code = 0;
8317 else
8318 code = GET_MODE_SIZE (GET_MODE (x));
8319
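/* From here on CODE holds the operand size in bytes (1, 2, 4, 8, 12 or
16), with 0 standing for the high byte register and 3 for the x87
st(0) naming. */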
8320 /* Irritatingly, the AMD extended registers use a different naming
8321 convention from the normal registers. */
8322 if (REX_INT_REG_P (x))
8323 {
8324 gcc_assert (TARGET_64BIT);
8325 switch (code)
8326 {
8327 case 0:
8328 error ("extended registers have no high halves");
8329 break;
8330 case 1:
8331 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8332 break;
8333 case 2:
8334 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8335 break;
8336 case 4:
8337 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8338 break;
8339 case 8:
8340 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8341 break;
8342 default:
8343 error ("unsupported operand size for extended register");
8344 break;
8345 }
8346 return;
8347 }
8348 switch (code)
8349 {
8350 case 3:
8351 if (STACK_TOP_P (x))
8352 {
8353 fputs ("st(0)", file);
8354 break;
8355 }
8356 /* FALLTHRU */
8357 case 8:
8358 case 4:
8359 case 12:
8360 if (! ANY_FP_REG_P (x))
8361 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8362 /* FALLTHRU */
8363 case 16:
8364 case 2:
8365 normal:
8366 fputs (hi_reg_name[REGNO (x)], file);
8367 break;
8368 case 1:
8369 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8370 goto normal;
8371 fputs (qi_reg_name[REGNO (x)], file);
8372 break;
8373 case 0:
8374 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8375 goto normal;
8376 fputs (qi_high_reg_name[REGNO (x)], file);
8377 break;
8378 default:
8379 gcc_unreachable ();
8380 }
8381 }
8382
8383 /* Locate some local-dynamic symbol still in use by this function
8384 so that we can print its name in some tls_local_dynamic_base
8385 pattern. */
8386
8387 static int
8388 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8389 {
8390 rtx x = *px;
8391
8392 if (GET_CODE (x) == SYMBOL_REF
8393 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8394 {
8395 cfun->machine->some_ld_name = XSTR (x, 0);
8396 return 1;
8397 }
8398
8399 return 0;
8400 }
8401
8402 static const char *
8403 get_some_local_dynamic_name (void)
8404 {
8405 rtx insn;
8406
8407 if (cfun->machine->some_ld_name)
8408 return cfun->machine->some_ld_name;
8409
8410 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8411 if (INSN_P (insn)
8412 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8413 return cfun->machine->some_ld_name;
8414
8415 gcc_unreachable ();
8416 }
8417
8418 /* Meaning of CODE:
8419 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8420 C -- print opcode suffix for set/cmov insn.
8421 c -- like C, but print reversed condition
8422 F,f -- likewise, but for floating-point.
8423 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8424 otherwise nothing
8425 R -- print the prefix for register names.
8426 z -- print the opcode suffix for the size of the current operand.
8427 * -- print a star (in certain assembler syntax)
8428 A -- print an absolute memory reference.
8429 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8430 s -- print a shift double count, followed by the assembler's argument
8431 delimiter.
8432 b -- print the QImode name of the register for the indicated operand.
8433 %b0 would print %al if operands[0] is reg 0.
8434 w -- likewise, print the HImode name of the register.
8435 k -- likewise, print the SImode name of the register.
8436 q -- likewise, print the DImode name of the register.
8437 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8438 y -- print "st(0)" instead of "st" as a register.
8439 D -- print condition for SSE cmp instruction.
8440 P -- if PIC, print an @PLT suffix.
8441 X -- don't print any sort of PIC '@' suffix for a symbol.
8442 & -- print some in-use local-dynamic symbol name.
8443 H -- print a memory address offset by 8; used for sse high-parts
8444 */
8445
8446 void
8447 print_operand (FILE *file, rtx x, int code)
8448 {
8449 if (code)
8450 {
8451 switch (code)
8452 {
8453 case '*':
8454 if (ASSEMBLER_DIALECT == ASM_ATT)
8455 putc ('*', file);
8456 return;
8457
8458 case '&':
8459 assemble_name (file, get_some_local_dynamic_name ());
8460 return;
8461
8462 case 'A':
8463 switch (ASSEMBLER_DIALECT)
8464 {
8465 case ASM_ATT:
8466 putc ('*', file);
8467 break;
8468
8469 case ASM_INTEL:
8470 /* Intel syntax. For absolute addresses, registers should not
8471 be surrounded by brackets. */
8472 if (!REG_P (x))
8473 {
8474 putc ('[', file);
8475 PRINT_OPERAND (file, x, 0);
8476 putc (']', file);
8477 return;
8478 }
8479 break;
8480
8481 default:
8482 gcc_unreachable ();
8483 }
8484
8485 PRINT_OPERAND (file, x, 0);
8486 return;
8487
8488
8489 case 'L':
8490 if (ASSEMBLER_DIALECT == ASM_ATT)
8491 putc ('l', file);
8492 return;
8493
8494 case 'W':
8495 if (ASSEMBLER_DIALECT == ASM_ATT)
8496 putc ('w', file);
8497 return;
8498
8499 case 'B':
8500 if (ASSEMBLER_DIALECT == ASM_ATT)
8501 putc ('b', file);
8502 return;
8503
8504 case 'Q':
8505 if (ASSEMBLER_DIALECT == ASM_ATT)
8506 putc ('l', file);
8507 return;
8508
8509 case 'S':
8510 if (ASSEMBLER_DIALECT == ASM_ATT)
8511 putc ('s', file);
8512 return;
8513
8514 case 'T':
8515 if (ASSEMBLER_DIALECT == ASM_ATT)
8516 putc ('t', file);
8517 return;
8518
8519 case 'z':
8520 /* 387 opcodes don't get size suffixes if the operands are
8521 registers. */
8522 if (STACK_REG_P (x))
8523 return;
8524
8525 /* Likewise if using Intel opcodes. */
8526 if (ASSEMBLER_DIALECT == ASM_INTEL)
8527 return;
8528
8529 /* This is the size of op from size of operand. */
8530 switch (GET_MODE_SIZE (GET_MODE (x)))
8531 {
8532 case 1:
8533 putc ('b', file);
8534 return;
8535
8536 case 2:
8537 if (MEM_P (x))
8538 {
8539 #ifdef HAVE_GAS_FILDS_FISTS
8540 putc ('s', file);
8541 #endif
8542 return;
8543 }
8544 else
8545 putc ('w', file);
8546 return;
8547
8548 case 4:
8549 if (GET_MODE (x) == SFmode)
8550 {
8551 putc ('s', file);
8552 return;
8553 }
8554 else
8555 putc ('l', file);
8556 return;
8557
8558 case 12:
8559 case 16:
8560 putc ('t', file);
8561 return;
8562
8563 case 8:
8564 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8565 {
8566 #ifdef GAS_MNEMONICS
8567 putc ('q', file);
8568 #else
8569 putc ('l', file);
8570 putc ('l', file);
8571 #endif
8572 }
8573 else
8574 putc ('l', file);
8575 return;
8576
8577 default:
8578 gcc_unreachable ();
8579 }
8580
8581 case 'b':
8582 case 'w':
8583 case 'k':
8584 case 'q':
8585 case 'h':
8586 case 'y':
8587 case 'X':
8588 case 'P':
8589 break;
8590
8591 case 's':
8592 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8593 {
8594 PRINT_OPERAND (file, x, 0);
8595 putc (',', file);
8596 }
8597 return;
8598
8599 case 'D':
8600 /* Little bit of braindamage here. The SSE compare instructions
8601 use completely different names for the comparisons than the
8602 fp conditional moves do. */
8603 switch (GET_CODE (x))
8604 {
8605 case EQ:
8606 case UNEQ:
8607 fputs ("eq", file);
8608 break;
8609 case LT:
8610 case UNLT:
8611 fputs ("lt", file);
8612 break;
8613 case LE:
8614 case UNLE:
8615 fputs ("le", file);
8616 break;
8617 case UNORDERED:
8618 fputs ("unord", file);
8619 break;
8620 case NE:
8621 case LTGT:
8622 fputs ("neq", file);
8623 break;
8624 case UNGE:
8625 case GE:
8626 fputs ("nlt", file);
8627 break;
8628 case UNGT:
8629 case GT:
8630 fputs ("nle", file);
8631 break;
8632 case ORDERED:
8633 fputs ("ord", file);
8634 break;
8635 default:
8636 gcc_unreachable ();
8637 }
8638 return;
8639 case 'O':
8640 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8641 if (ASSEMBLER_DIALECT == ASM_ATT)
8642 {
8643 switch (GET_MODE (x))
8644 {
8645 case HImode: putc ('w', file); break;
8646 case SImode:
8647 case SFmode: putc ('l', file); break;
8648 case DImode:
8649 case DFmode: putc ('q', file); break;
8650 default: gcc_unreachable ();
8651 }
8652 putc ('.', file);
8653 }
8654 #endif
8655 return;
8656 case 'C':
8657 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8658 return;
8659 case 'F':
8660 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8661 if (ASSEMBLER_DIALECT == ASM_ATT)
8662 putc ('.', file);
8663 #endif
8664 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8665 return;
8666
8667 /* Like above, but reverse condition */
8668 case 'c':
8669 /* Check to see if argument to %c is really a constant
8670 and not a condition code which needs to be reversed. */
8671 if (!COMPARISON_P (x))
8672 {
8673 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8674 return;
8675 }
8676 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8677 return;
8678 case 'f':
8679 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8680 if (ASSEMBLER_DIALECT == ASM_ATT)
8681 putc ('.', file);
8682 #endif
8683 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8684 return;
8685
8686 case 'H':
8687 /* It doesn't actually matter what mode we use here, as we're
8688 only going to use this for printing. */
8689 x = adjust_address_nv (x, DImode, 8);
8690 break;
8691
8692 case '+':
8693 {
8694 rtx x;
8695
8696 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8697 return;
8698
8699 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8700 if (x)
8701 {
8702 int pred_val = INTVAL (XEXP (x, 0));
8703
8704 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8705 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8706 {
8707 int taken = pred_val > REG_BR_PROB_BASE / 2;
8708 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8709
8710 /* Emit hints only when the default branch prediction
8711 heuristics would fail. */
8712 if (taken != cputaken)
8713 {
8714 /* We use 3e (DS) prefix for taken branches and
8715 2e (CS) prefix for not taken branches. */
8716 if (taken)
8717 fputs ("ds ; ", file);
8718 else
8719 fputs ("cs ; ", file);
8720 }
8721 }
8722 }
8723 return;
8724 }
8725 default:
8726 output_operand_lossage ("invalid operand code '%c'", code);
8727 }
8728 }
8729
8730 if (REG_P (x))
8731 print_reg (x, code, file);
8732
8733 else if (MEM_P (x))
8734 {
8735 /* No `byte ptr' prefix for call instructions. */
8736 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8737 {
8738 const char * size;
8739 switch (GET_MODE_SIZE (GET_MODE (x)))
8740 {
8741 case 1: size = "BYTE"; break;
8742 case 2: size = "WORD"; break;
8743 case 4: size = "DWORD"; break;
8744 case 8: size = "QWORD"; break;
8745 case 12: size = "XWORD"; break;
8746 case 16: size = "XMMWORD"; break;
8747 default:
8748 gcc_unreachable ();
8749 }
8750
8751 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8752 if (code == 'b')
8753 size = "BYTE";
8754 else if (code == 'w')
8755 size = "WORD";
8756 else if (code == 'k')
8757 size = "DWORD";
8758
8759 fputs (size, file);
8760 fputs (" PTR ", file);
8761 }
8762
8763 x = XEXP (x, 0);
8764 /* Avoid (%rip) for call operands. */
8765 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8766 && !CONST_INT_P (x))
8767 output_addr_const (file, x);
8768 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8769 output_operand_lossage ("invalid constraints for operand");
8770 else
8771 output_address (x);
8772 }
8773
8774 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8775 {
8776 REAL_VALUE_TYPE r;
8777 long l;
8778
8779 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8780 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8781
8782 if (ASSEMBLER_DIALECT == ASM_ATT)
8783 putc ('$', file);
8784 fprintf (file, "0x%08lx", l);
8785 }
8786
8787 /* These float cases don't actually occur as immediate operands. */
8788 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8789 {
8790 char dstr[30];
8791
8792 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8793 fprintf (file, "%s", dstr);
8794 }
8795
8796 else if (GET_CODE (x) == CONST_DOUBLE
8797 && GET_MODE (x) == XFmode)
8798 {
8799 char dstr[30];
8800
8801 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8802 fprintf (file, "%s", dstr);
8803 }
8804
8805 else
8806 {
8807 /* We have patterns that allow zero sets of memory, for instance.
8808 In 64-bit mode, we should probably support all 8-byte vectors,
8809 since we can in fact encode that into an immediate. */
8810 if (GET_CODE (x) == CONST_VECTOR)
8811 {
8812 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8813 x = const0_rtx;
8814 }
8815
8816 if (code != 'P')
8817 {
8818 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8819 {
8820 if (ASSEMBLER_DIALECT == ASM_ATT)
8821 putc ('$', file);
8822 }
8823 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8824 || GET_CODE (x) == LABEL_REF)
8825 {
8826 if (ASSEMBLER_DIALECT == ASM_ATT)
8827 putc ('$', file);
8828 else
8829 fputs ("OFFSET FLAT:", file);
8830 }
8831 }
8832 if (CONST_INT_P (x))
8833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8834 else if (flag_pic)
8835 output_pic_addr_const (file, x, code);
8836 else
8837 output_addr_const (file, x);
8838 }
8839 }
8840 \f
8841 /* Print a memory operand whose address is ADDR. */
8842
8843 void
8844 print_operand_address (FILE *file, rtx addr)
8845 {
8846 struct ix86_address parts;
8847 rtx base, index, disp;
8848 int scale;
8849 int ok = ix86_decompose_address (addr, &parts);
8850
8851 gcc_assert (ok);
8852
8853 base = parts.base;
8854 index = parts.index;
8855 disp = parts.disp;
8856 scale = parts.scale;
8857
8858 switch (parts.seg)
8859 {
8860 case SEG_DEFAULT:
8861 break;
8862 case SEG_FS:
8863 case SEG_GS:
8864 if (USER_LABEL_PREFIX[0] == 0)
8865 putc ('%', file);
8866 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8867 break;
8868 default:
8869 gcc_unreachable ();
8870 }
8871
8872 if (!base && !index)
8873 {
8874 /* A displacement-only address requires special attention. */
8875
8876 if (CONST_INT_P (disp))
8877 {
8878 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8879 {
8880 if (USER_LABEL_PREFIX[0] == 0)
8881 putc ('%', file);
8882 fputs ("ds:", file);
8883 }
8884 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8885 }
8886 else if (flag_pic)
8887 output_pic_addr_const (file, disp, 0);
8888 else
8889 output_addr_const (file, disp);
8890
8891 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8892 if (TARGET_64BIT)
8893 {
8894 if (GET_CODE (disp) == CONST
8895 && GET_CODE (XEXP (disp, 0)) == PLUS
8896 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8897 disp = XEXP (XEXP (disp, 0), 0);
8898 if (GET_CODE (disp) == LABEL_REF
8899 || (GET_CODE (disp) == SYMBOL_REF
8900 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8901 fputs ("(%rip)", file);
8902 }
8903 }
8904 else
8905 {
8906 if (ASSEMBLER_DIALECT == ASM_ATT)
8907 {
8908 if (disp)
8909 {
8910 if (flag_pic)
8911 output_pic_addr_const (file, disp, 0);
8912 else if (GET_CODE (disp) == LABEL_REF)
8913 output_asm_label (disp);
8914 else
8915 output_addr_const (file, disp);
8916 }
8917
8918 putc ('(', file);
8919 if (base)
8920 print_reg (base, 0, file);
8921 if (index)
8922 {
8923 putc (',', file);
8924 print_reg (index, 0, file);
8925 if (scale != 1)
8926 fprintf (file, ",%d", scale);
8927 }
8928 putc (')', file);
8929 }
8930 else
8931 {
8932 rtx offset = NULL_RTX;
8933
8934 if (disp)
8935 {
8936 /* Pull out the offset of a symbol; print any symbol itself. */
8937 if (GET_CODE (disp) == CONST
8938 && GET_CODE (XEXP (disp, 0)) == PLUS
8939 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8940 {
8941 offset = XEXP (XEXP (disp, 0), 1);
8942 disp = gen_rtx_CONST (VOIDmode,
8943 XEXP (XEXP (disp, 0), 0));
8944 }
8945
8946 if (flag_pic)
8947 output_pic_addr_const (file, disp, 0);
8948 else if (GET_CODE (disp) == LABEL_REF)
8949 output_asm_label (disp);
8950 else if (CONST_INT_P (disp))
8951 offset = disp;
8952 else
8953 output_addr_const (file, disp);
8954 }
8955
8956 putc ('[', file);
8957 if (base)
8958 {
8959 print_reg (base, 0, file);
8960 if (offset)
8961 {
8962 if (INTVAL (offset) >= 0)
8963 putc ('+', file);
8964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8965 }
8966 }
8967 else if (offset)
8968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8969 else
8970 putc ('0', file);
8971
8972 if (index)
8973 {
8974 putc ('+', file);
8975 print_reg (index, 0, file);
8976 if (scale != 1)
8977 fprintf (file, "*%d", scale);
8978 }
8979 putc (']', file);
8980 }
8981 }
8982 }
8983
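/* Output the @-style suffix for the TLS-related UNSPECs that may appear
inside a constant address (e.g. @TPOFF, @DTPOFF). Return false if X
is not an UNSPEC we recognize. */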
8984 bool
8985 output_addr_const_extra (FILE *file, rtx x)
8986 {
8987 rtx op;
8988
8989 if (GET_CODE (x) != UNSPEC)
8990 return false;
8991
8992 op = XVECEXP (x, 0, 0);
8993 switch (XINT (x, 1))
8994 {
8995 case UNSPEC_GOTTPOFF:
8996 output_addr_const (file, op);
8997 /* FIXME: This might be @TPOFF in Sun ld. */
8998 fputs ("@GOTTPOFF", file);
8999 break;
9000 case UNSPEC_TPOFF:
9001 output_addr_const (file, op);
9002 fputs ("@TPOFF", file);
9003 break;
9004 case UNSPEC_NTPOFF:
9005 output_addr_const (file, op);
9006 if (TARGET_64BIT)
9007 fputs ("@TPOFF", file);
9008 else
9009 fputs ("@NTPOFF", file);
9010 break;
9011 case UNSPEC_DTPOFF:
9012 output_addr_const (file, op);
9013 fputs ("@DTPOFF", file);
9014 break;
9015 case UNSPEC_GOTNTPOFF:
9016 output_addr_const (file, op);
9017 if (TARGET_64BIT)
9018 fputs ("@GOTTPOFF(%rip)", file);
9019 else
9020 fputs ("@GOTNTPOFF", file);
9021 break;
9022 case UNSPEC_INDNTPOFF:
9023 output_addr_const (file, op);
9024 fputs ("@INDNTPOFF", file);
9025 break;
9026
9027 default:
9028 return false;
9029 }
9030
9031 return true;
9032 }
9033 \f
9034 /* Split one or more DImode RTL references into pairs of SImode
9035 references. The RTL can be REG, offsettable MEM, integer constant, or
9036 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9037 split and "num" is its length. lo_half and hi_half are output arrays
9038 that parallel "operands". */
9039
9040 void
9041 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9042 {
9043 while (num--)
9044 {
9045 rtx op = operands[num];
9046
9047 /* simplify_subreg refuses to split volatile memory addresses,
9048 but we still have to handle them. */
9049 if (MEM_P (op))
9050 {
9051 lo_half[num] = adjust_address (op, SImode, 0);
9052 hi_half[num] = adjust_address (op, SImode, 4);
9053 }
9054 else
9055 {
9056 lo_half[num] = simplify_gen_subreg (SImode, op,
9057 GET_MODE (op) == VOIDmode
9058 ? DImode : GET_MODE (op), 0);
9059 hi_half[num] = simplify_gen_subreg (SImode, op,
9060 GET_MODE (op) == VOIDmode
9061 ? DImode : GET_MODE (op), 4);
9062 }
9063 }
9064 }
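/* Illustrative note (added for clarity, not part of the original code):
   on this little-endian target split_di above maps a DImode MEM at address
   X to the SImode words at X (low) and X+4 (high); a constant such as
   0x100000002 splits into lo_half = 2 and hi_half = 1.  */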
9065 /* Split one or more TImode RTL references into pairs of DImode
9066 references. The RTL can be REG, offsettable MEM, integer constant, or
9067 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9068 split and "num" is its length. lo_half and hi_half are output arrays
9069 that parallel "operands". */
9070
9071 void
9072 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9073 {
9074 while (num--)
9075 {
9076 rtx op = operands[num];
9077
9078 /* simplify_subreg refuses to split volatile memory addresses, but we
9079 still have to handle them. */
9080 if (MEM_P (op))
9081 {
9082 lo_half[num] = adjust_address (op, DImode, 0);
9083 hi_half[num] = adjust_address (op, DImode, 8);
9084 }
9085 else
9086 {
9087 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9088 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9089 }
9090 }
9091 }
9092 \f
9093 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9094 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9095 is the expression of the binary operation. The output may either be
9096 emitted here, or returned to the caller, like all output_* functions.
9097
9098 There is no guarantee that the operands are the same mode, as they
9099 might be within FLOAT or FLOAT_EXTEND expressions. */
9100
9101 #ifndef SYSV386_COMPAT
9102 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9103 wants to fix the assemblers because that causes incompatibility
9104 with gcc. No-one wants to fix gcc because that causes
9105 incompatibility with assemblers... You can use the option of
9106 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9107 #define SYSV386_COMPAT 1
9108 #endif
9109
9110 const char *
9111 output_387_binary_op (rtx insn, rtx *operands)
9112 {
9113 static char buf[30];
9114 const char *p;
9115 const char *ssep;
9116 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9117
9118 #ifdef ENABLE_CHECKING
9119 /* Even if we do not want to check the inputs, this documents the input
9120 constraints, which helps in understanding the following code. */
9121 if (STACK_REG_P (operands[0])
9122 && ((REG_P (operands[1])
9123 && REGNO (operands[0]) == REGNO (operands[1])
9124 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9125 || (REG_P (operands[2])
9126 && REGNO (operands[0]) == REGNO (operands[2])
9127 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9128 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9129 ; /* ok */
9130 else
9131 gcc_assert (is_sse);
9132 #endif
9133
9134 switch (GET_CODE (operands[3]))
9135 {
9136 case PLUS:
9137 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9138 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9139 p = "fiadd";
9140 else
9141 p = "fadd";
9142 ssep = "add";
9143 break;
9144
9145 case MINUS:
9146 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9147 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9148 p = "fisub";
9149 else
9150 p = "fsub";
9151 ssep = "sub";
9152 break;
9153
9154 case MULT:
9155 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9156 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9157 p = "fimul";
9158 else
9159 p = "fmul";
9160 ssep = "mul";
9161 break;
9162
9163 case DIV:
9164 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9165 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9166 p = "fidiv";
9167 else
9168 p = "fdiv";
9169 ssep = "div";
9170 break;
9171
9172 default:
9173 gcc_unreachable ();
9174 }
9175
9176 if (is_sse)
9177 {
9178 strcpy (buf, ssep);
9179 if (GET_MODE (operands[0]) == SFmode)
9180 strcat (buf, "ss\t{%2, %0|%0, %2}");
9181 else
9182 strcat (buf, "sd\t{%2, %0|%0, %2}");
9183 return buf;
9184 }
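/* Illustrative note (added, not in the original source): for an SFmode
   PLUS with SSE operands the branch above builds the template
   "addss\t{%2, %0|%0, %2}"; the "{att|intel}" braces let the output
   machinery pick the operand order matching the selected assembler
   dialect.  The x87 path below appends suffixes to the base mnemonic
   chosen earlier in the same way.  */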
9185 strcpy (buf, p);
9186
9187 switch (GET_CODE (operands[3]))
9188 {
9189 case MULT:
9190 case PLUS:
9191 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9192 {
9193 rtx temp = operands[2];
9194 operands[2] = operands[1];
9195 operands[1] = temp;
9196 }
9197
9198 /* We know operands[0] == operands[1]. */
9199
9200 if (MEM_P (operands[2]))
9201 {
9202 p = "%z2\t%2";
9203 break;
9204 }
9205
9206 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9207 {
9208 if (STACK_TOP_P (operands[0]))
9209 /* How is it that we are storing to a dead operand[2]?
9210 Well, presumably operands[1] is dead too. We can't
9211 store the result to st(0) as st(0) gets popped on this
9212 instruction. Instead store to operands[2] (which I
9213 think has to be st(1)). st(1) will be popped later.
9214 gcc <= 2.8.1 didn't have this check and generated
9215 assembly code that the Unixware assembler rejected. */
9216 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9217 else
9218 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9219 break;
9220 }
9221
9222 if (STACK_TOP_P (operands[0]))
9223 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9224 else
9225 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9226 break;
9227
9228 case MINUS:
9229 case DIV:
9230 if (MEM_P (operands[1]))
9231 {
9232 p = "r%z1\t%1";
9233 break;
9234 }
9235
9236 if (MEM_P (operands[2]))
9237 {
9238 p = "%z2\t%2";
9239 break;
9240 }
9241
9242 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9243 {
9244 #if SYSV386_COMPAT
9245 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9246 derived assemblers, confusingly reverse the direction of
9247 the operation for fsub{r} and fdiv{r} when the
9248 destination register is not st(0). The Intel assembler
9249 doesn't have this brain damage. Read !SYSV386_COMPAT to
9250 figure out what the hardware really does. */
9251 if (STACK_TOP_P (operands[0]))
9252 p = "{p\t%0, %2|rp\t%2, %0}";
9253 else
9254 p = "{rp\t%2, %0|p\t%0, %2}";
9255 #else
9256 if (STACK_TOP_P (operands[0]))
9257 /* As above for fmul/fadd, we can't store to st(0). */
9258 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9259 else
9260 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9261 #endif
9262 break;
9263 }
9264
9265 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9266 {
9267 #if SYSV386_COMPAT
9268 if (STACK_TOP_P (operands[0]))
9269 p = "{rp\t%0, %1|p\t%1, %0}";
9270 else
9271 p = "{p\t%1, %0|rp\t%0, %1}";
9272 #else
9273 if (STACK_TOP_P (operands[0]))
9274 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9275 else
9276 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9277 #endif
9278 break;
9279 }
9280
9281 if (STACK_TOP_P (operands[0]))
9282 {
9283 if (STACK_TOP_P (operands[1]))
9284 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9285 else
9286 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9287 break;
9288 }
9289 else if (STACK_TOP_P (operands[1]))
9290 {
9291 #if SYSV386_COMPAT
9292 p = "{\t%1, %0|r\t%0, %1}";
9293 #else
9294 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9295 #endif
9296 }
9297 else
9298 {
9299 #if SYSV386_COMPAT
9300 p = "{r\t%2, %0|\t%0, %2}";
9301 #else
9302 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9303 #endif
9304 }
9305 break;
9306
9307 default:
9308 gcc_unreachable ();
9309 }
9310
9311 strcat (buf, p);
9312 return buf;
9313 }
9314
9315 /* Return needed mode for entity in optimize_mode_switching pass. */
9316
9317 int
9318 ix86_mode_needed (int entity, rtx insn)
9319 {
9320 enum attr_i387_cw mode;
9321
9322 /* The mode UNINITIALIZED is used to store the control word after a
9323 function call or ASM pattern. The mode ANY specifies that the function
9324 has no requirements on the control word and makes no changes in the
9325 bits we are interested in. */
9326
9327 if (CALL_P (insn)
9328 || (NONJUMP_INSN_P (insn)
9329 && (asm_noperands (PATTERN (insn)) >= 0
9330 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9331 return I387_CW_UNINITIALIZED;
9332
9333 if (recog_memoized (insn) < 0)
9334 return I387_CW_ANY;
9335
9336 mode = get_attr_i387_cw (insn);
9337
9338 switch (entity)
9339 {
9340 case I387_TRUNC:
9341 if (mode == I387_CW_TRUNC)
9342 return mode;
9343 break;
9344
9345 case I387_FLOOR:
9346 if (mode == I387_CW_FLOOR)
9347 return mode;
9348 break;
9349
9350 case I387_CEIL:
9351 if (mode == I387_CW_CEIL)
9352 return mode;
9353 break;
9354
9355 case I387_MASK_PM:
9356 if (mode == I387_CW_MASK_PM)
9357 return mode;
9358 break;
9359
9360 default:
9361 gcc_unreachable ();
9362 }
9363
9364 return I387_CW_ANY;
9365 }
9366
9367 /* Output code to initialize control word copies used by trunc?f?i and
9368 rounding patterns. CURRENT_MODE is set to current control word,
9369 while NEW_MODE is set to new control word. */
9370
9371 void
9372 emit_i387_cw_initialization (int mode)
9373 {
9374 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9375 rtx new_mode;
9376
9377 enum ix86_stack_slot slot;
9378
9379 rtx reg = gen_reg_rtx (HImode);
9380
9381 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9382 emit_move_insn (reg, copy_rtx (stored_mode));
9383
9384 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9385 {
9386 switch (mode)
9387 {
9388 case I387_CW_TRUNC:
9389 /* round toward zero (truncate) */
9390 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9391 slot = SLOT_CW_TRUNC;
9392 break;
9393
9394 case I387_CW_FLOOR:
9395 /* round down toward -oo */
9396 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9397 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9398 slot = SLOT_CW_FLOOR;
9399 break;
9400
9401 case I387_CW_CEIL:
9402 /* round up toward +oo */
9403 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9404 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9405 slot = SLOT_CW_CEIL;
9406 break;
9407
9408 case I387_CW_MASK_PM:
9409 /* mask precision exception for nearbyint() */
9410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9411 slot = SLOT_CW_MASK_PM;
9412 break;
9413
9414 default:
9415 gcc_unreachable ();
9416 }
9417 }
9418 else
9419 {
9420 switch (mode)
9421 {
9422 case I387_CW_TRUNC:
9423 /* round toward zero (truncate) */
9424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9425 slot = SLOT_CW_TRUNC;
9426 break;
9427
9428 case I387_CW_FLOOR:
9429 /* round down toward -oo */
9430 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9431 slot = SLOT_CW_FLOOR;
9432 break;
9433
9434 case I387_CW_CEIL:
9435 /* round up toward +oo */
9436 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9437 slot = SLOT_CW_CEIL;
9438 break;
9439
9440 case I387_CW_MASK_PM:
9441 /* mask precision exception for nearbyint() */
9442 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9443 slot = SLOT_CW_MASK_PM;
9444 break;
9445
9446 default:
9447 gcc_unreachable ();
9448 }
9449 }
9450
9451 gcc_assert (slot < MAX_386_STACK_LOCALS);
9452
9453 new_mode = assign_386_stack_local (HImode, slot);
9454 emit_move_insn (new_mode, reg);
9455 }
9456
9457 /* Output code for INSN to convert a float to a signed int. OPERANDS
9458 are the insn operands. The output may be [HSD]Imode and the input
9459 operand may be [SDX]Fmode. */
9460
9461 const char *
9462 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9463 {
9464 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9465 int dimode_p = GET_MODE (operands[0]) == DImode;
9466 int round_mode = get_attr_i387_cw (insn);
9467
9468 /* Jump through a hoop or two for DImode, since the hardware has no
9469 non-popping instruction. We used to do this a different way, but
9470 that was somewhat fragile and broke with post-reload splitters. */
9471 if ((dimode_p || fisttp) && !stack_top_dies)
9472 output_asm_insn ("fld\t%y1", operands);
9473
9474 gcc_assert (STACK_TOP_P (operands[1]));
9475 gcc_assert (MEM_P (operands[0]));
9476 gcc_assert (GET_MODE (operands[1]) != TFmode);
9477
9478 if (fisttp)
9479 output_asm_insn ("fisttp%z0\t%0", operands);
9480 else
9481 {
9482 if (round_mode != I387_CW_ANY)
9483 output_asm_insn ("fldcw\t%3", operands);
9484 if (stack_top_dies || dimode_p)
9485 output_asm_insn ("fistp%z0\t%0", operands);
9486 else
9487 output_asm_insn ("fist%z0\t%0", operands);
9488 if (round_mode != I387_CW_ANY)
9489 output_asm_insn ("fldcw\t%2", operands);
9490 }
9491
9492 return "";
9493 }
9494
9495 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9496 have the values zero or one, indicates the ffreep insn's operand
9497 from the OPERANDS array. */
9498
9499 static const char *
9500 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9501 {
9502 if (TARGET_USE_FFREEP)
9503 #if HAVE_AS_IX86_FFREEP
9504 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9505 #else
9506 {
9507 static char retval[] = ".word\t0xc_df";
9508 int regno = REGNO (operands[opno]);
9509
9510 gcc_assert (FP_REGNO_P (regno));
9511
9512 retval[9] = '0' + (regno - FIRST_STACK_REG);
9513 return retval;
9514 }
9515 #endif
9516
9517 return opno ? "fstp\t%y1" : "fstp\t%y0";
9518 }
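/* Worked example (illustrative, not part of the original code): when the
   assembler lacks the "ffreep" mnemonic, the ".word\t0xc_df" template above
   has its '_' patched with the stack register number; e.g. freeing %st(1)
   emits ".word 0xc1df", whose little-endian bytes 0xdf 0xc1 are exactly the
   machine encoding of "ffreep %st(1)".  */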
9519
9520
9521 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9522 should be used. UNORDERED_P is true when fucom should be used. */
9523
9524 const char *
9525 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9526 {
9527 int stack_top_dies;
9528 rtx cmp_op0, cmp_op1;
9529 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9530
9531 if (eflags_p)
9532 {
9533 cmp_op0 = operands[0];
9534 cmp_op1 = operands[1];
9535 }
9536 else
9537 {
9538 cmp_op0 = operands[1];
9539 cmp_op1 = operands[2];
9540 }
9541
9542 if (is_sse)
9543 {
9544 if (GET_MODE (operands[0]) == SFmode)
9545 if (unordered_p)
9546 return "ucomiss\t{%1, %0|%0, %1}";
9547 else
9548 return "comiss\t{%1, %0|%0, %1}";
9549 else
9550 if (unordered_p)
9551 return "ucomisd\t{%1, %0|%0, %1}";
9552 else
9553 return "comisd\t{%1, %0|%0, %1}";
9554 }
9555
9556 gcc_assert (STACK_TOP_P (cmp_op0));
9557
9558 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9559
9560 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9561 {
9562 if (stack_top_dies)
9563 {
9564 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9565 return output_387_ffreep (operands, 1);
9566 }
9567 else
9568 return "ftst\n\tfnstsw\t%0";
9569 }
9570
9571 if (STACK_REG_P (cmp_op1)
9572 && stack_top_dies
9573 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9574 && REGNO (cmp_op1) != FIRST_STACK_REG)
9575 {
9576 /* If both the top of the 387 stack and the other operand (itself a
9577 stack register) die, then this must be an `fcompp' float
9578 compare. */
9579
9580 if (eflags_p)
9581 {
9582 /* There is no double popping fcomi variant. Fortunately,
9583 eflags is immune from the fstp's cc clobbering. */
9584 if (unordered_p)
9585 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9586 else
9587 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9588 return output_387_ffreep (operands, 0);
9589 }
9590 else
9591 {
9592 if (unordered_p)
9593 return "fucompp\n\tfnstsw\t%0";
9594 else
9595 return "fcompp\n\tfnstsw\t%0";
9596 }
9597 }
9598 else
9599 {
9600 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9601
9602 static const char * const alt[16] =
9603 {
9604 "fcom%z2\t%y2\n\tfnstsw\t%0",
9605 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9606 "fucom%z2\t%y2\n\tfnstsw\t%0",
9607 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9608
9609 "ficom%z2\t%y2\n\tfnstsw\t%0",
9610 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9611 NULL,
9612 NULL,
9613
9614 "fcomi\t{%y1, %0|%0, %y1}",
9615 "fcomip\t{%y1, %0|%0, %y1}",
9616 "fucomi\t{%y1, %0|%0, %y1}",
9617 "fucomip\t{%y1, %0|%0, %y1}",
9618
9619 NULL,
9620 NULL,
9621 NULL,
9622 NULL
9623 };
9624
9625 int mask;
9626 const char *ret;
9627
9628 mask = eflags_p << 3;
9629 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9630 mask |= unordered_p << 1;
9631 mask |= stack_top_dies;
9632
9633 gcc_assert (mask < 16);
9634 ret = alt[mask];
9635 gcc_assert (ret);
9636
9637 return ret;
9638 }
9639 }
9640
9641 void
9642 ix86_output_addr_vec_elt (FILE *file, int value)
9643 {
9644 const char *directive = ASM_LONG;
9645
9646 #ifdef ASM_QUAD
9647 if (TARGET_64BIT)
9648 directive = ASM_QUAD;
9649 #else
9650 gcc_assert (!TARGET_64BIT);
9651 #endif
9652
9653 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9654 }
9655
9656 void
9657 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9658 {
9659 const char *directive = ASM_LONG;
9660
9661 #ifdef ASM_QUAD
9662 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9663 directive = ASM_QUAD;
9664 #else
9665 gcc_assert (!TARGET_64BIT);
9666 #endif
9667 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9668 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9669 fprintf (file, "%s%s%d-%s%d\n",
9670 directive, LPREFIX, value, LPREFIX, rel);
9671 else if (HAVE_AS_GOTOFF_IN_DATA)
9672 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9673 #if TARGET_MACHO
9674 else if (TARGET_MACHO)
9675 {
9676 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9677 machopic_output_function_base_name (file);
9678 fprintf(file, "\n");
9679 }
9680 #endif
9681 else
9682 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9683 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9684 }
9685 \f
9686 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9687 for the target. */
9688
9689 void
9690 ix86_expand_clear (rtx dest)
9691 {
9692 rtx tmp;
9693
9694 /* We play register width games, which are only valid after reload. */
9695 gcc_assert (reload_completed);
9696
9697 /* Avoid HImode and its attendant prefix byte. */
9698 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9699 dest = gen_rtx_REG (SImode, REGNO (dest));
9700 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9701
9702 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9703 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9704 {
9705 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9706 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9707 }
9708
9709 emit_insn (tmp);
9710 }
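/* Design note (added, hedged): the PARALLEL with a flags CLOBBER allows the
   "xorl %reg, %reg" form, which is shorter than "movl $0, %reg" (2 bytes
   vs. 5 for a 32-bit register) but destroys the condition codes; that is
   why it is only used when TARGET_USE_MOV0 is off or we optimize for
   size.  */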
9711
9712 /* X is an unchanging MEM. If it is a constant pool reference, return
9713 the constant pool rtx, else NULL. */
9714
9715 rtx
9716 maybe_get_pool_constant (rtx x)
9717 {
9718 x = ix86_delegitimize_address (XEXP (x, 0));
9719
9720 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9721 return get_pool_constant (x);
9722
9723 return NULL_RTX;
9724 }
9725
9726 void
9727 ix86_expand_move (enum machine_mode mode, rtx operands[])
9728 {
9729 int strict = (reload_in_progress || reload_completed);
9730 rtx op0, op1;
9731 enum tls_model model;
9732
9733 op0 = operands[0];
9734 op1 = operands[1];
9735
9736 if (GET_CODE (op1) == SYMBOL_REF)
9737 {
9738 model = SYMBOL_REF_TLS_MODEL (op1);
9739 if (model)
9740 {
9741 op1 = legitimize_tls_address (op1, model, true);
9742 op1 = force_operand (op1, op0);
9743 if (op1 == op0)
9744 return;
9745 }
9746 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9747 && SYMBOL_REF_DLLIMPORT_P (op1))
9748 op1 = legitimize_dllimport_symbol (op1, false);
9749 }
9750 else if (GET_CODE (op1) == CONST
9751 && GET_CODE (XEXP (op1, 0)) == PLUS
9752 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9753 {
9754 rtx addend = XEXP (XEXP (op1, 0), 1);
9755 rtx symbol = XEXP (XEXP (op1, 0), 0);
9756 rtx tmp = NULL;
9757
9758 model = SYMBOL_REF_TLS_MODEL (symbol);
9759 if (model)
9760 tmp = legitimize_tls_address (symbol, model, true);
9761 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9762 && SYMBOL_REF_DLLIMPORT_P (symbol))
9763 tmp = legitimize_dllimport_symbol (symbol, true);
9764
9765 if (tmp)
9766 {
9767 tmp = force_operand (tmp, NULL);
9768 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9769 op0, 1, OPTAB_DIRECT);
9770 if (tmp == op0)
9771 return;
9772 }
9773 }
9774
9775 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9776 {
9777 if (TARGET_MACHO && !TARGET_64BIT)
9778 {
9779 #if TARGET_MACHO
9780 if (MACHOPIC_PURE)
9781 {
9782 rtx temp = ((reload_in_progress
9783 || ((op0 && REG_P (op0))
9784 && mode == Pmode))
9785 ? op0 : gen_reg_rtx (Pmode));
9786 op1 = machopic_indirect_data_reference (op1, temp);
9787 op1 = machopic_legitimize_pic_address (op1, mode,
9788 temp == op1 ? 0 : temp);
9789 }
9790 else if (MACHOPIC_INDIRECT)
9791 op1 = machopic_indirect_data_reference (op1, 0);
9792 if (op0 == op1)
9793 return;
9794 #endif
9795 }
9796 else
9797 {
9798 if (MEM_P (op0))
9799 op1 = force_reg (Pmode, op1);
9800 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9801 {
9802 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
9803 op1 = legitimize_pic_address (op1, reg);
9804 if (op0 == op1)
9805 return;
9806 }
9807 }
9808 }
9809 else
9810 {
9811 if (MEM_P (op0)
9812 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9813 || !push_operand (op0, mode))
9814 && MEM_P (op1))
9815 op1 = force_reg (mode, op1);
9816
9817 if (push_operand (op0, mode)
9818 && ! general_no_elim_operand (op1, mode))
9819 op1 = copy_to_mode_reg (mode, op1);
9820
9821 /* Force large constants in 64-bit compilation into registers
9822 to get them CSEed. */
9823 if (TARGET_64BIT && mode == DImode
9824 && immediate_operand (op1, mode)
9825 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9826 && !register_operand (op0, mode)
9827 && optimize && !reload_completed && !reload_in_progress)
9828 op1 = copy_to_mode_reg (mode, op1);
9829
9830 if (FLOAT_MODE_P (mode))
9831 {
9832 /* If we are loading a floating point constant to a register,
9833 force the value to memory now, since we'll get better code
9834 out the back end. */
9835
9836 if (strict)
9837 ;
9838 else if (GET_CODE (op1) == CONST_DOUBLE)
9839 {
9840 op1 = validize_mem (force_const_mem (mode, op1));
9841 if (!register_operand (op0, mode))
9842 {
9843 rtx temp = gen_reg_rtx (mode);
9844 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9845 emit_move_insn (op0, temp);
9846 return;
9847 }
9848 }
9849 }
9850 }
9851
9852 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9853 }
9854
9855 void
9856 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9857 {
9858 rtx op0 = operands[0], op1 = operands[1];
9859 unsigned int align = GET_MODE_ALIGNMENT (mode);
9860
9861 /* Force constants other than zero into memory. We do not know how
9862 the instructions used to build constants modify the upper 64 bits
9863 of the register; once we have that information we may be able
9864 to handle some of them more efficiently. */
9865 if ((reload_in_progress | reload_completed) == 0
9866 && register_operand (op0, mode)
9867 && (CONSTANT_P (op1)
9868 || (GET_CODE (op1) == SUBREG
9869 && CONSTANT_P (SUBREG_REG (op1))))
9870 && standard_sse_constant_p (op1) <= 0)
9871 op1 = validize_mem (force_const_mem (mode, op1));
9872
9873 /* TDmode values are passed as TImode on the stack. TImode values
9874 are moved via xmm registers, and moving them to stack can result in
9875 unaligned memory access. Use ix86_expand_vector_move_misalign()
9876 if memory operand is not aligned correctly. */
9877 if (can_create_pseudo_p ()
9878 && (mode == TImode) && !TARGET_64BIT
9879 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9880 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9881 {
9882 rtx tmp[2];
9883
9884 /* ix86_expand_vector_move_misalign() does not like constants ... */
9885 if (CONSTANT_P (op1)
9886 || (GET_CODE (op1) == SUBREG
9887 && CONSTANT_P (SUBREG_REG (op1))))
9888 op1 = validize_mem (force_const_mem (mode, op1));
9889
9890 /* ... nor both arguments in memory. */
9891 if (!register_operand (op0, mode)
9892 && !register_operand (op1, mode))
9893 op1 = force_reg (mode, op1);
9894
9895 tmp[0] = op0; tmp[1] = op1;
9896 ix86_expand_vector_move_misalign (mode, tmp);
9897 return;
9898 }
9899
9900 /* Make operand1 a register if it isn't already. */
9901 if (can_create_pseudo_p ()
9902 && !register_operand (op0, mode)
9903 && !register_operand (op1, mode))
9904 {
9905 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9906 return;
9907 }
9908
9909 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9910 }
9911
9912 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9913 straight to ix86_expand_vector_move. */
9914 /* Code generation for scalar reg-reg moves of single and double precision data:
9915 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9916 movaps reg, reg
9917 else
9918 movss reg, reg
9919 if (x86_sse_partial_reg_dependency == true)
9920 movapd reg, reg
9921 else
9922 movsd reg, reg
9923
9924 Code generation for scalar loads of double precision data:
9925 if (x86_sse_split_regs == true)
9926 movlpd mem, reg (gas syntax)
9927 else
9928 movsd mem, reg
9929
9930 Code generation for unaligned packed loads of single precision data
9931 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9932 if (x86_sse_unaligned_move_optimal)
9933 movups mem, reg
9934
9935 if (x86_sse_partial_reg_dependency == true)
9936 {
9937 xorps reg, reg
9938 movlps mem, reg
9939 movhps mem+8, reg
9940 }
9941 else
9942 {
9943 movlps mem, reg
9944 movhps mem+8, reg
9945 }
9946
9947 Code generation for unaligned packed loads of double precision data
9948 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9949 if (x86_sse_unaligned_move_optimal)
9950 movupd mem, reg
9951
9952 if (x86_sse_split_regs == true)
9953 {
9954 movlpd mem, reg
9955 movhpd mem+8, reg
9956 }
9957 else
9958 {
9959 movsd mem, reg
9960 movhpd mem+8, reg
9961 }
9962 */
9963
9964 void
9965 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9966 {
9967 rtx op0, op1, m;
9968
9969 op0 = operands[0];
9970 op1 = operands[1];
9971
9972 if (MEM_P (op1))
9973 {
9974 /* If we're optimizing for size, movups is the smallest. */
9975 if (optimize_size)
9976 {
9977 op0 = gen_lowpart (V4SFmode, op0);
9978 op1 = gen_lowpart (V4SFmode, op1);
9979 emit_insn (gen_sse_movups (op0, op1));
9980 return;
9981 }
9982
9983 /* ??? If we have typed data, then it would appear that using
9984 movdqu is the only way to get unaligned data loaded with
9985 integer type. */
9986 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9987 {
9988 op0 = gen_lowpart (V16QImode, op0);
9989 op1 = gen_lowpart (V16QImode, op1);
9990 emit_insn (gen_sse2_movdqu (op0, op1));
9991 return;
9992 }
9993
9994 if (TARGET_SSE2 && mode == V2DFmode)
9995 {
9996 rtx zero;
9997
9998 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9999 {
10000 op0 = gen_lowpart (V2DFmode, op0);
10001 op1 = gen_lowpart (V2DFmode, op1);
10002 emit_insn (gen_sse2_movupd (op0, op1));
10003 return;
10004 }
10005
10006 /* When SSE registers are split into halves, we can avoid
10007 writing to the top half twice. */
10008 if (TARGET_SSE_SPLIT_REGS)
10009 {
10010 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10011 zero = op0;
10012 }
10013 else
10014 {
10015 /* ??? Not sure about the best option for the Intel chips.
10016 The following would seem to satisfy; the register is
10017 entirely cleared, breaking the dependency chain. We
10018 then store to the upper half, with a dependency depth
10019 of one. A rumor has it that Intel recommends two movsd
10020 followed by an unpacklpd, but this is unconfirmed. And
10021 given that the dependency depth of the unpacklpd would
10022 still be one, I'm not sure why this would be better. */
10023 zero = CONST0_RTX (V2DFmode);
10024 }
10025
10026 m = adjust_address (op1, DFmode, 0);
10027 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10028 m = adjust_address (op1, DFmode, 8);
10029 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10030 }
10031 else
10032 {
10033 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10034 {
10035 op0 = gen_lowpart (V4SFmode, op0);
10036 op1 = gen_lowpart (V4SFmode, op1);
10037 emit_insn (gen_sse_movups (op0, op1));
10038 return;
10039 }
10040
10041 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10042 emit_move_insn (op0, CONST0_RTX (mode));
10043 else
10044 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10045
10046 if (mode != V4SFmode)
10047 op0 = gen_lowpart (V4SFmode, op0);
10048 m = adjust_address (op1, V2SFmode, 0);
10049 emit_insn (gen_sse_loadlps (op0, op0, m));
10050 m = adjust_address (op1, V2SFmode, 8);
10051 emit_insn (gen_sse_loadhps (op0, op0, m));
10052 }
10053 }
10054 else if (MEM_P (op0))
10055 {
10056 /* If we're optimizing for size, movups is the smallest. */
10057 if (optimize_size)
10058 {
10059 op0 = gen_lowpart (V4SFmode, op0);
10060 op1 = gen_lowpart (V4SFmode, op1);
10061 emit_insn (gen_sse_movups (op0, op1));
10062 return;
10063 }
10064
10065 /* ??? Similar to above, only less clear because of quote
10066 typeless stores unquote. */
10067 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10068 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10069 {
10070 op0 = gen_lowpart (V16QImode, op0);
10071 op1 = gen_lowpart (V16QImode, op1);
10072 emit_insn (gen_sse2_movdqu (op0, op1));
10073 return;
10074 }
10075
10076 if (TARGET_SSE2 && mode == V2DFmode)
10077 {
10078 m = adjust_address (op0, DFmode, 0);
10079 emit_insn (gen_sse2_storelpd (m, op1));
10080 m = adjust_address (op0, DFmode, 8);
10081 emit_insn (gen_sse2_storehpd (m, op1));
10082 }
10083 else
10084 {
10085 if (mode != V4SFmode)
10086 op1 = gen_lowpart (V4SFmode, op1);
10087 m = adjust_address (op0, V2SFmode, 0);
10088 emit_insn (gen_sse_storelps (m, op1));
10089 m = adjust_address (op0, V2SFmode, 8);
10090 emit_insn (gen_sse_storehps (m, op1));
10091 }
10092 }
10093 else
10094 gcc_unreachable ();
10095 }
10096
10097 /* Expand a push in MODE. This is some mode for which we do not support
10098 proper push instructions, at least from the registers that we expect
10099 the value to live in. */
10100
10101 void
10102 ix86_expand_push (enum machine_mode mode, rtx x)
10103 {
10104 rtx tmp;
10105
10106 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10107 GEN_INT (-GET_MODE_SIZE (mode)),
10108 stack_pointer_rtx, 1, OPTAB_DIRECT);
10109 if (tmp != stack_pointer_rtx)
10110 emit_move_insn (stack_pointer_rtx, tmp);
10111
10112 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10113 emit_move_insn (tmp, x);
10114 }
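/* Illustrative expansion (added, not in the original source): for a mode
   with no usable push instruction, e.g. an SSE vector or TFmode value, the
   code above simply subtracts GET_MODE_SIZE (mode) from the stack pointer
   and then performs an ordinary store to the new (%esp)/(%rsp).  */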
10115
10116 /* Helper function of ix86_fixup_binary_operands to canonicalize
10117 operand order. Returns true if the operands should be swapped. */
10118
10119 static bool
10120 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10121 rtx operands[])
10122 {
10123 rtx dst = operands[0];
10124 rtx src1 = operands[1];
10125 rtx src2 = operands[2];
10126
10127 /* If the operation is not commutative, we can't do anything. */
10128 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10129 return false;
10130
10131 /* Highest priority is that src1 should match dst. */
10132 if (rtx_equal_p (dst, src1))
10133 return false;
10134 if (rtx_equal_p (dst, src2))
10135 return true;
10136
10137 /* Next highest priority is that immediate constants come second. */
10138 if (immediate_operand (src2, mode))
10139 return false;
10140 if (immediate_operand (src1, mode))
10141 return true;
10142
10143 /* Lowest priority is that memory references should come second. */
10144 if (MEM_P (src2))
10145 return false;
10146 if (MEM_P (src1))
10147 return true;
10148
10149 return false;
10150 }
10151
10152
10153 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10154 destination to use for the operation. If different from the true
10155 destination in operands[0], a copy operation will be required. */
10156
10157 rtx
10158 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10159 rtx operands[])
10160 {
10161 rtx dst = operands[0];
10162 rtx src1 = operands[1];
10163 rtx src2 = operands[2];
10164
10165 /* Canonicalize operand order. */
10166 if (ix86_swap_binary_operands_p (code, mode, operands))
10167 {
10168 rtx temp = src1;
10169 src1 = src2;
10170 src2 = temp;
10171 }
10172
10173 /* Both source operands cannot be in memory. */
10174 if (MEM_P (src1) && MEM_P (src2))
10175 {
10176 /* Optimization: Only read from memory once. */
10177 if (rtx_equal_p (src1, src2))
10178 {
10179 src2 = force_reg (mode, src2);
10180 src1 = src2;
10181 }
10182 else
10183 src2 = force_reg (mode, src2);
10184 }
10185
10186 /* If the destination is memory, and we do not have matching source
10187 operands, do things in registers. */
10188 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10189 dst = gen_reg_rtx (mode);
10190
10191 /* Source 1 cannot be a constant. */
10192 if (CONSTANT_P (src1))
10193 src1 = force_reg (mode, src1);
10194
10195 /* Source 1 cannot be a non-matching memory. */
10196 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10197 src1 = force_reg (mode, src1);
10198
10199 operands[1] = src1;
10200 operands[2] = src2;
10201 return dst;
10202 }
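/* Summary note (added, hedged): after this fixup at most one source
   operand is a MEM, src1 is never a bare constant, and when operands[0] is
   memory the returned dst either matches it or is a fresh register whose
   value the caller must copy back, as ix86_expand_binary_operator does
   below.  */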
10203
10204 /* Similarly, but assume that the destination has already been
10205 set up properly. */
10206
10207 void
10208 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10209 enum machine_mode mode, rtx operands[])
10210 {
10211 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10212 gcc_assert (dst == operands[0]);
10213 }
10214
10215 /* Attempt to expand a binary operator. Make the expansion closer to the
10216 actual machine than just general_operand, which would allow 3 separate
10217 memory references (one output, two inputs) in a single insn. */
10218
10219 void
10220 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10221 rtx operands[])
10222 {
10223 rtx src1, src2, dst, op, clob;
10224
10225 dst = ix86_fixup_binary_operands (code, mode, operands);
10226 src1 = operands[1];
10227 src2 = operands[2];
10228
10229 /* Emit the instruction. */
10230
10231 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10232 if (reload_in_progress)
10233 {
10234 /* Reload doesn't know about the flags register, and doesn't know that
10235 it doesn't want to clobber it. We can only do this with PLUS. */
10236 gcc_assert (code == PLUS);
10237 emit_insn (op);
10238 }
10239 else
10240 {
10241 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10242 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10243 }
10244
10245 /* Fix up the destination if needed. */
10246 if (dst != operands[0])
10247 emit_move_insn (operands[0], dst);
10248 }
10249
10250 /* Return TRUE or FALSE depending on whether the binary operator meets the
10251 appropriate constraints. */
10252
10253 int
10254 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10255 rtx operands[3])
10256 {
10257 rtx dst = operands[0];
10258 rtx src1 = operands[1];
10259 rtx src2 = operands[2];
10260
10261 /* Both source operands cannot be in memory. */
10262 if (MEM_P (src1) && MEM_P (src2))
10263 return 0;
10264
10265 /* Canonicalize operand order for commutative operators. */
10266 if (ix86_swap_binary_operands_p (code, mode, operands))
10267 {
10268 rtx temp = src1;
10269 src1 = src2;
10270 src2 = temp;
10271 }
10272
10273 /* If the destination is memory, we must have a matching source operand. */
10274 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10275 return 0;
10276
10277 /* Source 1 cannot be a constant. */
10278 if (CONSTANT_P (src1))
10279 return 0;
10280
10281 /* Source 1 cannot be a non-matching memory. */
10282 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10283 return 0;
10284
10285 return 1;
10286 }
10287
10288 /* Attempt to expand a unary operator. Make the expansion closer to the
10289 actual machine than just general_operand, which would allow 2 separate
10290 memory references (one output, one input) in a single insn. */
10291
10292 void
10293 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10294 rtx operands[])
10295 {
10296 int matching_memory;
10297 rtx src, dst, op, clob;
10298
10299 dst = operands[0];
10300 src = operands[1];
10301
10302 /* If the destination is memory, and we do not have matching source
10303 operands, do things in registers. */
10304 matching_memory = 0;
10305 if (MEM_P (dst))
10306 {
10307 if (rtx_equal_p (dst, src))
10308 matching_memory = 1;
10309 else
10310 dst = gen_reg_rtx (mode);
10311 }
10312
10313 /* When source operand is memory, destination must match. */
10314 if (MEM_P (src) && !matching_memory)
10315 src = force_reg (mode, src);
10316
10317 /* Emit the instruction. */
10318
10319 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10320 if (reload_in_progress || code == NOT)
10321 {
10322 /* Reload doesn't know about the flags register, and doesn't know that
10323 it doesn't want to clobber it. */
10324 gcc_assert (code == NOT);
10325 emit_insn (op);
10326 }
10327 else
10328 {
10329 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10330 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10331 }
10332
10333 /* Fix up the destination if needed. */
10334 if (dst != operands[0])
10335 emit_move_insn (operands[0], dst);
10336 }
10337
10338 /* Return TRUE or FALSE depending on whether the unary operator meets the
10339 appropriate constraints. */
10340
10341 int
10342 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10343 enum machine_mode mode ATTRIBUTE_UNUSED,
10344 rtx operands[2] ATTRIBUTE_UNUSED)
10345 {
10346 /* If one of operands is memory, source and destination must match. */
10347 if ((MEM_P (operands[0])
10348 || MEM_P (operands[1]))
10349 && ! rtx_equal_p (operands[0], operands[1]))
10350 return FALSE;
10351 return TRUE;
10352 }
10353
10354 /* Post-reload splitter for converting an SF or DFmode value in an
10355 SSE register into an unsigned SImode. */
10356
10357 void
10358 ix86_split_convert_uns_si_sse (rtx operands[])
10359 {
10360 enum machine_mode vecmode;
10361 rtx value, large, zero_or_two31, input, two31, x;
10362
10363 large = operands[1];
10364 zero_or_two31 = operands[2];
10365 input = operands[3];
10366 two31 = operands[4];
10367 vecmode = GET_MODE (large);
10368 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10369
10370 /* Load up the value into the low element. We must ensure that the other
10371 elements are valid floats -- zero is the easiest such value. */
10372 if (MEM_P (input))
10373 {
10374 if (vecmode == V4SFmode)
10375 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10376 else
10377 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10378 }
10379 else
10380 {
10381 input = gen_rtx_REG (vecmode, REGNO (input));
10382 emit_move_insn (value, CONST0_RTX (vecmode));
10383 if (vecmode == V4SFmode)
10384 emit_insn (gen_sse_movss (value, value, input));
10385 else
10386 emit_insn (gen_sse2_movsd (value, value, input));
10387 }
10388
10389 emit_move_insn (large, two31);
10390 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10391
10392 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10393 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10394
10395 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10396 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10397
10398 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10399 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10400
10401 large = gen_rtx_REG (V4SImode, REGNO (large));
10402 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10403
10404 x = gen_rtx_REG (V4SImode, REGNO (value));
10405 if (vecmode == V4SFmode)
10406 emit_insn (gen_sse2_cvttps2dq (x, value));
10407 else
10408 emit_insn (gen_sse2_cvttpd2dq (x, value));
10409 value = x;
10410
10411 emit_insn (gen_xorv4si3 (value, value, large));
10412 }
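/* Worked example (illustrative, not from the original source): converting
   3e9, which does not fit in signed SImode: large becomes the (2^31 <=
   value) mask, zero_or_two31 becomes 2^31, value - 2^31 = 852516352
   converts exactly, and the final xor with large << 31 adds 2^31 back,
   giving 0xB2D05E00 = 3000000000.  */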
10413
10414 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10415 Expects the 64-bit DImode to be supplied in a pair of integral
10416 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10417 -mfpmath=sse, !optimize_size only. */
10418
10419 void
10420 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10421 {
10422 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10423 rtx int_xmm, fp_xmm;
10424 rtx biases, exponents;
10425 rtx x;
10426
10427 int_xmm = gen_reg_rtx (V4SImode);
10428 if (TARGET_INTER_UNIT_MOVES)
10429 emit_insn (gen_movdi_to_sse (int_xmm, input));
10430 else if (TARGET_SSE_SPLIT_REGS)
10431 {
10432 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10433 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10434 }
10435 else
10436 {
10437 x = gen_reg_rtx (V2DImode);
10438 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10439 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10440 }
10441
10442 x = gen_rtx_CONST_VECTOR (V4SImode,
10443 gen_rtvec (4, GEN_INT (0x43300000UL),
10444 GEN_INT (0x45300000UL),
10445 const0_rtx, const0_rtx));
10446 exponents = validize_mem (force_const_mem (V4SImode, x));
10447
10448 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10449 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10450
10451 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10452 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10453 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10454 (0x1.0p84 + double(fp_value_hi_xmm)).
10455 Note these exponents differ by 32. */
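/* Worked example (added for clarity): for input 0x0000000100000002 the low
   dword yields the double 0x1.0p52 + 2.0 and the high dword yields
   0x1.0p84 + 1.0 * 0x1.0p32; subtracting the two biases below leaves 2.0
   and 4294967296.0, whose sum 4294967298.0 is the exact unsigned value.  */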
10456
10457 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10458
10459 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10460 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10461 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10462 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10463 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10464 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10465 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10466 biases = validize_mem (force_const_mem (V2DFmode, biases));
10467 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10468
10469 /* Add the upper and lower DFmode values together. */
10470 if (TARGET_SSE3)
10471 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10472 else
10473 {
10474 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10475 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10476 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10477 }
10478
10479 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10480 }
10481
10482 /* Convert an unsigned SImode value into a DFmode. Only currently used
10483 for SSE, but applicable anywhere. */
10484
10485 void
10486 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10487 {
10488 REAL_VALUE_TYPE TWO31r;
10489 rtx x, fp;
10490
10491 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10492 NULL, 1, OPTAB_DIRECT);
10493
10494 fp = gen_reg_rtx (DFmode);
10495 emit_insn (gen_floatsidf2 (fp, x));
10496
10497 real_ldexp (&TWO31r, &dconst1, 31);
10498 x = const_double_from_real_value (TWO31r, DFmode);
10499
10500 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10501 if (x != target)
10502 emit_move_insn (target, x);
10503 }
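/* Worked example (illustrative): input 0xffffffff is first biased by -2^31
   to 0x7fffffff, converted as the signed value 2147483647.0, and then
   2^31 is added back, yielding 4294967295.0 without needing an
   unsigned-to-float instruction.  */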
10504
10505 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10506 32-bit mode; otherwise we have a direct convert instruction. */
10507
10508 void
10509 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10510 {
10511 REAL_VALUE_TYPE TWO32r;
10512 rtx fp_lo, fp_hi, x;
10513
10514 fp_lo = gen_reg_rtx (DFmode);
10515 fp_hi = gen_reg_rtx (DFmode);
10516
10517 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10518
10519 real_ldexp (&TWO32r, &dconst1, 32);
10520 x = const_double_from_real_value (TWO32r, DFmode);
10521 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10522
10523 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10524
10525 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10526 0, OPTAB_DIRECT);
10527 if (x != target)
10528 emit_move_insn (target, x);
10529 }
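/* Worked example (illustrative): for input -1 the high half converts to
   -1.0, is scaled by 2^32 to -4294967296.0, the low half converts as the
   unsigned value 4294967295.0, and their sum is the expected -1.0.  */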
10530
10531 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10532 For x86_32, -mfpmath=sse, !optimize_size only. */
10533 void
10534 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10535 {
10536 REAL_VALUE_TYPE ONE16r;
10537 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10538
10539 real_ldexp (&ONE16r, &dconst1, 16);
10540 x = const_double_from_real_value (ONE16r, SFmode);
10541 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10542 NULL, 0, OPTAB_DIRECT);
10543 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10544 NULL, 0, OPTAB_DIRECT);
10545 fp_hi = gen_reg_rtx (SFmode);
10546 fp_lo = gen_reg_rtx (SFmode);
10547 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10548 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10549 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10550 0, OPTAB_DIRECT);
10551 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10552 0, OPTAB_DIRECT);
10553 if (!rtx_equal_p (target, fp_hi))
10554 emit_move_insn (target, fp_hi);
10555 }
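/* Worked example (illustrative): input 0x12345678 splits into int_hi =
   0x1234 and int_lo = 0x5678; both convert exactly to SFmode, and
   0x1234 * 65536.0 + 0x5678 = 305419896.0 reproduces the unsigned value,
   up to SFmode rounding of the final sum.  */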
10556
10557 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10558 then replicate the value for all elements of the vector
10559 register. */
10560
10561 rtx
10562 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10563 {
10564 rtvec v;
10565 switch (mode)
10566 {
10567 case SImode:
10568 gcc_assert (vect);
10569 v = gen_rtvec (4, value, value, value, value);
10570 return gen_rtx_CONST_VECTOR (V4SImode, v);
10571
10572 case DImode:
10573 gcc_assert (vect);
10574 v = gen_rtvec (2, value, value);
10575 return gen_rtx_CONST_VECTOR (V2DImode, v);
10576
10577 case SFmode:
10578 if (vect)
10579 v = gen_rtvec (4, value, value, value, value);
10580 else
10581 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10582 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10583 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10584
10585 case DFmode:
10586 if (vect)
10587 v = gen_rtvec (2, value, value);
10588 else
10589 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10590 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10591
10592 default:
10593 gcc_unreachable ();
10594 }
10595 }
10596
10597 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10598 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10599 for an SSE register. If VECT is true, then replicate the mask for
10600 all elements of the vector register. If INVERT is true, then create
10601 a mask excluding the sign bit. */
10602
10603 rtx
10604 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10605 {
10606 enum machine_mode vec_mode, imode;
10607 HOST_WIDE_INT hi, lo;
10608 int shift = 63;
10609 rtx v;
10610 rtx mask;
10611
10612 /* Find the sign bit, sign extended to 2*HWI. */
10613 switch (mode)
10614 {
10615 case SImode:
10616 case SFmode:
10617 imode = SImode;
10618 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10619 lo = 0x80000000, hi = lo < 0;
10620 break;
10621
10622 case DImode:
10623 case DFmode:
10624 imode = DImode;
10625 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10626 if (HOST_BITS_PER_WIDE_INT >= 64)
10627 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10628 else
10629 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10630 break;
10631
10632 case TImode:
10633 case TFmode:
10634 imode = TImode;
10635 vec_mode = VOIDmode;
10636 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10637 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10638 break;
10639
10640 default:
10641 gcc_unreachable ();
10642 }
10643
10644 if (invert)
10645 lo = ~lo, hi = ~hi;
10646
10647 /* Force this value into the low part of a fp vector constant. */
10648 mask = immed_double_const (lo, hi, imode);
10649 mask = gen_lowpart (mode, mask);
10650
10651 if (vec_mode == VOIDmode)
10652 return force_reg (mode, mask);
10653
10654 v = ix86_build_const_vector (mode, vect, mask);
10655 return force_reg (vec_mode, v);
10656 }
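/* Illustrative values (added): for DFmode this builds a vector whose
   elements are 0x8000000000000000 (just the sign bit), or its complement
   0x7fffffffffffffff when INVERT is true; ABS is then an AND with the
   inverted mask and NEG an XOR with the plain one, as done in
   ix86_expand_fp_absneg_operator below.  */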
10657
10658 /* Generate code for floating point ABS or NEG. */
10659
10660 void
10661 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10662 rtx operands[])
10663 {
10664 rtx mask, set, use, clob, dst, src;
10665 bool matching_memory;
10666 bool use_sse = false;
10667 bool vector_mode = VECTOR_MODE_P (mode);
10668 enum machine_mode elt_mode = mode;
10669
10670 if (vector_mode)
10671 {
10672 elt_mode = GET_MODE_INNER (mode);
10673 use_sse = true;
10674 }
10675 else if (mode == TFmode)
10676 use_sse = true;
10677 else if (TARGET_SSE_MATH)
10678 use_sse = SSE_FLOAT_MODE_P (mode);
10679
10680 /* NEG and ABS performed with SSE use bitwise mask operations.
10681 Create the appropriate mask now. */
10682 if (use_sse)
10683 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10684 else
10685 mask = NULL_RTX;
10686
10687 dst = operands[0];
10688 src = operands[1];
10689
10690 /* If the destination is memory, and we don't have matching source
10691 operands or we're using the x87, do things in registers. */
10692 matching_memory = false;
10693 if (MEM_P (dst))
10694 {
10695 if (use_sse && rtx_equal_p (dst, src))
10696 matching_memory = true;
10697 else
10698 dst = gen_reg_rtx (mode);
10699 }
10700 if (MEM_P (src) && !matching_memory)
10701 src = force_reg (mode, src);
10702
10703 if (vector_mode)
10704 {
10705 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10706 set = gen_rtx_SET (VOIDmode, dst, set);
10707 emit_insn (set);
10708 }
10709 else
10710 {
10711 set = gen_rtx_fmt_e (code, mode, src);
10712 set = gen_rtx_SET (VOIDmode, dst, set);
10713 if (mask)
10714 {
10715 use = gen_rtx_USE (VOIDmode, mask);
10716 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10717 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10718 gen_rtvec (3, set, use, clob)));
10719 }
10720 else
10721 emit_insn (set);
10722 }
10723
10724 if (dst != operands[0])
10725 emit_move_insn (operands[0], dst);
10726 }
10727
10728 /* Expand a copysign operation. Special case operand 0 being a constant. */
10729
10730 void
10731 ix86_expand_copysign (rtx operands[])
10732 {
10733 enum machine_mode mode, vmode;
10734 rtx dest, op0, op1, mask, nmask;
10735
10736 dest = operands[0];
10737 op0 = operands[1];
10738 op1 = operands[2];
10739
10740 mode = GET_MODE (dest);
10741 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10742
10743 if (GET_CODE (op0) == CONST_DOUBLE)
10744 {
10745 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10746
10747 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10748 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10749
10750 if (mode == SFmode || mode == DFmode)
10751 {
10752 if (op0 == CONST0_RTX (mode))
10753 op0 = CONST0_RTX (vmode);
10754 else
10755 {
10756 rtvec v;
10757
10758 if (mode == SFmode)
10759 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10760 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10761 else
10762 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10763 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10764 }
10765 }
10766
10767 mask = ix86_build_signbit_mask (mode, 0, 0);
10768
10769 if (mode == SFmode)
10770 copysign_insn = gen_copysignsf3_const;
10771 else if (mode == DFmode)
10772 copysign_insn = gen_copysigndf3_const;
10773 else
10774 copysign_insn = gen_copysigntf3_const;
10775
10776 emit_insn (copysign_insn (dest, op0, op1, mask));
10777 }
10778 else
10779 {
10780 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10781
10782 nmask = ix86_build_signbit_mask (mode, 0, 1);
10783 mask = ix86_build_signbit_mask (mode, 0, 0);
10784
10785 if (mode == SFmode)
10786 copysign_insn = gen_copysignsf3_var;
10787 else if (mode == DFmode)
10788 copysign_insn = gen_copysigndf3_var;
10789 else
10790 copysign_insn = gen_copysigntf3_var;
10791
10792 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10793 }
10794 }
10795
10796 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10797 be a constant, and so has already been expanded into a vector constant. */
10798
10799 void
10800 ix86_split_copysign_const (rtx operands[])
10801 {
10802 enum machine_mode mode, vmode;
10803 rtx dest, op0, op1, mask, x;
10804
10805 dest = operands[0];
10806 op0 = operands[1];
10807 op1 = operands[2];
10808 mask = operands[3];
10809
10810 mode = GET_MODE (dest);
10811 vmode = GET_MODE (mask);
10812
10813 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10814 x = gen_rtx_AND (vmode, dest, mask);
10815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10816
10817 if (op0 != CONST0_RTX (vmode))
10818 {
10819 x = gen_rtx_IOR (vmode, dest, op0);
10820 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10821 }
10822 }
10823
10824 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10825 so we have to do two masks. */
10826
10827 void
10828 ix86_split_copysign_var (rtx operands[])
10829 {
10830 enum machine_mode mode, vmode;
10831 rtx dest, scratch, op0, op1, mask, nmask, x;
10832
10833 dest = operands[0];
10834 scratch = operands[1];
10835 op0 = operands[2];
10836 op1 = operands[3];
10837 nmask = operands[4];
10838 mask = operands[5];
10839
10840 mode = GET_MODE (dest);
10841 vmode = GET_MODE (mask);
10842
10843 if (rtx_equal_p (op0, op1))
10844 {
10845 /* Shouldn't happen often (it's useless, obviously), but when it does
10846 we'd generate incorrect code if we continue below. */
10847 emit_move_insn (dest, op0);
10848 return;
10849 }
10850
10851 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10852 {
10853 gcc_assert (REGNO (op1) == REGNO (scratch));
10854
10855 x = gen_rtx_AND (vmode, scratch, mask);
10856 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10857
10858 dest = mask;
10859 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10860 x = gen_rtx_NOT (vmode, dest);
10861 x = gen_rtx_AND (vmode, x, op0);
10862 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10863 }
10864 else
10865 {
10866 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10867 {
10868 x = gen_rtx_AND (vmode, scratch, mask);
10869 }
10870 else /* alternative 2,4 */
10871 {
10872 gcc_assert (REGNO (mask) == REGNO (scratch));
10873 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10874 x = gen_rtx_AND (vmode, scratch, op1);
10875 }
10876 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10877
10878 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10879 {
10880 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10881 x = gen_rtx_AND (vmode, dest, nmask);
10882 }
10883 else /* alternative 3,4 */
10884 {
10885 gcc_assert (REGNO (nmask) == REGNO (dest));
10886 dest = nmask;
10887 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10888 x = gen_rtx_AND (vmode, dest, op0);
10889 }
10890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10891 }
10892
10893 x = gen_rtx_IOR (vmode, dest, scratch);
10894 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10895 }
10896
10897 /* Return TRUE or FALSE depending on whether the first SET in INSN
10898 has source and destination with matching CC modes, and that the
10899 CC mode is at least as constrained as REQ_MODE. */
10900
10901 int
10902 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10903 {
10904 rtx set;
10905 enum machine_mode set_mode;
10906
10907 set = PATTERN (insn);
10908 if (GET_CODE (set) == PARALLEL)
10909 set = XVECEXP (set, 0, 0);
10910 gcc_assert (GET_CODE (set) == SET);
10911 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10912
10913 set_mode = GET_MODE (SET_DEST (set));
10914 switch (set_mode)
10915 {
10916 case CCNOmode:
10917 if (req_mode != CCNOmode
10918 && (req_mode != CCmode
10919 || XEXP (SET_SRC (set), 1) != const0_rtx))
10920 return 0;
10921 break;
10922 case CCmode:
10923 if (req_mode == CCGCmode)
10924 return 0;
10925 /* FALLTHRU */
10926 case CCGCmode:
10927 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10928 return 0;
10929 /* FALLTHRU */
10930 case CCGOCmode:
10931 if (req_mode == CCZmode)
10932 return 0;
10933 /* FALLTHRU */
10934 case CCZmode:
10935 break;
10936
10937 default:
10938 gcc_unreachable ();
10939 }
10940
10941 return (GET_MODE (SET_SRC (set)) == set_mode);
10942 }
10943
10944 /* Generate insn patterns to do an integer compare of OPERANDS. */
10945
10946 static rtx
10947 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10948 {
10949 enum machine_mode cmpmode;
10950 rtx tmp, flags;
10951
10952 cmpmode = SELECT_CC_MODE (code, op0, op1);
10953 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10954
10955 /* This is very simple, but making the interface the same as in the
10956 FP case makes the rest of the code easier. */
10957 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10958 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10959
10960 /* Return the test that should be put into the flags user, i.e.
10961 the bcc, scc, or cmov instruction. */
10962 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10963 }
10964
10965 /* Figure out whether to use ordered or unordered fp comparisons.
10966 Return the appropriate mode to use. */
10967
10968 enum machine_mode
10969 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10970 {
10971 /* ??? In order to make all comparisons reversible, we do all comparisons
10972 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10973 all forms of trapping and nontrapping comparisons, we can make inequality
10974 comparisons trapping again, since it results in better code when using
10975 FCOM based compares. */
10976 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10977 }
10978
10979 enum machine_mode
10980 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10981 {
10982 enum machine_mode mode = GET_MODE (op0);
10983
10984 if (SCALAR_FLOAT_MODE_P (mode))
10985 {
10986 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10987 return ix86_fp_compare_mode (code);
10988 }
10989
10990 switch (code)
10991 {
10992 /* Only zero flag is needed. */
10993 case EQ: /* ZF=0 */
10994 case NE: /* ZF!=0 */
10995 return CCZmode;
10996 /* Codes needing carry flag. */
10997 case GEU: /* CF=0 */
10998 case GTU: /* CF=0 & ZF=0 */
10999 case LTU: /* CF=1 */
11000 case LEU: /* CF=1 | ZF=1 */
11001 return CCmode;
11002 /* Codes possibly doable only with sign flag when
11003 comparing against zero. */
11004 case GE: /* SF=OF or SF=0 */
11005 case LT: /* SF<>OF or SF=1 */
11006 if (op1 == const0_rtx)
11007 return CCGOCmode;
11008 else
11009 /* For other cases Carry flag is not required. */
11010 return CCGCmode;
11011 /* Codes doable only with the sign flag when comparing
11012 against zero, but for which we lack a jump instruction,
11013 so we must use relational tests against overflow,
11014 which therefore needs to be zero. */
11015 case GT: /* ZF=0 & SF=OF */
11016 case LE: /* ZF=1 | SF<>OF */
11017 if (op1 == const0_rtx)
11018 return CCNOmode;
11019 else
11020 return CCGCmode;
11021 /* The strcmp pattern does (use flags), and combine may ask us for the
11022 proper mode. */
11023 case USE:
11024 return CCmode;
11025 default:
11026 gcc_unreachable ();
11027 }
11028 }
11029
11030 /* Return the fixed registers used for condition codes. */
11031
11032 static bool
11033 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11034 {
11035 *p1 = FLAGS_REG;
11036 *p2 = FPSR_REG;
11037 return true;
11038 }
11039
11040 /* If two condition code modes are compatible, return a condition code
11041 mode which is compatible with both. Otherwise, return
11042 VOIDmode. */
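/* Two different integer condition-code modes (other than the CCGCmode/
   CCGOCmode pair, which combine to CCGCmode) are jointly satisfiable only
   by a full CCmode comparison; the FP condition-code modes are compatible
   only with themselves.  */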
11043
11044 static enum machine_mode
11045 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11046 {
11047 if (m1 == m2)
11048 return m1;
11049
11050 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11051 return VOIDmode;
11052
11053 if ((m1 == CCGCmode && m2 == CCGOCmode)
11054 || (m1 == CCGOCmode && m2 == CCGCmode))
11055 return CCGCmode;
11056
11057 switch (m1)
11058 {
11059 default:
11060 gcc_unreachable ();
11061
11062 case CCmode:
11063 case CCGCmode:
11064 case CCGOCmode:
11065 case CCNOmode:
11066 case CCAmode:
11067 case CCCmode:
11068 case CCOmode:
11069 case CCSmode:
11070 case CCZmode:
11071 switch (m2)
11072 {
11073 default:
11074 return VOIDmode;
11075
11076 case CCmode:
11077 case CCGCmode:
11078 case CCGOCmode:
11079 case CCNOmode:
11080 case CCAmode:
11081 case CCCmode:
11082 case CCOmode:
11083 case CCSmode:
11084 case CCZmode:
11085 return CCmode;
11086 }
11087
11088 case CCFPmode:
11089 case CCFPUmode:
11090 /* These are only compatible with themselves, which we already
11091 checked above. */
11092 return VOIDmode;
11093 }
11094 }
11095
11096 /* Split comparison code CODE into comparisons we can do using branch
11097 instructions. BYPASS_CODE is the comparison code for the branch that
11098 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
11099 is not required, its value is set to UNKNOWN.
11100 We never require more than two branches.
11101
11102 void
11103 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11104 enum rtx_code *first_code,
11105 enum rtx_code *second_code)
11106 {
11107 *first_code = code;
11108 *bypass_code = UNKNOWN;
11109 *second_code = UNKNOWN;
11110
11111 /* The fcomi comparison sets flags as follows:
11112
11113 cmp ZF PF CF
11114 > 0 0 0
11115 < 0 0 1
11116 = 1 0 0
11117 un 1 1 1 */
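/* A *bypass_code of UNORDERED branches around the main test for conditions
   whose test would wrongly succeed on unordered operands; a *second_code of
   UNORDERED adds an extra branch for conditions that must also succeed on
   unordered operands.  */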
11118
11119 switch (code)
11120 {
11121 case GT: /* GTU - CF=0 & ZF=0 */
11122 case GE: /* GEU - CF=0 */
11123 case ORDERED: /* PF=0 */
11124 case UNORDERED: /* PF=1 */
11125 case UNEQ: /* EQ - ZF=1 */
11126 case UNLT: /* LTU - CF=1 */
11127 case UNLE: /* LEU - CF=1 | ZF=1 */
11128 case LTGT: /* EQ - ZF=0 */
11129 break;
11130 case LT: /* LTU - CF=1 - fails on unordered */
11131 *first_code = UNLT;
11132 *bypass_code = UNORDERED;
11133 break;
11134 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11135 *first_code = UNLE;
11136 *bypass_code = UNORDERED;
11137 break;
11138 case EQ: /* EQ - ZF=1 - fails on unordered */
11139 *first_code = UNEQ;
11140 *bypass_code = UNORDERED;
11141 break;
11142 case NE: /* NE - ZF=0 - fails on unordered */
11143 *first_code = LTGT;
11144 *second_code = UNORDERED;
11145 break;
11146 case UNGE: /* GEU - CF=0 - fails on unordered */
11147 *first_code = GE;
11148 *second_code = UNORDERED;
11149 break;
11150 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11151 *first_code = GT;
11152 *second_code = UNORDERED;
11153 break;
11154 default:
11155 gcc_unreachable ();
11156 }
11157 if (!TARGET_IEEE_FP)
11158 {
11159 *second_code = UNKNOWN;
11160 *bypass_code = UNKNOWN;
11161 }
11162 }
11163
11164 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11165 All following functions use the number of instructions as their cost metric.
11166 In the future this should be tweaked to compute bytes for optimize_size and
11167 take into account the performance of various instructions on various CPUs. */
11168 static int
11169 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11170 {
11171 if (!TARGET_IEEE_FP)
11172 return 4;
11173 /* The cost of code output by ix86_expand_fp_compare. */
11174 switch (code)
11175 {
11176 case UNLE:
11177 case UNLT:
11178 case LTGT:
11179 case GT:
11180 case GE:
11181 case UNORDERED:
11182 case ORDERED:
11183 case UNEQ:
11184 return 4;
11185 break;
11186 case LT:
11187 case NE:
11188 case EQ:
11189 case UNGE:
11190 return 5;
11191 break;
11192 case LE:
11193 case UNGT:
11194 return 6;
11195 break;
11196 default:
11197 gcc_unreachable ();
11198 }
11199 }
11200
11201 /* Return cost of comparison done using fcomi operation.
11202 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11203 static int
11204 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11205 {
11206 enum rtx_code bypass_code, first_code, second_code;
11207 /* Return arbitrarily high cost when the instruction is not supported - this
11208 prevents gcc from using it. */
11209 if (!TARGET_CMOVE)
11210 return 1024;
11211 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11212 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11213 }
11214
11215 /* Return cost of comparison done using sahf operation.
11216 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11217 static int
11218 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11219 {
11220 enum rtx_code bypass_code, first_code, second_code;
11221 /* Return arbitrarily high cost when the instruction is not preferred - this
11222 prevents gcc from using it. */
11223 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11224 return 1024;
11225 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11226 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11227 }
11228
11229 /* Compute cost of the comparison done using any method.
11230 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11231 static int
11232 ix86_fp_comparison_cost (enum rtx_code code)
11233 {
11234 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11235 int min;
11236
11237 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11238 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11239
11240 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11241 if (min > sahf_cost)
11242 min = sahf_cost;
11243 if (min > fcomi_cost)
11244 min = fcomi_cost;
11245 return min;
11246 }
11247
11248 /* Return true if we should use an FCOMI instruction for this
11249 fp comparison. */
11250
11251 int
11252 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11253 {
11254 enum rtx_code swapped_code = swap_condition (code);
11255
11256 return ((ix86_fp_comparison_cost (code)
11257 == ix86_fp_comparison_fcomi_cost (code))
11258 || (ix86_fp_comparison_cost (swapped_code)
11259 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11260 }
11261
11262 /* Swap, force into registers, or otherwise massage the two operands
11263 to a fp comparison. The operands are updated in place; the new
11264 comparison code is returned. */
11265
11266 static enum rtx_code
11267 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11268 {
11269 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11270 rtx op0 = *pop0, op1 = *pop1;
11271 enum machine_mode op_mode = GET_MODE (op0);
11272 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11273
11274 /* All of the unordered compare instructions only work on registers.
11275 The same is true of the fcomi compare instructions. The XFmode
11276 compare instructions require registers except when comparing
11277 against zero or when converting operand 1 from fixed point to
11278 floating point. */
11279
11280 if (!is_sse
11281 && (fpcmp_mode == CCFPUmode
11282 || (op_mode == XFmode
11283 && ! (standard_80387_constant_p (op0) == 1
11284 || standard_80387_constant_p (op1) == 1)
11285 && GET_CODE (op1) != FLOAT)
11286 || ix86_use_fcomi_compare (code)))
11287 {
11288 op0 = force_reg (op_mode, op0);
11289 op1 = force_reg (op_mode, op1);
11290 }
11291 else
11292 {
11293 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11294 things around if they appear profitable, otherwise force op0
11295 into a register. */
11296
11297 if (standard_80387_constant_p (op0) == 0
11298 || (MEM_P (op0)
11299 && ! (standard_80387_constant_p (op1) == 0
11300 || MEM_P (op1))))
11301 {
11302 rtx tmp;
11303 tmp = op0, op0 = op1, op1 = tmp;
11304 code = swap_condition (code);
11305 }
11306
11307 if (!REG_P (op0))
11308 op0 = force_reg (op_mode, op0);
11309
11310 if (CONSTANT_P (op1))
11311 {
11312 int tmp = standard_80387_constant_p (op1);
11313 if (tmp == 0)
11314 op1 = validize_mem (force_const_mem (op_mode, op1));
11315 else if (tmp == 1)
11316 {
11317 if (TARGET_CMOVE)
11318 op1 = force_reg (op_mode, op1);
11319 }
11320 else
11321 op1 = force_reg (op_mode, op1);
11322 }
11323 }
11324
11325 /* Try to rearrange the comparison to make it cheaper. */
11326 if (ix86_fp_comparison_cost (code)
11327 > ix86_fp_comparison_cost (swap_condition (code))
11328 && (REG_P (op1) || can_create_pseudo_p ()))
11329 {
11330 rtx tmp;
11331 tmp = op0, op0 = op1, op1 = tmp;
11332 code = swap_condition (code);
11333 if (!REG_P (op0))
11334 op0 = force_reg (op_mode, op0);
11335 }
11336
11337 *pop0 = op0;
11338 *pop1 = op1;
11339 return code;
11340 }
11341
11342 /* Convert comparison codes we use to represent FP comparison to integer
11343 code that will result in proper branch. Return UNKNOWN if no such code
11344 is available. */
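/* After fcomi or fnstsw/sahf the flags describe an unsigned-style ordering
   (see the table in ix86_fp_comparison_codes), hence GT maps to GTU, UNLT to
   LTU, and so on.  */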
11345
11346 enum rtx_code
11347 ix86_fp_compare_code_to_integer (enum rtx_code code)
11348 {
11349 switch (code)
11350 {
11351 case GT:
11352 return GTU;
11353 case GE:
11354 return GEU;
11355 case ORDERED:
11356 case UNORDERED:
11357 return code;
11358 break;
11359 case UNEQ:
11360 return EQ;
11361 break;
11362 case UNLT:
11363 return LTU;
11364 break;
11365 case UNLE:
11366 return LEU;
11367 break;
11368 case LTGT:
11369 return NE;
11370 break;
11371 default:
11372 return UNKNOWN;
11373 }
11374 }
11375
11376 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11377
11378 static rtx
11379 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11380 rtx *second_test, rtx *bypass_test)
11381 {
11382 enum machine_mode fpcmp_mode, intcmp_mode;
11383 rtx tmp, tmp2;
11384 int cost = ix86_fp_comparison_cost (code);
11385 enum rtx_code bypass_code, first_code, second_code;
11386
11387 fpcmp_mode = ix86_fp_compare_mode (code);
11388 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11389
11390 if (second_test)
11391 *second_test = NULL_RTX;
11392 if (bypass_test)
11393 *bypass_test = NULL_RTX;
11394
11395 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11396
11397 /* Do fcomi/sahf based test when profitable. */
11398 if ((TARGET_CMOVE || TARGET_SAHF)
11399 && (bypass_code == UNKNOWN || bypass_test)
11400 && (second_code == UNKNOWN || second_test)
11401 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11402 {
11403 if (TARGET_CMOVE)
11404 {
11405 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11406 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11407 tmp);
11408 emit_insn (tmp);
11409 }
11410 else
11411 {
11412 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11413 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11414 if (!scratch)
11415 scratch = gen_reg_rtx (HImode);
11416 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11417 emit_insn (gen_x86_sahf_1 (scratch));
11418 }
11419
11420 /* The FP codes work out to act like unsigned. */
11421 intcmp_mode = fpcmp_mode;
11422 code = first_code;
11423 if (bypass_code != UNKNOWN)
11424 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11425 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11426 const0_rtx);
11427 if (second_code != UNKNOWN)
11428 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11429 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11430 const0_rtx);
11431 }
11432 else
11433 {
11434 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11435 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11436 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11437 if (!scratch)
11438 scratch = gen_reg_rtx (HImode);
11439 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11440
11441 /* In the unordered case, we have to check C2 for NaN's, which
11442 doesn't happen to work out to anything nice combination-wise.
11443 So do some bit twiddling on the value we've got in AH to come
11444 up with an appropriate set of condition codes. */
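/* In AH after fnstsw, C0 appears as bit 0x01, C2 as 0x04 and C3 as 0x40, so
   the mask 0x45 tests C0|C2|C3; C0, C2 and C3 correspond to the less-than,
   unordered and equal results of the compare.  */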
11445
11446 intcmp_mode = CCNOmode;
11447 switch (code)
11448 {
11449 case GT:
11450 case UNGT:
11451 if (code == GT || !TARGET_IEEE_FP)
11452 {
11453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11454 code = EQ;
11455 }
11456 else
11457 {
11458 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11459 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11460 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11461 intcmp_mode = CCmode;
11462 code = GEU;
11463 }
11464 break;
11465 case LT:
11466 case UNLT:
11467 if (code == LT && TARGET_IEEE_FP)
11468 {
11469 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11470 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11471 intcmp_mode = CCmode;
11472 code = EQ;
11473 }
11474 else
11475 {
11476 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11477 code = NE;
11478 }
11479 break;
11480 case GE:
11481 case UNGE:
11482 if (code == GE || !TARGET_IEEE_FP)
11483 {
11484 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11485 code = EQ;
11486 }
11487 else
11488 {
11489 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11490 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11491 GEN_INT (0x01)));
11492 code = NE;
11493 }
11494 break;
11495 case LE:
11496 case UNLE:
11497 if (code == LE && TARGET_IEEE_FP)
11498 {
11499 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11500 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11501 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11502 intcmp_mode = CCmode;
11503 code = LTU;
11504 }
11505 else
11506 {
11507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11508 code = NE;
11509 }
11510 break;
11511 case EQ:
11512 case UNEQ:
11513 if (code == EQ && TARGET_IEEE_FP)
11514 {
11515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11516 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11517 intcmp_mode = CCmode;
11518 code = EQ;
11519 }
11520 else
11521 {
11522 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11523 code = NE;
11524 break;
11525 }
11526 break;
11527 case NE:
11528 case LTGT:
11529 if (code == NE && TARGET_IEEE_FP)
11530 {
11531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11533 GEN_INT (0x40)));
11534 code = NE;
11535 }
11536 else
11537 {
11538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11539 code = EQ;
11540 }
11541 break;
11542
11543 case UNORDERED:
11544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11545 code = NE;
11546 break;
11547 case ORDERED:
11548 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11549 code = EQ;
11550 break;
11551
11552 default:
11553 gcc_unreachable ();
11554 }
11555 }
11556
11557 /* Return the test that should be put into the flags user, i.e.
11558 the bcc, scc, or cmov instruction. */
11559 return gen_rtx_fmt_ee (code, VOIDmode,
11560 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11561 const0_rtx);
11562 }
11563
11564 rtx
11565 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11566 {
11567 rtx op0, op1, ret;
11568 op0 = ix86_compare_op0;
11569 op1 = ix86_compare_op1;
11570
11571 if (second_test)
11572 *second_test = NULL_RTX;
11573 if (bypass_test)
11574 *bypass_test = NULL_RTX;
11575
11576 if (ix86_compare_emitted)
11577 {
11578 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11579 ix86_compare_emitted = NULL_RTX;
11580 }
11581 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11582 {
11583 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11584 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11585 second_test, bypass_test);
11586 }
11587 else
11588 ret = ix86_expand_int_compare (code, op0, op1);
11589
11590 return ret;
11591 }
11592
11593 /* Return true if the CODE will result in a nontrivial jump sequence. */
11594 bool
11595 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11596 {
11597 enum rtx_code bypass_code, first_code, second_code;
11598 if (!TARGET_CMOVE)
11599 return true;
11600 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11601 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11602 }
11603
11604 void
11605 ix86_expand_branch (enum rtx_code code, rtx label)
11606 {
11607 rtx tmp;
11608
11609 /* If we have emitted a compare insn, go straight to simple.
11610 ix86_expand_compare won't emit anything if ix86_compare_emitted
11611 is non NULL. */
11612 if (ix86_compare_emitted)
11613 goto simple;
11614
11615 switch (GET_MODE (ix86_compare_op0))
11616 {
11617 case QImode:
11618 case HImode:
11619 case SImode:
11620 simple:
11621 tmp = ix86_expand_compare (code, NULL, NULL);
11622 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11623 gen_rtx_LABEL_REF (VOIDmode, label),
11624 pc_rtx);
11625 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11626 return;
11627
11628 case SFmode:
11629 case DFmode:
11630 case XFmode:
11631 {
11632 rtvec vec;
11633 int use_fcomi;
11634 enum rtx_code bypass_code, first_code, second_code;
11635
11636 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11637 &ix86_compare_op1);
11638
11639 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11640
11641 /* Check whether we will use the natural sequence with one jump. If
11642 so, we can expand the jump early. Otherwise delay expansion by
11643 creating a compound insn so as not to confuse the optimizers. */
11644 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11645 && TARGET_CMOVE)
11646 {
11647 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11648 gen_rtx_LABEL_REF (VOIDmode, label),
11649 pc_rtx, NULL_RTX, NULL_RTX);
11650 }
11651 else
11652 {
11653 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11654 ix86_compare_op0, ix86_compare_op1);
11655 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11656 gen_rtx_LABEL_REF (VOIDmode, label),
11657 pc_rtx);
11658 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11659
11660 use_fcomi = ix86_use_fcomi_compare (code);
11661 vec = rtvec_alloc (3 + !use_fcomi);
11662 RTVEC_ELT (vec, 0) = tmp;
11663 RTVEC_ELT (vec, 1)
11664 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11665 RTVEC_ELT (vec, 2)
11666 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11667 if (! use_fcomi)
11668 RTVEC_ELT (vec, 3)
11669 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11670
11671 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11672 }
11673 return;
11674 }
11675
11676 case DImode:
11677 if (TARGET_64BIT)
11678 goto simple;
11679 case TImode:
11680 /* Expand DImode branch into multiple compare+branch. */
11681 {
11682 rtx lo[2], hi[2], label2;
11683 enum rtx_code code1, code2, code3;
11684 enum machine_mode submode;
11685
11686 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11687 {
11688 tmp = ix86_compare_op0;
11689 ix86_compare_op0 = ix86_compare_op1;
11690 ix86_compare_op1 = tmp;
11691 code = swap_condition (code);
11692 }
11693 if (GET_MODE (ix86_compare_op0) == DImode)
11694 {
11695 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11696 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11697 submode = SImode;
11698 }
11699 else
11700 {
11701 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11702 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11703 submode = DImode;
11704 }
11705
11706 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11707 avoid two branches. This costs one extra insn, so disable when
11708 optimizing for size. */
11709
11710 if ((code == EQ || code == NE)
11711 && (!optimize_size
11712 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11713 {
11714 rtx xor0, xor1;
11715
11716 xor1 = hi[0];
11717 if (hi[1] != const0_rtx)
11718 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11719 NULL_RTX, 0, OPTAB_WIDEN);
11720
11721 xor0 = lo[0];
11722 if (lo[1] != const0_rtx)
11723 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11724 NULL_RTX, 0, OPTAB_WIDEN);
11725
11726 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11727 NULL_RTX, 0, OPTAB_WIDEN);
11728
11729 ix86_compare_op0 = tmp;
11730 ix86_compare_op1 = const0_rtx;
11731 ix86_expand_branch (code, label);
11732 return;
11733 }
11734
11735 /* Otherwise, if we are doing a less-than or greater-or-equal-than
11736 comparison, op1 is a constant and the low word is zero, then we can
11737 just examine the high word. */
11738
11739 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11740 switch (code)
11741 {
11742 case LT: case LTU: case GE: case GEU:
11743 ix86_compare_op0 = hi[0];
11744 ix86_compare_op1 = hi[1];
11745 ix86_expand_branch (code, label);
11746 return;
11747 default:
11748 break;
11749 }
11750
11751 /* Otherwise, we need two or three jumps. */
11752
11753 label2 = gen_label_rtx ();
11754
11755 code1 = code;
11756 code2 = swap_condition (code);
11757 code3 = unsigned_condition (code);
11758
11759 switch (code)
11760 {
11761 case LT: case GT: case LTU: case GTU:
11762 break;
11763
11764 case LE: code1 = LT; code2 = GT; break;
11765 case GE: code1 = GT; code2 = LT; break;
11766 case LEU: code1 = LTU; code2 = GTU; break;
11767 case GEU: code1 = GTU; code2 = LTU; break;
11768
11769 case EQ: code1 = UNKNOWN; code2 = NE; break;
11770 case NE: code2 = UNKNOWN; break;
11771
11772 default:
11773 gcc_unreachable ();
11774 }
11775
11776 /*
11777 * a < b =>
11778 * if (hi(a) < hi(b)) goto true;
11779 * if (hi(a) > hi(b)) goto false;
11780 * if (lo(a) < lo(b)) goto true;
11781 * false:
11782 */
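/* For EQ only the inverted second branch is used: a difference in the high
   parts jumps to LABEL2, and the low-part compare decides.  For NE a
   difference in either half jumps straight to LABEL, so no second label is
   needed.  */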
11783
11784 ix86_compare_op0 = hi[0];
11785 ix86_compare_op1 = hi[1];
11786
11787 if (code1 != UNKNOWN)
11788 ix86_expand_branch (code1, label);
11789 if (code2 != UNKNOWN)
11790 ix86_expand_branch (code2, label2);
11791
11792 ix86_compare_op0 = lo[0];
11793 ix86_compare_op1 = lo[1];
11794 ix86_expand_branch (code3, label);
11795
11796 if (code2 != UNKNOWN)
11797 emit_label (label2);
11798 return;
11799 }
11800
11801 default:
11802 gcc_unreachable ();
11803 }
11804 }
11805
11806 /* Split branch based on floating point condition. */
11807 void
11808 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11809 rtx target1, rtx target2, rtx tmp, rtx pushed)
11810 {
11811 rtx second, bypass;
11812 rtx label = NULL_RTX;
11813 rtx condition;
11814 int bypass_probability = -1, second_probability = -1, probability = -1;
11815 rtx i;
11816
11817 if (target2 != pc_rtx)
11818 {
11819 rtx tmp = target2;
11820 code = reverse_condition_maybe_unordered (code);
11821 target2 = target1;
11822 target1 = tmp;
11823 }
11824
11825 condition = ix86_expand_fp_compare (code, op1, op2,
11826 tmp, &second, &bypass);
11827
11828 /* Remove pushed operand from stack. */
11829 if (pushed)
11830 ix86_free_from_memory (GET_MODE (pushed));
11831
11832 if (split_branch_probability >= 0)
11833 {
11834 /* Distribute the probabilities across the jumps.
11835 Assume that the BYPASS and SECOND branches always test
11836 for UNORDERED. */
11837 probability = split_branch_probability;
11838
11839 /* A value of 1 is low enough that the probability does not need
11840 to be updated. Later we may run some experiments and see
11841 if unordered values are more frequent in practice. */
11842 if (bypass)
11843 bypass_probability = 1;
11844 if (second)
11845 second_probability = 1;
11846 }
11847 if (bypass != NULL_RTX)
11848 {
11849 label = gen_label_rtx ();
11850 i = emit_jump_insn (gen_rtx_SET
11851 (VOIDmode, pc_rtx,
11852 gen_rtx_IF_THEN_ELSE (VOIDmode,
11853 bypass,
11854 gen_rtx_LABEL_REF (VOIDmode,
11855 label),
11856 pc_rtx)));
11857 if (bypass_probability >= 0)
11858 REG_NOTES (i)
11859 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11860 GEN_INT (bypass_probability),
11861 REG_NOTES (i));
11862 }
11863 i = emit_jump_insn (gen_rtx_SET
11864 (VOIDmode, pc_rtx,
11865 gen_rtx_IF_THEN_ELSE (VOIDmode,
11866 condition, target1, target2)));
11867 if (probability >= 0)
11868 REG_NOTES (i)
11869 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11870 GEN_INT (probability),
11871 REG_NOTES (i));
11872 if (second != NULL_RTX)
11873 {
11874 i = emit_jump_insn (gen_rtx_SET
11875 (VOIDmode, pc_rtx,
11876 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11877 target2)));
11878 if (second_probability >= 0)
11879 REG_NOTES (i)
11880 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11881 GEN_INT (second_probability),
11882 REG_NOTES (i));
11883 }
11884 if (label != NULL_RTX)
11885 emit_label (label);
11886 }
11887
11888 int
11889 ix86_expand_setcc (enum rtx_code code, rtx dest)
11890 {
11891 rtx ret, tmp, tmpreg, equiv;
11892 rtx second_test, bypass_test;
11893
11894 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11895 return 0; /* FAIL */
11896
11897 gcc_assert (GET_MODE (dest) == QImode);
11898
11899 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11900 PUT_MODE (ret, QImode);
11901
11902 tmp = dest;
11903 tmpreg = dest;
11904
11905 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11906 if (bypass_test || second_test)
11907 {
11908 rtx test = second_test;
11909 int bypass = 0;
11910 rtx tmp2 = gen_reg_rtx (QImode);
11911 if (bypass_test)
11912 {
11913 gcc_assert (!second_test);
11914 test = bypass_test;
11915 bypass = 1;
11916 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11917 }
11918 PUT_MODE (test, QImode);
11919 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11920
11921 if (bypass)
11922 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11923 else
11924 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11925 }
11926
11927 /* Attach a REG_EQUAL note describing the comparison result. */
11928 if (ix86_compare_op0 && ix86_compare_op1)
11929 {
11930 equiv = simplify_gen_relational (code, QImode,
11931 GET_MODE (ix86_compare_op0),
11932 ix86_compare_op0, ix86_compare_op1);
11933 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11934 }
11935
11936 return 1; /* DONE */
11937 }
11938
11939 /* Expand comparison setting or clearing carry flag. Return true when
11940 successful and set pop for the operation. */
11941 static bool
11942 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11943 {
11944 enum machine_mode mode =
11945 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11946
11947 /* Do not handle DImode compares that go through a special path.
11948 Also we can't deal with FP compares yet. Support for this could be added. */
11949 if (mode == (TARGET_64BIT ? TImode : DImode))
11950 return false;
11951
11952 if (SCALAR_FLOAT_MODE_P (mode))
11953 {
11954 rtx second_test = NULL, bypass_test = NULL;
11955 rtx compare_op, compare_seq;
11956
11957 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11958
11959 /* Shortcut: following common codes never translate
11960 into carry flag compares. */
11961 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11962 || code == ORDERED || code == UNORDERED)
11963 return false;
11964
11965 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
11966 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11967 && !TARGET_IEEE_FP)
11968 {
11969 rtx tmp = op0;
11970 op0 = op1;
11971 op1 = tmp;
11972 code = swap_condition (code);
11973 }
11974
11975 /* Try to expand the comparison and verify that we end up with a carry-flag
11976 based comparison. This fails only when we decide to expand the
11977 comparison using arithmetic, which is not a common scenario. */
11978 start_sequence ();
11979 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11980 &second_test, &bypass_test);
11981 compare_seq = get_insns ();
11982 end_sequence ();
11983
11984 if (second_test || bypass_test)
11985 return false;
11986 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11987 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11988 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11989 else
11990 code = GET_CODE (compare_op);
11991 if (code != LTU && code != GEU)
11992 return false;
11993 emit_insn (compare_seq);
11994 *pop = compare_op;
11995 return true;
11996 }
11997 if (!INTEGRAL_MODE_P (mode))
11998 return false;
11999 switch (code)
12000 {
12001 case LTU:
12002 case GEU:
12003 break;
12004
12005 /* Convert a==0 into (unsigned)a<1. */
12006 case EQ:
12007 case NE:
12008 if (op1 != const0_rtx)
12009 return false;
12010 op1 = const1_rtx;
12011 code = (code == EQ ? LTU : GEU);
12012 break;
12013
12014 /* Convert a>b into b<a or a>=b+1. */
12015 case GTU:
12016 case LEU:
12017 if (CONST_INT_P (op1))
12018 {
12019 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12020 /* Bail out on overflow. We could still swap the operands, but that
12021 would force loading of the constant into a register. */
12022 if (op1 == const0_rtx
12023 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12024 return false;
12025 code = (code == GTU ? GEU : LTU);
12026 }
12027 else
12028 {
12029 rtx tmp = op1;
12030 op1 = op0;
12031 op0 = tmp;
12032 code = (code == GTU ? LTU : GEU);
12033 }
12034 break;
12035
12036 /* Convert a>=0 into (unsigned)a<0x80000000. */
12037 case LT:
12038 case GE:
12039 if (mode == DImode || op1 != const0_rtx)
12040 return false;
12041 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12042 code = (code == LT ? GEU : LTU);
12043 break;
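/* Convert a<=-1 into (unsigned)a>=0x80000000 and a>-1 into (unsigned)a<0x80000000. */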
12044 case LE:
12045 case GT:
12046 if (mode == DImode || op1 != constm1_rtx)
12047 return false;
12048 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12049 code = (code == LE ? GEU : LTU);
12050 break;
12051
12052 default:
12053 return false;
12054 }
12055 /* Swapping operands may cause constant to appear as first operand. */
12056 if (!nonimmediate_operand (op0, VOIDmode))
12057 {
12058 if (!can_create_pseudo_p ())
12059 return false;
12060 op0 = force_reg (mode, op0);
12061 }
12062 ix86_compare_op0 = op0;
12063 ix86_compare_op1 = op1;
12064 *pop = ix86_expand_compare (code, NULL, NULL);
12065 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12066 return true;
12067 }
12068
12069 int
12070 ix86_expand_int_movcc (rtx operands[])
12071 {
12072 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12073 rtx compare_seq, compare_op;
12074 rtx second_test, bypass_test;
12075 enum machine_mode mode = GET_MODE (operands[0]);
12076 bool sign_bit_compare_p = false;
12077
12078 start_sequence ();
12079 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12080 compare_seq = get_insns ();
12081 end_sequence ();
12082
12083 compare_code = GET_CODE (compare_op);
12084
12085 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12086 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12087 sign_bit_compare_p = true;
12088
12089 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12090 HImode insns, we'd be swallowed in word prefix ops. */
12091
12092 if ((mode != HImode || TARGET_FAST_PREFIX)
12093 && (mode != (TARGET_64BIT ? TImode : DImode))
12094 && CONST_INT_P (operands[2])
12095 && CONST_INT_P (operands[3]))
12096 {
12097 rtx out = operands[0];
12098 HOST_WIDE_INT ct = INTVAL (operands[2]);
12099 HOST_WIDE_INT cf = INTVAL (operands[3]);
12100 HOST_WIDE_INT diff;
12101
12102 diff = ct - cf;
12103 /* Sign bit compares are better done using shifts than by using
12104 sbb. */
12105 if (sign_bit_compare_p
12106 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12107 ix86_compare_op1, &compare_op))
12108 {
12109 /* Detect overlap between destination and compare sources. */
12110 rtx tmp = out;
12111
12112 if (!sign_bit_compare_p)
12113 {
12114 bool fpcmp = false;
12115
12116 compare_code = GET_CODE (compare_op);
12117
12118 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12119 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12120 {
12121 fpcmp = true;
12122 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12123 }
12124
12125 /* To simplify rest of code, restrict to the GEU case. */
12126 if (compare_code == LTU)
12127 {
12128 HOST_WIDE_INT tmp = ct;
12129 ct = cf;
12130 cf = tmp;
12131 compare_code = reverse_condition (compare_code);
12132 code = reverse_condition (code);
12133 }
12134 else
12135 {
12136 if (fpcmp)
12137 PUT_CODE (compare_op,
12138 reverse_condition_maybe_unordered
12139 (GET_CODE (compare_op)));
12140 else
12141 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12142 }
12143 diff = ct - cf;
12144
12145 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12146 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12147 tmp = gen_reg_rtx (mode);
12148
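/* Materialize the flag as -1/0 in TMP: the movcc_0_m1 patterns emit
   sbb reg,reg, which yields all ones when the carry flag is set and
   zero otherwise.  */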
12149 if (mode == DImode)
12150 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12151 else
12152 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12153 }
12154 else
12155 {
12156 if (code == GT || code == GE)
12157 code = reverse_condition (code);
12158 else
12159 {
12160 HOST_WIDE_INT tmp = ct;
12161 ct = cf;
12162 cf = tmp;
12163 diff = ct - cf;
12164 }
12165 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12166 ix86_compare_op1, VOIDmode, 0, -1);
12167 }
12168
12169 if (diff == 1)
12170 {
12171 /*
12172 * cmpl op0,op1
12173 * sbbl dest,dest
12174 * [addl dest, ct]
12175 *
12176 * Size 5 - 8.
12177 */
12178 if (ct)
12179 tmp = expand_simple_binop (mode, PLUS,
12180 tmp, GEN_INT (ct),
12181 copy_rtx (tmp), 1, OPTAB_DIRECT);
12182 }
12183 else if (cf == -1)
12184 {
12185 /*
12186 * cmpl op0,op1
12187 * sbbl dest,dest
12188 * orl $ct, dest
12189 *
12190 * Size 8.
12191 */
12192 tmp = expand_simple_binop (mode, IOR,
12193 tmp, GEN_INT (ct),
12194 copy_rtx (tmp), 1, OPTAB_DIRECT);
12195 }
12196 else if (diff == -1 && ct)
12197 {
12198 /*
12199 * cmpl op0,op1
12200 * sbbl dest,dest
12201 * notl dest
12202 * [addl dest, cf]
12203 *
12204 * Size 8 - 11.
12205 */
12206 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12207 if (cf)
12208 tmp = expand_simple_binop (mode, PLUS,
12209 copy_rtx (tmp), GEN_INT (cf),
12210 copy_rtx (tmp), 1, OPTAB_DIRECT);
12211 }
12212 else
12213 {
12214 /*
12215 * cmpl op0,op1
12216 * sbbl dest,dest
12217 * [notl dest]
12218 * andl cf - ct, dest
12219 * [addl dest, ct]
12220 *
12221 * Size 8 - 11.
12222 */
12223
12224 if (cf == 0)
12225 {
12226 cf = ct;
12227 ct = 0;
12228 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12229 }
12230
12231 tmp = expand_simple_binop (mode, AND,
12232 copy_rtx (tmp),
12233 gen_int_mode (cf - ct, mode),
12234 copy_rtx (tmp), 1, OPTAB_DIRECT);
12235 if (ct)
12236 tmp = expand_simple_binop (mode, PLUS,
12237 copy_rtx (tmp), GEN_INT (ct),
12238 copy_rtx (tmp), 1, OPTAB_DIRECT);
12239 }
12240
12241 if (!rtx_equal_p (tmp, out))
12242 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12243
12244 return 1; /* DONE */
12245 }
12246
12247 if (diff < 0)
12248 {
12249 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12250
12251 HOST_WIDE_INT tmp;
12252 tmp = ct, ct = cf, cf = tmp;
12253 diff = -diff;
12254
12255 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12256 {
12257 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12258
12259 /* We may be reversing an unordered compare to a normal compare, which
12260 is not valid in general (we may convert a non-trapping condition
12261 to a trapping one); however, on i386 we currently emit all
12262 comparisons unordered. */
12263 compare_code = reverse_condition_maybe_unordered (compare_code);
12264 code = reverse_condition_maybe_unordered (code);
12265 }
12266 else
12267 {
12268 compare_code = reverse_condition (compare_code);
12269 code = reverse_condition (code);
12270 }
12271 }
12272
12273 compare_code = UNKNOWN;
12274 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12275 && CONST_INT_P (ix86_compare_op1))
12276 {
12277 if (ix86_compare_op1 == const0_rtx
12278 && (code == LT || code == GE))
12279 compare_code = code;
12280 else if (ix86_compare_op1 == constm1_rtx)
12281 {
12282 if (code == LE)
12283 compare_code = LT;
12284 else if (code == GT)
12285 compare_code = GE;
12286 }
12287 }
12288
12289 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12290 if (compare_code != UNKNOWN
12291 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12292 && (cf == -1 || ct == -1))
12293 {
12294 /* If lea code below could be used, only optimize
12295 if it results in a 2 insn sequence. */
12296
12297 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12298 || diff == 3 || diff == 5 || diff == 9)
12299 || (compare_code == LT && ct == -1)
12300 || (compare_code == GE && cf == -1))
12301 {
12302 /*
12303 * notl op1 (if necessary)
12304 * sarl $31, op1
12305 * orl cf, op1
12306 */
12307 if (ct != -1)
12308 {
12309 cf = ct;
12310 ct = -1;
12311 code = reverse_condition (code);
12312 }
12313
12314 out = emit_store_flag (out, code, ix86_compare_op0,
12315 ix86_compare_op1, VOIDmode, 0, -1);
12316
12317 out = expand_simple_binop (mode, IOR,
12318 out, GEN_INT (cf),
12319 out, 1, OPTAB_DIRECT);
12320 if (out != operands[0])
12321 emit_move_insn (operands[0], out);
12322
12323 return 1; /* DONE */
12324 }
12325 }
12326
12327
12328 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12329 || diff == 3 || diff == 5 || diff == 9)
12330 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12331 && (mode != DImode
12332 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12333 {
12334 /*
12335 * xorl dest,dest
12336 * cmpl op1,op2
12337 * setcc dest
12338 * lea cf(dest*(ct-cf)),dest
12339 *
12340 * Size 14.
12341 *
12342 * This also catches the degenerate setcc-only case.
12343 */
12344
12345 rtx tmp;
12346 int nops;
12347
12348 out = emit_store_flag (out, code, ix86_compare_op0,
12349 ix86_compare_op1, VOIDmode, 0, 1);
12350
12351 nops = 0;
12352 /* On x86_64 the lea instruction operates on Pmode, so we need
12353 to get the arithmetic done in the proper mode to match. */
12354 if (diff == 1)
12355 tmp = copy_rtx (out);
12356 else
12357 {
12358 rtx out1;
12359 out1 = copy_rtx (out);
12360 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12361 nops++;
12362 if (diff & 1)
12363 {
12364 tmp = gen_rtx_PLUS (mode, tmp, out1);
12365 nops++;
12366 }
12367 }
12368 if (cf != 0)
12369 {
12370 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12371 nops++;
12372 }
12373 if (!rtx_equal_p (tmp, out))
12374 {
12375 if (nops == 1)
12376 out = force_operand (tmp, copy_rtx (out));
12377 else
12378 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12379 }
12380 if (!rtx_equal_p (out, operands[0]))
12381 emit_move_insn (operands[0], copy_rtx (out));
12382
12383 return 1; /* DONE */
12384 }
12385
12386 /*
12387 * General case: Jumpful:
12388 * xorl dest,dest cmpl op1, op2
12389 * cmpl op1, op2 movl ct, dest
12390 * setcc dest jcc 1f
12391 * decl dest movl cf, dest
12392 * andl (cf-ct),dest 1:
12393 * addl ct,dest
12394 *
12395 * Size 20. Size 14.
12396 *
12397 * This is reasonably steep, but branch mispredict costs are
12398 * high on modern cpus, so consider failing only if optimizing
12399 * for space.
12400 */
12401
12402 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12403 && BRANCH_COST >= 2)
12404 {
12405 if (cf == 0)
12406 {
12407 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12408
12409 cf = ct;
12410 ct = 0;
12411
12412 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12413 {
12414 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12415
12416 /* We may be reversing an unordered compare to a normal compare,
12417 which is not valid in general (we may convert a non-trapping
12418 condition to a trapping one); however, on i386 we currently
12419 emit all comparisons unordered. */
12420 code = reverse_condition_maybe_unordered (code);
12421 }
12422 else
12423 {
12424 code = reverse_condition (code);
12425 if (compare_code != UNKNOWN)
12426 compare_code = reverse_condition (compare_code);
12427 }
12428 }
12429
12430 if (compare_code != UNKNOWN)
12431 {
12432 /* notl op1 (if needed)
12433 sarl $31, op1
12434 andl (cf-ct), op1
12435 addl ct, op1
12436
12437 For x < 0 (resp. x <= -1) there will be no notl,
12438 so if possible swap the constants to get rid of the
12439 complement.
12440 True/false will be -1/0 while code below (store flag
12441 followed by decrement) is 0/-1, so the constants need
12442 to be exchanged once more. */
12443
12444 if (compare_code == GE || !cf)
12445 {
12446 code = reverse_condition (code);
12447 compare_code = LT;
12448 }
12449 else
12450 {
12451 HOST_WIDE_INT tmp = cf;
12452 cf = ct;
12453 ct = tmp;
12454 }
12455
12456 out = emit_store_flag (out, code, ix86_compare_op0,
12457 ix86_compare_op1, VOIDmode, 0, -1);
12458 }
12459 else
12460 {
12461 out = emit_store_flag (out, code, ix86_compare_op0,
12462 ix86_compare_op1, VOIDmode, 0, 1);
12463
12464 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12465 copy_rtx (out), 1, OPTAB_DIRECT);
12466 }
12467
12468 out = expand_simple_binop (mode, AND, copy_rtx (out),
12469 gen_int_mode (cf - ct, mode),
12470 copy_rtx (out), 1, OPTAB_DIRECT);
12471 if (ct)
12472 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12473 copy_rtx (out), 1, OPTAB_DIRECT);
12474 if (!rtx_equal_p (out, operands[0]))
12475 emit_move_insn (operands[0], copy_rtx (out));
12476
12477 return 1; /* DONE */
12478 }
12479 }
12480
12481 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12482 {
12483 /* Try a few things more with specific constants and a variable. */
12484
12485 optab op;
12486 rtx var, orig_out, out, tmp;
12487
12488 if (BRANCH_COST <= 2)
12489 return 0; /* FAIL */
12490
12491 /* If one of the two operands is an interesting constant, load a
12492 constant with the above and mask it in with a logical operation. */
12493
12494 if (CONST_INT_P (operands[2]))
12495 {
12496 var = operands[3];
12497 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12498 operands[3] = constm1_rtx, op = and_optab;
12499 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12500 operands[3] = const0_rtx, op = ior_optab;
12501 else
12502 return 0; /* FAIL */
12503 }
12504 else if (CONST_INT_P (operands[3]))
12505 {
12506 var = operands[2];
12507 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12508 operands[2] = constm1_rtx, op = and_optab;
12509 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12510 operands[2] = const0_rtx, op = ior_optab;
12511 else
12512 return 0; /* FAIL */
12513 }
12514 else
12515 return 0; /* FAIL */
12516
12517 orig_out = operands[0];
12518 tmp = gen_reg_rtx (mode);
12519 operands[0] = tmp;
12520
12521 /* Recurse to get the constant loaded. */
12522 if (ix86_expand_int_movcc (operands) == 0)
12523 return 0; /* FAIL */
12524
12525 /* Mask in the interesting variable. */
12526 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12527 OPTAB_WIDEN);
12528 if (!rtx_equal_p (out, orig_out))
12529 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12530
12531 return 1; /* DONE */
12532 }
12533
12534 /*
12535 * For comparison with above,
12536 *
12537 * movl cf,dest
12538 * movl ct,tmp
12539 * cmpl op1,op2
12540 * cmovcc tmp,dest
12541 *
12542 * Size 15.
12543 */
12544
12545 if (! nonimmediate_operand (operands[2], mode))
12546 operands[2] = force_reg (mode, operands[2]);
12547 if (! nonimmediate_operand (operands[3], mode))
12548 operands[3] = force_reg (mode, operands[3]);
12549
12550 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12551 {
12552 rtx tmp = gen_reg_rtx (mode);
12553 emit_move_insn (tmp, operands[3]);
12554 operands[3] = tmp;
12555 }
12556 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12557 {
12558 rtx tmp = gen_reg_rtx (mode);
12559 emit_move_insn (tmp, operands[2]);
12560 operands[2] = tmp;
12561 }
12562
12563 if (! register_operand (operands[2], VOIDmode)
12564 && (mode == QImode
12565 || ! register_operand (operands[3], VOIDmode)))
12566 operands[2] = force_reg (mode, operands[2]);
12567
12568 if (mode == QImode
12569 && ! register_operand (operands[3], VOIDmode))
12570 operands[3] = force_reg (mode, operands[3]);
12571
12572 emit_insn (compare_seq);
12573 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12574 gen_rtx_IF_THEN_ELSE (mode,
12575 compare_op, operands[2],
12576 operands[3])));
12577 if (bypass_test)
12578 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12579 gen_rtx_IF_THEN_ELSE (mode,
12580 bypass_test,
12581 copy_rtx (operands[3]),
12582 copy_rtx (operands[0]))));
12583 if (second_test)
12584 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12585 gen_rtx_IF_THEN_ELSE (mode,
12586 second_test,
12587 copy_rtx (operands[2]),
12588 copy_rtx (operands[0]))));
12589
12590 return 1; /* DONE */
12591 }
12592
12593 /* Swap, force into registers, or otherwise massage the two operands
12594 to an sse comparison with a mask result. Thus we differ a bit from
12595 ix86_prepare_fp_compare_args which expects to produce a flags result.
12596
12597 The DEST operand exists to help determine whether to commute commutative
12598 operators. The POP0/POP1 operands are updated in place. The new
12599 comparison code is returned, or UNKNOWN if not implementable. */
12600
12601 static enum rtx_code
12602 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12603 rtx *pop0, rtx *pop1)
12604 {
12605 rtx tmp;
12606
12607 switch (code)
12608 {
12609 case LTGT:
12610 case UNEQ:
12611 /* We have no LTGT as an operator. We could implement it with
12612 NE & ORDERED, but this requires an extra temporary. It's
12613 not clear that it's worth it. */
12614 return UNKNOWN;
12615
12616 case LT:
12617 case LE:
12618 case UNGT:
12619 case UNGE:
12620 /* These are supported directly. */
12621 break;
12622
12623 case EQ:
12624 case NE:
12625 case UNORDERED:
12626 case ORDERED:
12627 /* For commutative operators, try to canonicalize the destination
12628 operand to be first in the comparison - this helps reload to
12629 avoid extra moves. */
12630 if (!dest || !rtx_equal_p (dest, *pop1))
12631 break;
12632 /* FALLTHRU */
12633
12634 case GE:
12635 case GT:
12636 case UNLE:
12637 case UNLT:
12638 /* These are not supported directly. Swap the comparison operands
12639 to transform into something that is supported. */
12640 tmp = *pop0;
12641 *pop0 = *pop1;
12642 *pop1 = tmp;
12643 code = swap_condition (code);
12644 break;
12645
12646 default:
12647 gcc_unreachable ();
12648 }
12649
12650 return code;
12651 }
12652
12653 /* Detect conditional moves that exactly match min/max operational
12654 semantics. Note that this is IEEE safe, as long as we don't
12655 interchange the operands.
12656
12657 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12658 and TRUE if the operation is successful and instructions are emitted. */
12659
12660 static bool
12661 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12662 rtx cmp_op1, rtx if_true, rtx if_false)
12663 {
12664 enum machine_mode mode;
12665 bool is_min;
12666 rtx tmp;
12667
12668 if (code == LT)
12669 ;
12670 else if (code == UNGE)
12671 {
12672 tmp = if_true;
12673 if_true = if_false;
12674 if_false = tmp;
12675 }
12676 else
12677 return false;
12678
12679 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12680 is_min = true;
12681 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12682 is_min = false;
12683 else
12684 return false;
12685
12686 mode = GET_MODE (dest);
12687
12688 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12689 but MODE may be a vector mode and thus not appropriate. */
12690 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12691 {
12692 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12693 rtvec v;
12694
12695 if_true = force_reg (mode, if_true);
12696 v = gen_rtvec (2, if_true, if_false);
12697 tmp = gen_rtx_UNSPEC (mode, v, u);
12698 }
12699 else
12700 {
12701 code = is_min ? SMIN : SMAX;
12702 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12703 }
12704
12705 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12706 return true;
12707 }
12708
12709 /* Expand an sse vector comparison. Return the register with the result. */
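/* The result is a mask: each element becomes all ones where the comparison
   holds and all zeros where it does not, as produced by the SSE compare
   instructions.  */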
12710
12711 static rtx
12712 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12713 rtx op_true, rtx op_false)
12714 {
12715 enum machine_mode mode = GET_MODE (dest);
12716 rtx x;
12717
12718 cmp_op0 = force_reg (mode, cmp_op0);
12719 if (!nonimmediate_operand (cmp_op1, mode))
12720 cmp_op1 = force_reg (mode, cmp_op1);
12721
12722 if (optimize
12723 || reg_overlap_mentioned_p (dest, op_true)
12724 || reg_overlap_mentioned_p (dest, op_false))
12725 dest = gen_reg_rtx (mode);
12726
12727 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12728 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12729
12730 return dest;
12731 }
12732
12733 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12734 operations. This is used for both scalar and vector conditional moves. */
12735
12736 static void
12737 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12738 {
12739 enum machine_mode mode = GET_MODE (dest);
12740 rtx t2, t3, x;
12741
12742 if (op_false == CONST0_RTX (mode))
12743 {
12744 op_true = force_reg (mode, op_true);
12745 x = gen_rtx_AND (mode, cmp, op_true);
12746 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12747 }
12748 else if (op_true == CONST0_RTX (mode))
12749 {
12750 op_false = force_reg (mode, op_false);
12751 x = gen_rtx_NOT (mode, cmp);
12752 x = gen_rtx_AND (mode, x, op_false);
12753 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12754 }
12755 else
12756 {
12757 op_true = force_reg (mode, op_true);
12758 op_false = force_reg (mode, op_false);
12759
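/* General case: compute (cmp & op_true) | (~cmp & op_false), using T2 and
   T3 for the two halves.  */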
12760 t2 = gen_reg_rtx (mode);
12761 if (optimize)
12762 t3 = gen_reg_rtx (mode);
12763 else
12764 t3 = dest;
12765
12766 x = gen_rtx_AND (mode, op_true, cmp);
12767 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12768
12769 x = gen_rtx_NOT (mode, cmp);
12770 x = gen_rtx_AND (mode, x, op_false);
12771 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12772
12773 x = gen_rtx_IOR (mode, t3, t2);
12774 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12775 }
12776 }
12777
12778 /* Expand a floating-point conditional move. Return true if successful. */
12779
12780 int
12781 ix86_expand_fp_movcc (rtx operands[])
12782 {
12783 enum machine_mode mode = GET_MODE (operands[0]);
12784 enum rtx_code code = GET_CODE (operands[1]);
12785 rtx tmp, compare_op, second_test, bypass_test;
12786
12787 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12788 {
12789 enum machine_mode cmode;
12790
12791 /* Since we've no cmove for sse registers, don't force bad register
12792 allocation just to gain access to it. Deny movcc when the
12793 comparison mode doesn't match the move mode. */
12794 cmode = GET_MODE (ix86_compare_op0);
12795 if (cmode == VOIDmode)
12796 cmode = GET_MODE (ix86_compare_op1);
12797 if (cmode != mode)
12798 return 0;
12799
12800 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12801 &ix86_compare_op0,
12802 &ix86_compare_op1);
12803 if (code == UNKNOWN)
12804 return 0;
12805
12806 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12807 ix86_compare_op1, operands[2],
12808 operands[3]))
12809 return 1;
12810
12811 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12812 ix86_compare_op1, operands[2], operands[3]);
12813 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12814 return 1;
12815 }
12816
12817 /* The floating point conditional move instructions don't directly
12818 support conditions resulting from a signed integer comparison. */
12819
12820 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12821
12825 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12826 {
12827 gcc_assert (!second_test && !bypass_test);
12828 tmp = gen_reg_rtx (QImode);
12829 ix86_expand_setcc (code, tmp);
12830 code = NE;
12831 ix86_compare_op0 = tmp;
12832 ix86_compare_op1 = const0_rtx;
12833 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12834 }
12835 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12836 {
12837 tmp = gen_reg_rtx (mode);
12838 emit_move_insn (tmp, operands[3]);
12839 operands[3] = tmp;
12840 }
12841 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12842 {
12843 tmp = gen_reg_rtx (mode);
12844 emit_move_insn (tmp, operands[2]);
12845 operands[2] = tmp;
12846 }
12847
12848 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12849 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12850 operands[2], operands[3])));
12851 if (bypass_test)
12852 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12853 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12854 operands[3], operands[0])));
12855 if (second_test)
12856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12857 gen_rtx_IF_THEN_ELSE (mode, second_test,
12858 operands[2], operands[0])));
12859
12860 return 1;
12861 }
12862
12863 /* Expand a floating-point vector conditional move; a vcond operation
12864 rather than a movcc operation. */
12865
12866 bool
12867 ix86_expand_fp_vcond (rtx operands[])
12868 {
12869 enum rtx_code code = GET_CODE (operands[3]);
12870 rtx cmp;
12871
12872 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12873 &operands[4], &operands[5]);
12874 if (code == UNKNOWN)
12875 return false;
12876
12877 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12878 operands[5], operands[1], operands[2]))
12879 return true;
12880
12881 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12882 operands[1], operands[2]);
12883 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12884 return true;
12885 }
12886
12887 /* Expand a signed/unsigned integral vector conditional move. */
12888
12889 bool
12890 ix86_expand_int_vcond (rtx operands[])
12891 {
12892 enum machine_mode mode = GET_MODE (operands[0]);
12893 enum rtx_code code = GET_CODE (operands[3]);
12894 bool negate = false;
12895 rtx x, cop0, cop1;
12896
12897 cop0 = operands[4];
12898 cop1 = operands[5];
12899
12900 /* Canonicalize the comparison to EQ, GT, GTU. */
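/* NE, LE and LEU are rewritten as their inverse with the two arms of the
conditional move swapped (NEGATE); LT and LTU swap the comparison
operands; GE and GEU do both, so that only EQ, GT and GTU remain. */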
12901 switch (code)
12902 {
12903 case EQ:
12904 case GT:
12905 case GTU:
12906 break;
12907
12908 case NE:
12909 case LE:
12910 case LEU:
12911 code = reverse_condition (code);
12912 negate = true;
12913 break;
12914
12915 case GE:
12916 case GEU:
12917 code = reverse_condition (code);
12918 negate = true;
12919 /* FALLTHRU */
12920
12921 case LT:
12922 case LTU:
12923 code = swap_condition (code);
12924 x = cop0, cop0 = cop1, cop1 = x;
12925 break;
12926
12927 default:
12928 gcc_unreachable ();
12929 }
12930
12931 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12932 if (mode == V2DImode)
12933 {
12934 switch (code)
12935 {
12936 case EQ:
12937 /* SSE4.1 supports EQ. */
12938 if (!TARGET_SSE4_1)
12939 return false;
12940 break;
12941
12942 case GT:
12943 case GTU:
12944 /* SSE4.2 supports GT/GTU. */
12945 if (!TARGET_SSE4_2)
12946 return false;
12947 break;
12948
12949 default:
12950 gcc_unreachable ();
12951 }
12952 }
12953
12954 /* Unsigned parallel compare is not supported by the hardware. Play some
12955 tricks to turn this into a signed comparison against 0. */
12956 if (code == GTU)
12957 {
12958 cop0 = force_reg (mode, cop0);
12959
12960 switch (mode)
12961 {
12962 case V4SImode:
12963 case V2DImode:
12964 {
12965 rtx t1, t2, mask;
12966
12967 /* Perform a parallel modulo subtraction. */
12968 t1 = gen_reg_rtx (mode);
12969 emit_insn ((mode == V4SImode
12970 ? gen_subv4si3
12971 : gen_subv2di3) (t1, cop0, cop1));
12972
12973 /* Extract the original sign bit of op0. */
12974 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12975 true, false);
12976 t2 = gen_reg_rtx (mode);
12977 emit_insn ((mode == V4SImode
12978 ? gen_andv4si3
12979 : gen_andv2di3) (t2, cop0, mask));
12980
12981 /* XOR it back into the result of the subtraction. This results
12982 in the sign bit set iff we saw unsigned underflow. */
12983 x = gen_reg_rtx (mode);
12984 emit_insn ((mode == V4SImode
12985 ? gen_xorv4si3
12986 : gen_xorv2di3) (x, t1, t2));
12987
12988 code = GT;
12989 }
12990 break;
12991
12992 case V16QImode:
12993 case V8HImode:
12994 /* Perform a parallel unsigned saturating subtraction. */
12995 x = gen_reg_rtx (mode);
12996 emit_insn (gen_rtx_SET (VOIDmode, x,
12997 gen_rtx_US_MINUS (mode, cop0, cop1)));
12998
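/* With unsigned saturation, cop0 - cop1 is zero exactly when
cop0 <= cop1 (unsigned), so test the saturated difference against
zero and invert the sense of the comparison. */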
12999 code = EQ;
13000 negate = !negate;
13001 break;
13002
13003 default:
13004 gcc_unreachable ();
13005 }
13006
13007 cop0 = x;
13008 cop1 = CONST0_RTX (mode);
13009 }
13010
13011 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13012 operands[1+negate], operands[2-negate]);
13013
13014 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13015 operands[2-negate]);
13016 return true;
13017 }
13018
13019 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13020 true if we should do zero extension, else sign extension. HIGH_P is
13021 true if we want the N/2 high elements, else the low elements. */
13022
13023 void
13024 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13025 {
13026 enum machine_mode imode = GET_MODE (operands[1]);
13027 rtx (*unpack)(rtx, rtx, rtx);
13028 rtx se, dest;
13029
13030 switch (imode)
13031 {
13032 case V16QImode:
13033 if (high_p)
13034 unpack = gen_vec_interleave_highv16qi;
13035 else
13036 unpack = gen_vec_interleave_lowv16qi;
13037 break;
13038 case V8HImode:
13039 if (high_p)
13040 unpack = gen_vec_interleave_highv8hi;
13041 else
13042 unpack = gen_vec_interleave_lowv8hi;
13043 break;
13044 case V4SImode:
13045 if (high_p)
13046 unpack = gen_vec_interleave_highv4si;
13047 else
13048 unpack = gen_vec_interleave_lowv4si;
13049 break;
13050 default:
13051 gcc_unreachable ();
13052 }
13053
13054 dest = gen_lowpart (imode, operands[0]);
13055
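/* The second interleave operand supplies the high half of each widened
element: zeros for zero extension, or a mask of each element's sign bits
(computed as the signed comparison 0 > operands[1]) for sign extension. */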
13056 if (unsigned_p)
13057 se = force_reg (imode, CONST0_RTX (imode));
13058 else
13059 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13060 operands[1], pc_rtx, pc_rtx);
13061
13062 emit_insn (unpack (dest, operands[1], se));
13063 }
13064
13065 /* This function performs the same task as ix86_expand_sse_unpack,
13066 but with SSE4.1 instructions. */
13067
13068 void
13069 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13070 {
13071 enum machine_mode imode = GET_MODE (operands[1]);
13072 rtx (*unpack)(rtx, rtx);
13073 rtx src, dest;
13074
13075 switch (imode)
13076 {
13077 case V16QImode:
13078 if (unsigned_p)
13079 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13080 else
13081 unpack = gen_sse4_1_extendv8qiv8hi2;
13082 break;
13083 case V8HImode:
13084 if (unsigned_p)
13085 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13086 else
13087 unpack = gen_sse4_1_extendv4hiv4si2;
13088 break;
13089 case V4SImode:
13090 if (unsigned_p)
13091 unpack = gen_sse4_1_zero_extendv2siv2di2;
13092 else
13093 unpack = gen_sse4_1_extendv2siv2di2;
13094 break;
13095 default:
13096 gcc_unreachable ();
13097 }
13098
13099 dest = operands[0];
13100 if (high_p)
13101 {
13102 /* Shift higher 8 bytes to lower 8 bytes. */
13103 src = gen_reg_rtx (imode);
13104 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13105 gen_lowpart (TImode, operands[1]),
13106 GEN_INT (64)));
13107 }
13108 else
13109 src = operands[1];
13110
13111 emit_insn (unpack (dest, src));
13112 }
13113
13114 /* Expand conditional increment or decrement using adc/sbb instructions.
13115 The default case using setcc followed by the conditional move can be
13116 done by generic code. */
13117 int
13118 ix86_expand_int_addcc (rtx operands[])
13119 {
13120 enum rtx_code code = GET_CODE (operands[1]);
13121 rtx compare_op;
13122 rtx val = const0_rtx;
13123 bool fpcmp = false;
13124 enum machine_mode mode = GET_MODE (operands[0]);
13125
13126 if (operands[3] != const1_rtx
13127 && operands[3] != constm1_rtx)
13128 return 0;
13129 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13130 ix86_compare_op1, &compare_op))
13131 return 0;
13132 code = GET_CODE (compare_op);
13133
13134 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13135 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13136 {
13137 fpcmp = true;
13138 code = ix86_fp_compare_code_to_integer (code);
13139 }
13140
13141 if (code != LTU)
13142 {
13143 val = constm1_rtx;
13144 if (fpcmp)
13145 PUT_CODE (compare_op,
13146 reverse_condition_maybe_unordered
13147 (GET_CODE (compare_op)));
13148 else
13149 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13150 }
13151 PUT_MODE (compare_op, mode);
13152
13153 /* Construct either adc or sbb insn. */
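/* The *3_carry patterns compute operands[2] + VAL + CF (adc) or
operands[2] - VAL - CF (sbb). With VAL == 0 this adds or subtracts the
carry directly; with VAL == -1 it compensates for the reversed comparison
above, since x + (-1) + CF == x - (1 - CF) and x - (-1) - CF == x + (1 - CF). */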
13154 if ((code == LTU) == (operands[3] == constm1_rtx))
13155 {
13156 switch (GET_MODE (operands[0]))
13157 {
13158 case QImode:
13159 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13160 break;
13161 case HImode:
13162 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13163 break;
13164 case SImode:
13165 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13166 break;
13167 case DImode:
13168 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13169 break;
13170 default:
13171 gcc_unreachable ();
13172 }
13173 }
13174 else
13175 {
13176 switch (GET_MODE (operands[0]))
13177 {
13178 case QImode:
13179 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13180 break;
13181 case HImode:
13182 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13183 break;
13184 case SImode:
13185 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13186 break;
13187 case DImode:
13188 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13189 break;
13190 default:
13191 gcc_unreachable ();
13192 }
13193 }
13194 return 1; /* DONE */
13195 }
13196
13197
13198 /* Split OPERAND into parts stored in PARTS. Similar to split_di, but
13199 works for floating point operands and non-offsettable memories.
13200 For pushes, it returns just stack offsets; the values will be saved
13201 in the right order. At most three parts are generated. */
13202
13203 static int
13204 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13205 {
13206 int size;
13207
13208 if (!TARGET_64BIT)
13209 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13210 else
13211 size = (GET_MODE_SIZE (mode) + 4) / 8;
13212
13213 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13214 gcc_assert (size >= 2 && size <= 3);
13215
13216 /* Optimize constant pool reference to immediates. This is used by fp
13217 moves, that force all constants to memory to allow combining. */
13218 if (MEM_P (operand) && MEM_READONLY_P (operand))
13219 {
13220 rtx tmp = maybe_get_pool_constant (operand);
13221 if (tmp)
13222 operand = tmp;
13223 }
13224
13225 if (MEM_P (operand) && !offsettable_memref_p (operand))
13226 {
13227 /* The only non-offsettable memories we handle are pushes. */
13228 int ok = push_operand (operand, VOIDmode);
13229
13230 gcc_assert (ok);
13231
13232 operand = copy_rtx (operand);
13233 PUT_MODE (operand, Pmode);
13234 parts[0] = parts[1] = parts[2] = operand;
13235 return size;
13236 }
13237
13238 if (GET_CODE (operand) == CONST_VECTOR)
13239 {
13240 enum machine_mode imode = int_mode_for_mode (mode);
13241 /* Caution: if we looked through a constant pool memory above,
13242 the operand may actually have a different mode now. That's
13243 ok, since we want to pun this all the way back to an integer. */
13244 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13245 gcc_assert (operand != NULL);
13246 mode = imode;
13247 }
13248
13249 if (!TARGET_64BIT)
13250 {
13251 if (mode == DImode)
13252 split_di (&operand, 1, &parts[0], &parts[1]);
13253 else
13254 {
13255 if (REG_P (operand))
13256 {
13257 gcc_assert (reload_completed);
13258 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13259 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13260 if (size == 3)
13261 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13262 }
13263 else if (offsettable_memref_p (operand))
13264 {
13265 operand = adjust_address (operand, SImode, 0);
13266 parts[0] = operand;
13267 parts[1] = adjust_address (operand, SImode, 4);
13268 if (size == 3)
13269 parts[2] = adjust_address (operand, SImode, 8);
13270 }
13271 else if (GET_CODE (operand) == CONST_DOUBLE)
13272 {
13273 REAL_VALUE_TYPE r;
13274 long l[4];
13275
13276 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13277 switch (mode)
13278 {
13279 case XFmode:
13280 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13281 parts[2] = gen_int_mode (l[2], SImode);
13282 break;
13283 case DFmode:
13284 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13285 break;
13286 default:
13287 gcc_unreachable ();
13288 }
13289 parts[1] = gen_int_mode (l[1], SImode);
13290 parts[0] = gen_int_mode (l[0], SImode);
13291 }
13292 else
13293 gcc_unreachable ();
13294 }
13295 }
13296 else
13297 {
13298 if (mode == TImode)
13299 split_ti (&operand, 1, &parts[0], &parts[1]);
13300 if (mode == XFmode || mode == TFmode)
13301 {
13302 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13303 if (REG_P (operand))
13304 {
13305 gcc_assert (reload_completed);
13306 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13307 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13308 }
13309 else if (offsettable_memref_p (operand))
13310 {
13311 operand = adjust_address (operand, DImode, 0);
13312 parts[0] = operand;
13313 parts[1] = adjust_address (operand, upper_mode, 8);
13314 }
13315 else if (GET_CODE (operand) == CONST_DOUBLE)
13316 {
13317 REAL_VALUE_TYPE r;
13318 long l[4];
13319
13320 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13321 real_to_target (l, &r, mode);
13322
13323 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13324 if (HOST_BITS_PER_WIDE_INT >= 64)
13325 parts[0]
13326 = gen_int_mode
13327 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13328 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13329 DImode);
13330 else
13331 parts[0] = immed_double_const (l[0], l[1], DImode);
13332
13333 if (upper_mode == SImode)
13334 parts[1] = gen_int_mode (l[2], SImode);
13335 else if (HOST_BITS_PER_WIDE_INT >= 64)
13336 parts[1]
13337 = gen_int_mode
13338 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13339 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13340 DImode);
13341 else
13342 parts[1] = immed_double_const (l[2], l[3], DImode);
13343 }
13344 else
13345 gcc_unreachable ();
13346 }
13347 }
13348
13349 return size;
13350 }
13351
13352 /* Emit insns to perform a move or push of DI, DF, and XF values.
13353 All required insns are emitted here. Operands 2-4 receive the
13354 destination parts in the correct order; operands 5-7 receive the
13355 corresponding source parts. */
13356
13357 void
13358 ix86_split_long_move (rtx operands[])
13359 {
13360 rtx part[2][3];
13361 int nparts;
13362 int push = 0;
13363 int collisions = 0;
13364 enum machine_mode mode = GET_MODE (operands[0]);
13365
13366 /* The DFmode expanders may ask us to move a double.
13367 For a 64bit target this is a single move. By hiding the fact
13368 here we simplify the i386.md splitters. */
13369 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13370 {
13371 /* Optimize constant pool reference to immediates. This is used by
13372 fp moves, that force all constants to memory to allow combining. */
13373
13374 if (MEM_P (operands[1])
13375 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13376 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13377 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13378 if (push_operand (operands[0], VOIDmode))
13379 {
13380 operands[0] = copy_rtx (operands[0]);
13381 PUT_MODE (operands[0], Pmode);
13382 }
13383 else
13384 operands[0] = gen_lowpart (DImode, operands[0]);
13385 operands[1] = gen_lowpart (DImode, operands[1]);
13386 emit_move_insn (operands[0], operands[1]);
13387 return;
13388 }
13389
13390 /* The only non-offsettable memory we handle is push. */
13391 if (push_operand (operands[0], VOIDmode))
13392 push = 1;
13393 else
13394 gcc_assert (!MEM_P (operands[0])
13395 || offsettable_memref_p (operands[0]));
13396
13397 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13398 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13399
13400 /* When emitting push, take care for source operands on the stack. */
13401 if (push && MEM_P (operands[1])
13402 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13403 {
13404 if (nparts == 3)
13405 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13406 XEXP (part[1][2], 0));
13407 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13408 XEXP (part[1][1], 0));
13409 }
13410
13411 /* We need to do the copy in the right order in case an address register
13412 of the source overlaps the destination. */
13413 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13414 {
13415 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13416 collisions++;
13417 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13418 collisions++;
13419 if (nparts == 3
13420 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13421 collisions++;
13422
13423 /* Collision in the middle part can be handled by reordering. */
13424 if (collisions == 1 && nparts == 3
13425 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13426 {
13427 rtx tmp;
13428 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13429 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13430 }
13431
13432 /* If there are more collisions, we can't handle it by reordering.
13433 Do an lea to the last part and use only one colliding move. */
13434 else if (collisions > 1)
13435 {
13436 rtx base;
13437
13438 collisions = 1;
13439
13440 base = part[0][nparts - 1];
13441
13442 /* Handle the case when the last part isn't valid for lea.
13443 Happens in 64-bit mode storing the 12-byte XFmode. */
13444 if (GET_MODE (base) != Pmode)
13445 base = gen_rtx_REG (Pmode, REGNO (base));
13446
13447 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13448 part[1][0] = replace_equiv_address (part[1][0], base);
13449 part[1][1] = replace_equiv_address (part[1][1],
13450 plus_constant (base, UNITS_PER_WORD));
13451 if (nparts == 3)
13452 part[1][2] = replace_equiv_address (part[1][2],
13453 plus_constant (base, 8));
13454 }
13455 }
13456
13457 if (push)
13458 {
13459 if (!TARGET_64BIT)
13460 {
13461 if (nparts == 3)
13462 {
13463 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13464 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13465 emit_move_insn (part[0][2], part[1][2]);
13466 }
13467 }
13468 else
13469 {
13470 /* In 64bit mode we don't have a 32bit push available. In case this is
13471 a register, it is OK - we will just use the larger counterpart. We also
13472 retype memory - these come from an attempt to avoid the REX prefix on
13473 moving the second half of a TFmode value. */
13474 if (GET_MODE (part[1][1]) == SImode)
13475 {
13476 switch (GET_CODE (part[1][1]))
13477 {
13478 case MEM:
13479 part[1][1] = adjust_address (part[1][1], DImode, 0);
13480 break;
13481
13482 case REG:
13483 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13484 break;
13485
13486 default:
13487 gcc_unreachable ();
13488 }
13489
13490 if (GET_MODE (part[1][0]) == SImode)
13491 part[1][0] = part[1][1];
13492 }
13493 }
13494 emit_move_insn (part[0][1], part[1][1]);
13495 emit_move_insn (part[0][0], part[1][0]);
13496 return;
13497 }
13498
13499 /* Choose correct order to not overwrite the source before it is copied. */
13500 if ((REG_P (part[0][0])
13501 && REG_P (part[1][1])
13502 && (REGNO (part[0][0]) == REGNO (part[1][1])
13503 || (nparts == 3
13504 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13505 || (collisions > 0
13506 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13507 {
13508 if (nparts == 3)
13509 {
13510 operands[2] = part[0][2];
13511 operands[3] = part[0][1];
13512 operands[4] = part[0][0];
13513 operands[5] = part[1][2];
13514 operands[6] = part[1][1];
13515 operands[7] = part[1][0];
13516 }
13517 else
13518 {
13519 operands[2] = part[0][1];
13520 operands[3] = part[0][0];
13521 operands[5] = part[1][1];
13522 operands[6] = part[1][0];
13523 }
13524 }
13525 else
13526 {
13527 if (nparts == 3)
13528 {
13529 operands[2] = part[0][0];
13530 operands[3] = part[0][1];
13531 operands[4] = part[0][2];
13532 operands[5] = part[1][0];
13533 operands[6] = part[1][1];
13534 operands[7] = part[1][2];
13535 }
13536 else
13537 {
13538 operands[2] = part[0][0];
13539 operands[3] = part[0][1];
13540 operands[5] = part[1][0];
13541 operands[6] = part[1][1];
13542 }
13543 }
13544
13545 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13546 if (optimize_size)
13547 {
13548 if (CONST_INT_P (operands[5])
13549 && operands[5] != const0_rtx
13550 && REG_P (operands[2]))
13551 {
13552 if (CONST_INT_P (operands[6])
13553 && INTVAL (operands[6]) == INTVAL (operands[5]))
13554 operands[6] = operands[2];
13555
13556 if (nparts == 3
13557 && CONST_INT_P (operands[7])
13558 && INTVAL (operands[7]) == INTVAL (operands[5]))
13559 operands[7] = operands[2];
13560 }
13561
13562 if (nparts == 3
13563 && CONST_INT_P (operands[6])
13564 && operands[6] != const0_rtx
13565 && REG_P (operands[3])
13566 && CONST_INT_P (operands[7])
13567 && INTVAL (operands[7]) == INTVAL (operands[6]))
13568 operands[7] = operands[3];
13569 }
13570
13571 emit_move_insn (operands[2], operands[5]);
13572 emit_move_insn (operands[3], operands[6]);
13573 if (nparts == 3)
13574 emit_move_insn (operands[4], operands[7]);
13575
13576 return;
13577 }
13578
13579 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13580 left shift by a constant, either using a single shift or
13581 a sequence of add instructions. */
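/* A shift by one is always emitted as a single addition; larger constant
shifts become a run of additions only when not optimizing for size and
COUNT additions are no more expensive than one constant shift according
to ix86_cost. */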
13582
13583 static void
13584 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13585 {
13586 if (count == 1)
13587 {
13588 emit_insn ((mode == DImode
13589 ? gen_addsi3
13590 : gen_adddi3) (operand, operand, operand));
13591 }
13592 else if (!optimize_size
13593 && count * ix86_cost->add <= ix86_cost->shift_const)
13594 {
13595 int i;
13596 for (i=0; i<count; i++)
13597 {
13598 emit_insn ((mode == DImode
13599 ? gen_addsi3
13600 : gen_adddi3) (operand, operand, operand));
13601 }
13602 }
13603 else
13604 emit_insn ((mode == DImode
13605 ? gen_ashlsi3
13606 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13607 }
13608
13609 void
13610 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13611 {
13612 rtx low[2], high[2];
13613 int count;
13614 const int single_width = mode == DImode ? 32 : 64;
13615
13616 if (CONST_INT_P (operands[2]))
13617 {
13618 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13619 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13620
13621 if (count >= single_width)
13622 {
13623 emit_move_insn (high[0], low[1]);
13624 emit_move_insn (low[0], const0_rtx);
13625
13626 if (count > single_width)
13627 ix86_expand_ashl_const (high[0], count - single_width, mode);
13628 }
13629 else
13630 {
13631 if (!rtx_equal_p (operands[0], operands[1]))
13632 emit_move_insn (operands[0], operands[1]);
13633 emit_insn ((mode == DImode
13634 ? gen_x86_shld_1
13635 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13636 ix86_expand_ashl_const (low[0], count, mode);
13637 }
13638 return;
13639 }
13640
13641 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13642
13643 if (operands[1] == const1_rtx)
13644 {
13645 /* Assuming we've chosen QImode-capable registers, 1 << N
13646 can be done with two 32/64-bit shifts, no branches, no cmoves. */
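/* The count bit selecting the half (bit 5 for a DImode shift, bit 6 for
a TImode shift) is tested once; setcc materializes 1 in the selected
half and 0 in the other, and the final shifts by the count, which the
hardware masks to the word size, move that single bit into place. */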
13647 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13648 {
13649 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13650
13651 ix86_expand_clear (low[0]);
13652 ix86_expand_clear (high[0]);
13653 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13654
13655 d = gen_lowpart (QImode, low[0]);
13656 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13657 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13658 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13659
13660 d = gen_lowpart (QImode, high[0]);
13661 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13662 s = gen_rtx_NE (QImode, flags, const0_rtx);
13663 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13664 }
13665
13666 /* Otherwise, we can get the same results by manually performing
13667 a bit extract operation on bit 5/6, and then performing the two
13668 shifts. The two methods of getting 0/1 into low/high are exactly
13669 the same size. Avoiding the shift in the bit extract case helps
13670 pentium4 a bit; no one else seems to care much either way. */
13671 else
13672 {
13673 rtx x;
13674
13675 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13676 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13677 else
13678 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13679 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13680
13681 emit_insn ((mode == DImode
13682 ? gen_lshrsi3
13683 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13684 emit_insn ((mode == DImode
13685 ? gen_andsi3
13686 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13687 emit_move_insn (low[0], high[0]);
13688 emit_insn ((mode == DImode
13689 ? gen_xorsi3
13690 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13691 }
13692
13693 emit_insn ((mode == DImode
13694 ? gen_ashlsi3
13695 : gen_ashldi3) (low[0], low[0], operands[2]));
13696 emit_insn ((mode == DImode
13697 ? gen_ashlsi3
13698 : gen_ashldi3) (high[0], high[0], operands[2]));
13699 return;
13700 }
13701
13702 if (operands[1] == constm1_rtx)
13703 {
13704 /* For -1 << N, we can avoid the shld instruction, because we
13705 know that we're shifting 0...31/63 ones into a -1. */
13706 emit_move_insn (low[0], constm1_rtx);
13707 if (optimize_size)
13708 emit_move_insn (high[0], low[0]);
13709 else
13710 emit_move_insn (high[0], constm1_rtx);
13711 }
13712 else
13713 {
13714 if (!rtx_equal_p (operands[0], operands[1]))
13715 emit_move_insn (operands[0], operands[1]);
13716
13717 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13718 emit_insn ((mode == DImode
13719 ? gen_x86_shld_1
13720 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13721 }
13722
13723 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13724
13725 if (TARGET_CMOVE && scratch)
13726 {
13727 ix86_expand_clear (scratch);
13728 emit_insn ((mode == DImode
13729 ? gen_x86_shift_adj_1
13730 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13731 }
13732 else
13733 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13734 }
13735
13736 void
13737 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13738 {
13739 rtx low[2], high[2];
13740 int count;
13741 const int single_width = mode == DImode ? 32 : 64;
13742
13743 if (CONST_INT_P (operands[2]))
13744 {
13745 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13746 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13747
13748 if (count == single_width * 2 - 1)
13749 {
13750 emit_move_insn (high[0], high[1]);
13751 emit_insn ((mode == DImode
13752 ? gen_ashrsi3
13753 : gen_ashrdi3) (high[0], high[0],
13754 GEN_INT (single_width - 1)));
13755 emit_move_insn (low[0], high[0]);
13756
13757 }
13758 else if (count >= single_width)
13759 {
13760 emit_move_insn (low[0], high[1]);
13761 emit_move_insn (high[0], low[0]);
13762 emit_insn ((mode == DImode
13763 ? gen_ashrsi3
13764 : gen_ashrdi3) (high[0], high[0],
13765 GEN_INT (single_width - 1)));
13766 if (count > single_width)
13767 emit_insn ((mode == DImode
13768 ? gen_ashrsi3
13769 : gen_ashrdi3) (low[0], low[0],
13770 GEN_INT (count - single_width)));
13771 }
13772 else
13773 {
13774 if (!rtx_equal_p (operands[0], operands[1]))
13775 emit_move_insn (operands[0], operands[1]);
13776 emit_insn ((mode == DImode
13777 ? gen_x86_shrd_1
13778 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13779 emit_insn ((mode == DImode
13780 ? gen_ashrsi3
13781 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13782 }
13783 }
13784 else
13785 {
13786 if (!rtx_equal_p (operands[0], operands[1]))
13787 emit_move_insn (operands[0], operands[1]);
13788
13789 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13790
13791 emit_insn ((mode == DImode
13792 ? gen_x86_shrd_1
13793 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13794 emit_insn ((mode == DImode
13795 ? gen_ashrsi3
13796 : gen_ashrdi3) (high[0], high[0], operands[2]));
13797
13798 if (TARGET_CMOVE && scratch)
13799 {
13800 emit_move_insn (scratch, high[0]);
13801 emit_insn ((mode == DImode
13802 ? gen_ashrsi3
13803 : gen_ashrdi3) (scratch, scratch,
13804 GEN_INT (single_width - 1)));
13805 emit_insn ((mode == DImode
13806 ? gen_x86_shift_adj_1
13807 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13808 scratch));
13809 }
13810 else
13811 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13812 }
13813 }
13814
13815 void
13816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13817 {
13818 rtx low[2], high[2];
13819 int count;
13820 const int single_width = mode == DImode ? 32 : 64;
13821
13822 if (CONST_INT_P (operands[2]))
13823 {
13824 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13825 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13826
13827 if (count >= single_width)
13828 {
13829 emit_move_insn (low[0], high[1]);
13830 ix86_expand_clear (high[0]);
13831
13832 if (count > single_width)
13833 emit_insn ((mode == DImode
13834 ? gen_lshrsi3
13835 : gen_lshrdi3) (low[0], low[0],
13836 GEN_INT (count - single_width)));
13837 }
13838 else
13839 {
13840 if (!rtx_equal_p (operands[0], operands[1]))
13841 emit_move_insn (operands[0], operands[1]);
13842 emit_insn ((mode == DImode
13843 ? gen_x86_shrd_1
13844 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13845 emit_insn ((mode == DImode
13846 ? gen_lshrsi3
13847 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13848 }
13849 }
13850 else
13851 {
13852 if (!rtx_equal_p (operands[0], operands[1]))
13853 emit_move_insn (operands[0], operands[1]);
13854
13855 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13856
13857 emit_insn ((mode == DImode
13858 ? gen_x86_shrd_1
13859 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13860 emit_insn ((mode == DImode
13861 ? gen_lshrsi3
13862 : gen_lshrdi3) (high[0], high[0], operands[2]));
13863
13864 /* Heh. By reversing the arguments, we can reuse this pattern. */
13865 if (TARGET_CMOVE && scratch)
13866 {
13867 ix86_expand_clear (scratch);
13868 emit_insn ((mode == DImode
13869 ? gen_x86_shift_adj_1
13870 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13871 scratch));
13872 }
13873 else
13874 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13875 }
13876 }
13877
13878 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13879 static void
13880 predict_jump (int prob)
13881 {
13882 rtx insn = get_last_insn ();
13883 gcc_assert (JUMP_P (insn));
13884 REG_NOTES (insn)
13885 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13886 GEN_INT (prob),
13887 REG_NOTES (insn));
13888 }
13889
13890 /* Helper function for the string operations below. Test VARIABLE for
13891 alignment to VALUE bytes; if it is aligned, jump to the returned label. */
13892 static rtx
13893 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13894 {
13895 rtx label = gen_label_rtx ();
13896 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13897 if (GET_MODE (variable) == DImode)
13898 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13899 else
13900 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13901 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13902 1, label);
13903 if (epilogue)
13904 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13905 else
13906 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13907 return label;
13908 }
13909
13910 /* Decrease COUNTREG by VALUE. */
13911 static void
13912 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13913 {
13914 if (GET_MODE (countreg) == DImode)
13915 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13916 else
13917 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13918 }
13919
13920 /* Zero extend the possibly SImode EXP to a Pmode register. */
13921 rtx
13922 ix86_zero_extend_to_Pmode (rtx exp)
13923 {
13924 rtx r;
13925 if (GET_MODE (exp) == VOIDmode)
13926 return force_reg (Pmode, exp);
13927 if (GET_MODE (exp) == Pmode)
13928 return copy_to_mode_reg (Pmode, exp);
13929 r = gen_reg_rtx (Pmode);
13930 emit_insn (gen_zero_extendsidi2 (r, exp));
13931 return r;
13932 }
13933
13934 /* Divide COUNTREG by SCALE, which is expected to be a power of two. */
13935 static rtx
13936 scale_counter (rtx countreg, int scale)
13937 {
13938 rtx sc;
13939 rtx piece_size_mask;
13940
13941 if (scale == 1)
13942 return countreg;
13943 if (CONST_INT_P (countreg))
13944 return GEN_INT (INTVAL (countreg) / scale);
13945 gcc_assert (REG_P (countreg));
13946
13947 piece_size_mask = GEN_INT (scale - 1);
13948 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13949 GEN_INT (exact_log2 (scale)),
13950 NULL, 1, OPTAB_DIRECT);
13951 return sc;
13952 }
13953
13954 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13955 DImode for constant loop counts. */
13956
13957 static enum machine_mode
13958 counter_mode (rtx count_exp)
13959 {
13960 if (GET_MODE (count_exp) != VOIDmode)
13961 return GET_MODE (count_exp);
13962 if (GET_CODE (count_exp) != CONST_INT)
13963 return Pmode;
13964 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13965 return DImode;
13966 return SImode;
13967 }
13968
13969 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13970 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
13971 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
13972 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
13973 
13974 The size is rounded down to a whole number of chunks moved at once.
13975 SRCMEM and DESTMEM provide MEM rtxen to feed proper aliasing info. */
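/* Schematically, the emitted code is roughly
 
	size = count & -(chunk_size * unroll);
	iter = 0;
      top:
	copy or set chunk_size * unroll bytes at dest + iter (and src + iter);
	iter += chunk_size * unroll;
	if (iter < size) goto top;
	dest += iter;  src += iter;
      out:
 
   where chunk_size is GET_MODE_SIZE (MODE). */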
13976
13977
13978 static void
13979 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13980 rtx destptr, rtx srcptr, rtx value,
13981 rtx count, enum machine_mode mode, int unroll,
13982 int expected_size)
13983 {
13984 rtx out_label, top_label, iter, tmp;
13985 enum machine_mode iter_mode = counter_mode (count);
13986 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13987 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13988 rtx size;
13989 rtx x_addr;
13990 rtx y_addr;
13991 int i;
13992
13993 top_label = gen_label_rtx ();
13994 out_label = gen_label_rtx ();
13995 iter = gen_reg_rtx (iter_mode);
13996
13997 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13998 NULL, 1, OPTAB_DIRECT);
13999 /* Those two should combine. */
14000 if (piece_size == const1_rtx)
14001 {
14002 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14003 true, out_label);
14004 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14005 }
14006 emit_move_insn (iter, const0_rtx);
14007
14008 emit_label (top_label);
14009
14010 tmp = convert_modes (Pmode, iter_mode, iter, true);
14011 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14012 destmem = change_address (destmem, mode, x_addr);
14013
14014 if (srcmem)
14015 {
14016 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14017 srcmem = change_address (srcmem, mode, y_addr);
14018
14019 /* When unrolling for chips that reorder memory reads and writes,
14020 we can save registers by using a single temporary.
14021 Also, using 4 temporaries is overkill in 32bit mode. */
14022 if (!TARGET_64BIT && 0)
14023 {
14024 for (i = 0; i < unroll; i++)
14025 {
14026 if (i)
14027 {
14028 destmem =
14029 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14030 srcmem =
14031 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14032 }
14033 emit_move_insn (destmem, srcmem);
14034 }
14035 }
14036 else
14037 {
14038 rtx tmpreg[4];
14039 gcc_assert (unroll <= 4);
14040 for (i = 0; i < unroll; i++)
14041 {
14042 tmpreg[i] = gen_reg_rtx (mode);
14043 if (i)
14044 {
14045 srcmem =
14046 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14047 }
14048 emit_move_insn (tmpreg[i], srcmem);
14049 }
14050 for (i = 0; i < unroll; i++)
14051 {
14052 if (i)
14053 {
14054 destmem =
14055 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14056 }
14057 emit_move_insn (destmem, tmpreg[i]);
14058 }
14059 }
14060 }
14061 else
14062 for (i = 0; i < unroll; i++)
14063 {
14064 if (i)
14065 destmem =
14066 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14067 emit_move_insn (destmem, value);
14068 }
14069
14070 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14071 true, OPTAB_LIB_WIDEN);
14072 if (tmp != iter)
14073 emit_move_insn (iter, tmp);
14074
14075 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14076 true, top_label);
14077 if (expected_size != -1)
14078 {
14079 expected_size /= GET_MODE_SIZE (mode) * unroll;
14080 if (expected_size == 0)
14081 predict_jump (0);
14082 else if (expected_size > REG_BR_PROB_BASE)
14083 predict_jump (REG_BR_PROB_BASE - 1);
14084 else
14085 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14086 }
14087 else
14088 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14089 iter = ix86_zero_extend_to_Pmode (iter);
14090 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14091 true, OPTAB_LIB_WIDEN);
14092 if (tmp != destptr)
14093 emit_move_insn (destptr, tmp);
14094 if (srcptr)
14095 {
14096 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14097 true, OPTAB_LIB_WIDEN);
14098 if (tmp != srcptr)
14099 emit_move_insn (srcptr, tmp);
14100 }
14101 emit_label (out_label);
14102 }
14103
14104 /* Output a "rep; mov" instruction.
14105 Arguments have the same meaning as for the previous function. */
14106 static void
14107 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14108 rtx destptr, rtx srcptr,
14109 rtx count,
14110 enum machine_mode mode)
14111 {
14112 rtx destexp;
14113 rtx srcexp;
14114 rtx countreg;
14115
14116 /* If the size is known and a multiple of 4, it is shorter to use rep movsl than rep movsb. */
14117 if (mode == QImode && CONST_INT_P (count)
14118 && !(INTVAL (count) & 3))
14119 mode = SImode;
14120
14121 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14122 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14123 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14124 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14125 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14126 if (mode != QImode)
14127 {
14128 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14129 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14130 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14131 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14132 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14133 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14134 }
14135 else
14136 {
14137 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14138 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14139 }
14140 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14141 destexp, srcexp));
14142 }
14143
14144 /* Output a "rep; stos" instruction.
14145 Arguments have the same meaning as for the previous function. */
14146 static void
14147 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14148 rtx count,
14149 enum machine_mode mode)
14150 {
14151 rtx destexp;
14152 rtx countreg;
14153
14154 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14155 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14156 value = force_reg (mode, gen_lowpart (mode, value));
14157 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14158 if (mode != QImode)
14159 {
14160 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14161 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14162 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14163 }
14164 else
14165 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14166 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14167 }
14168
14169 static void
14170 emit_strmov (rtx destmem, rtx srcmem,
14171 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14172 {
14173 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14174 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14175 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14176 }
14177
14178 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
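/* For a compile-time constant COUNT only the chunks selected by its low bits
are emitted as straight-line moves; for a variable COUNT we either fall
back to a byte loop (large MAX_SIZE) or emit a sequence of alignment tests. */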
14179 static void
14180 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14181 rtx destptr, rtx srcptr, rtx count, int max_size)
14182 {
14183 rtx src, dest;
14184 if (CONST_INT_P (count))
14185 {
14186 HOST_WIDE_INT countval = INTVAL (count);
14187 int offset = 0;
14188
14189 if ((countval & 0x10) && max_size > 16)
14190 {
14191 if (TARGET_64BIT)
14192 {
14193 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14194 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14195 }
14196 else
14197 gcc_unreachable ();
14198 offset += 16;
14199 }
14200 if ((countval & 0x08) && max_size > 8)
14201 {
14202 if (TARGET_64BIT)
14203 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14204 else
14205 {
14206 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14207 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14208 }
14209 offset += 8;
14210 }
14211 if ((countval & 0x04) && max_size > 4)
14212 {
14213 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14214 offset += 4;
14215 }
14216 if ((countval & 0x02) && max_size > 2)
14217 {
14218 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14219 offset += 2;
14220 }
14221 if ((countval & 0x01) && max_size > 1)
14222 {
14223 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14224 offset += 1;
14225 }
14226 return;
14227 }
14228 if (max_size > 8)
14229 {
14230 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14231 count, 1, OPTAB_DIRECT);
14232 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14233 count, QImode, 1, 4);
14234 return;
14235 }
14236
14237 /* When single stringop instructions are available, we can cheaply advance
14238 the dest and src pointers. Otherwise we save code size by maintaining an
14239 offset (zero is readily available from the preceding rep operation) and
14240 using x86 addressing modes. */
14241 if (TARGET_SINGLE_STRINGOP)
14242 {
14243 if (max_size > 4)
14244 {
14245 rtx label = ix86_expand_aligntest (count, 4, true);
14246 src = change_address (srcmem, SImode, srcptr);
14247 dest = change_address (destmem, SImode, destptr);
14248 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14249 emit_label (label);
14250 LABEL_NUSES (label) = 1;
14251 }
14252 if (max_size > 2)
14253 {
14254 rtx label = ix86_expand_aligntest (count, 2, true);
14255 src = change_address (srcmem, HImode, srcptr);
14256 dest = change_address (destmem, HImode, destptr);
14257 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14258 emit_label (label);
14259 LABEL_NUSES (label) = 1;
14260 }
14261 if (max_size > 1)
14262 {
14263 rtx label = ix86_expand_aligntest (count, 1, true);
14264 src = change_address (srcmem, QImode, srcptr);
14265 dest = change_address (destmem, QImode, destptr);
14266 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14267 emit_label (label);
14268 LABEL_NUSES (label) = 1;
14269 }
14270 }
14271 else
14272 {
14273 rtx offset = force_reg (Pmode, const0_rtx);
14274 rtx tmp;
14275
14276 if (max_size > 4)
14277 {
14278 rtx label = ix86_expand_aligntest (count, 4, true);
14279 src = change_address (srcmem, SImode, srcptr);
14280 dest = change_address (destmem, SImode, destptr);
14281 emit_move_insn (dest, src);
14282 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14283 true, OPTAB_LIB_WIDEN);
14284 if (tmp != offset)
14285 emit_move_insn (offset, tmp);
14286 emit_label (label);
14287 LABEL_NUSES (label) = 1;
14288 }
14289 if (max_size > 2)
14290 {
14291 rtx label = ix86_expand_aligntest (count, 2, true);
14292 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14293 src = change_address (srcmem, HImode, tmp);
14294 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14295 dest = change_address (destmem, HImode, tmp);
14296 emit_move_insn (dest, src);
14297 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14298 true, OPTAB_LIB_WIDEN);
14299 if (tmp != offset)
14300 emit_move_insn (offset, tmp);
14301 emit_label (label);
14302 LABEL_NUSES (label) = 1;
14303 }
14304 if (max_size > 1)
14305 {
14306 rtx label = ix86_expand_aligntest (count, 1, true);
14307 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14308 src = change_address (srcmem, QImode, tmp);
14309 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14310 dest = change_address (destmem, QImode, tmp);
14311 emit_move_insn (dest, src);
14312 emit_label (label);
14313 LABEL_NUSES (label) = 1;
14314 }
14315 }
14316 }
14317
14318 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14319 static void
14320 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14321 rtx count, int max_size)
14322 {
14323 count =
14324 expand_simple_binop (counter_mode (count), AND, count,
14325 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14326 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14327 gen_lowpart (QImode, value), count, QImode,
14328 1, max_size / 2);
14329 }
14330
14331 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14332 static void
14333 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14334 {
14335 rtx dest;
14336
14337 if (CONST_INT_P (count))
14338 {
14339 HOST_WIDE_INT countval = INTVAL (count);
14340 int offset = 0;
14341
14342 if ((countval & 0x10) && max_size > 16)
14343 {
14344 if (TARGET_64BIT)
14345 {
14346 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14347 emit_insn (gen_strset (destptr, dest, value));
14348 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14349 emit_insn (gen_strset (destptr, dest, value));
14350 }
14351 else
14352 gcc_unreachable ();
14353 offset += 16;
14354 }
14355 if ((countval & 0x08) && max_size > 8)
14356 {
14357 if (TARGET_64BIT)
14358 {
14359 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14360 emit_insn (gen_strset (destptr, dest, value));
14361 }
14362 else
14363 {
14364 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14365 emit_insn (gen_strset (destptr, dest, value));
14366 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14367 emit_insn (gen_strset (destptr, dest, value));
14368 }
14369 offset += 8;
14370 }
14371 if ((countval & 0x04) && max_size > 4)
14372 {
14373 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14374 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14375 offset += 4;
14376 }
14377 if ((countval & 0x02) && max_size > 2)
14378 {
14379 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14380 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14381 offset += 2;
14382 }
14383 if ((countval & 0x01) && max_size > 1)
14384 {
14385 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14386 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14387 offset += 1;
14388 }
14389 return;
14390 }
14391 if (max_size > 32)
14392 {
14393 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14394 return;
14395 }
14396 if (max_size > 16)
14397 {
14398 rtx label = ix86_expand_aligntest (count, 16, true);
14399 if (TARGET_64BIT)
14400 {
14401 dest = change_address (destmem, DImode, destptr);
14402 emit_insn (gen_strset (destptr, dest, value));
14403 emit_insn (gen_strset (destptr, dest, value));
14404 }
14405 else
14406 {
14407 dest = change_address (destmem, SImode, destptr);
14408 emit_insn (gen_strset (destptr, dest, value));
14409 emit_insn (gen_strset (destptr, dest, value));
14410 emit_insn (gen_strset (destptr, dest, value));
14411 emit_insn (gen_strset (destptr, dest, value));
14412 }
14413 emit_label (label);
14414 LABEL_NUSES (label) = 1;
14415 }
14416 if (max_size > 8)
14417 {
14418 rtx label = ix86_expand_aligntest (count, 8, true);
14419 if (TARGET_64BIT)
14420 {
14421 dest = change_address (destmem, DImode, destptr);
14422 emit_insn (gen_strset (destptr, dest, value));
14423 }
14424 else
14425 {
14426 dest = change_address (destmem, SImode, destptr);
14427 emit_insn (gen_strset (destptr, dest, value));
14428 emit_insn (gen_strset (destptr, dest, value));
14429 }
14430 emit_label (label);
14431 LABEL_NUSES (label) = 1;
14432 }
14433 if (max_size > 4)
14434 {
14435 rtx label = ix86_expand_aligntest (count, 4, true);
14436 dest = change_address (destmem, SImode, destptr);
14437 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14438 emit_label (label);
14439 LABEL_NUSES (label) = 1;
14440 }
14441 if (max_size > 2)
14442 {
14443 rtx label = ix86_expand_aligntest (count, 2, true);
14444 dest = change_address (destmem, HImode, destptr);
14445 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14446 emit_label (label);
14447 LABEL_NUSES (label) = 1;
14448 }
14449 if (max_size > 1)
14450 {
14451 rtx label = ix86_expand_aligntest (count, 1, true);
14452 dest = change_address (destmem, QImode, destptr);
14453 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14454 emit_label (label);
14455 LABEL_NUSES (label) = 1;
14456 }
14457 }
14458
14459 /* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
14460 up to DESIRED_ALIGNMENT. */
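/* Each step below tests one low bit of DESTPTR and, when it is set, copies a
single byte, word or dword and decreases COUNT accordingly, so at most
DESIRED_ALIGNMENT - ALIGN bytes are moved here. */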
14461 static void
14462 expand_movmem_prologue (rtx destmem, rtx srcmem,
14463 rtx destptr, rtx srcptr, rtx count,
14464 int align, int desired_alignment)
14465 {
14466 if (align <= 1 && desired_alignment > 1)
14467 {
14468 rtx label = ix86_expand_aligntest (destptr, 1, false);
14469 srcmem = change_address (srcmem, QImode, srcptr);
14470 destmem = change_address (destmem, QImode, destptr);
14471 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14472 ix86_adjust_counter (count, 1);
14473 emit_label (label);
14474 LABEL_NUSES (label) = 1;
14475 }
14476 if (align <= 2 && desired_alignment > 2)
14477 {
14478 rtx label = ix86_expand_aligntest (destptr, 2, false);
14479 srcmem = change_address (srcmem, HImode, srcptr);
14480 destmem = change_address (destmem, HImode, destptr);
14481 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14482 ix86_adjust_counter (count, 2);
14483 emit_label (label);
14484 LABEL_NUSES (label) = 1;
14485 }
14486 if (align <= 4 && desired_alignment > 4)
14487 {
14488 rtx label = ix86_expand_aligntest (destptr, 4, false);
14489 srcmem = change_address (srcmem, SImode, srcptr);
14490 destmem = change_address (destmem, SImode, destptr);
14491 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14492 ix86_adjust_counter (count, 4);
14493 emit_label (label);
14494 LABEL_NUSES (label) = 1;
14495 }
14496 gcc_assert (desired_alignment <= 8);
14497 }
14498
14499 /* Store enough of VALUE at DEST to align DEST, known to be aligned to ALIGN,
14500 up to DESIRED_ALIGNMENT. */
14501 static void
14502 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14503 int align, int desired_alignment)
14504 {
14505 if (align <= 1 && desired_alignment > 1)
14506 {
14507 rtx label = ix86_expand_aligntest (destptr, 1, false);
14508 destmem = change_address (destmem, QImode, destptr);
14509 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14510 ix86_adjust_counter (count, 1);
14511 emit_label (label);
14512 LABEL_NUSES (label) = 1;
14513 }
14514 if (align <= 2 && desired_alignment > 2)
14515 {
14516 rtx label = ix86_expand_aligntest (destptr, 2, false);
14517 destmem = change_address (destmem, HImode, destptr);
14518 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14519 ix86_adjust_counter (count, 2);
14520 emit_label (label);
14521 LABEL_NUSES (label) = 1;
14522 }
14523 if (align <= 4 && desired_alignment > 4)
14524 {
14525 rtx label = ix86_expand_aligntest (destptr, 4, false);
14526 destmem = change_address (destmem, SImode, destptr);
14527 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14528 ix86_adjust_counter (count, 4);
14529 emit_label (label);
14530 LABEL_NUSES (label) = 1;
14531 }
14532 gcc_assert (desired_alignment <= 8);
14533 }
14534
14535 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
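/* The per-CPU cost tables ix86_cost->memcpy and ix86_cost->memset, indexed
by TARGET_64BIT, list pairs of a size limit and an algorithm; the first
entry whose limit covers the expected size (-1 meaning unbounded)
determines the algorithm, and unknown_size is used when no size is known. */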
14536 static enum stringop_alg
14537 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14538 int *dynamic_check)
14539 {
14540 const struct stringop_algs * algs;
14541
14542 *dynamic_check = -1;
14543 if (memset)
14544 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14545 else
14546 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14547 if (stringop_alg != no_stringop)
14548 return stringop_alg;
14549 /* rep; movq or rep; movl is the smallest variant. */
14550 else if (optimize_size)
14551 {
14552 if (!count || (count & 3))
14553 return rep_prefix_1_byte;
14554 else
14555 return rep_prefix_4_byte;
14556 }
14557 /* Very tiny blocks are best handled via the loop; REP is expensive to set
14558 up. */
14559 else if (expected_size != -1 && expected_size < 4)
14560 return loop_1_byte;
14561 else if (expected_size != -1)
14562 {
14563 unsigned int i;
14564 enum stringop_alg alg = libcall;
14565 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14566 {
14567 gcc_assert (algs->size[i].max);
14568 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14569 {
14570 if (algs->size[i].alg != libcall)
14571 alg = algs->size[i].alg;
14572 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14573 last non-libcall inline algorithm. */
14574 if (TARGET_INLINE_ALL_STRINGOPS)
14575 {
14576 /* When the current size is best copied by a libcall,
14577 but we are still forced to inline, run the heuristic below
14578 that will pick code for medium-sized blocks. */
14579 if (alg != libcall)
14580 return alg;
14581 break;
14582 }
14583 else
14584 return algs->size[i].alg;
14585 }
14586 }
14587 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14588 }
14589 /* When asked to inline the call anyway, try to pick a meaningful choice.
14590 We look for the maximal size of block that is faster to copy by hand and
14591 take blocks of at most that size, guessing that the average size will
14592 be roughly half of the block.
14593 
14594 If this turns out to be bad, we might simply specify the preferred
14595 choice in ix86_costs. */
14596 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14597 && algs->unknown_size == libcall)
14598 {
14599 int max = -1;
14600 enum stringop_alg alg;
14601 int i;
14602
14603 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14604 if (algs->size[i].alg != libcall && algs->size[i].alg)
14605 max = algs->size[i].max;
14606 if (max == -1)
14607 max = 4096;
14608 alg = decide_alg (count, max / 2, memset, dynamic_check);
14609 gcc_assert (*dynamic_check == -1);
14610 gcc_assert (alg != libcall);
14611 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14612 *dynamic_check = max;
14613 return alg;
14614 }
14615 return algs->unknown_size;
14616 }
14617
14618 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14619 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14620 static int
14621 decide_alignment (int align,
14622 enum stringop_alg alg,
14623 int expected_size)
14624 {
14625 int desired_align = 0;
14626 switch (alg)
14627 {
14628 case no_stringop:
14629 gcc_unreachable ();
14630 case loop:
14631 case unrolled_loop:
14632 desired_align = GET_MODE_SIZE (Pmode);
14633 break;
14634 case rep_prefix_8_byte:
14635 desired_align = 8;
14636 break;
14637 case rep_prefix_4_byte:
14638 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14639 copying a whole cacheline at once. */
14640 if (TARGET_PENTIUMPRO)
14641 desired_align = 8;
14642 else
14643 desired_align = 4;
14644 break;
14645 case rep_prefix_1_byte:
14646 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14647 copying a whole cacheline at once. */
14648 if (TARGET_PENTIUMPRO)
14649 desired_align = 8;
14650 else
14651 desired_align = 1;
14652 break;
14653 case loop_1_byte:
14654 desired_align = 1;
14655 break;
14656 case libcall:
14657 return 0;
14658 }
14659
14660 if (optimize_size)
14661 desired_align = 1;
14662 if (desired_align < align)
14663 desired_align = align;
14664 if (expected_size != -1 && expected_size < 4)
14665 desired_align = align;
14666 return desired_align;
14667 }
14668
14669 /* Return the smallest power of 2 greater than VAL. */
14670 static int
14671 smallest_pow2_greater_than (int val)
14672 {
14673 int ret = 1;
14674 while (ret <= val)
14675 ret <<= 1;
14676 return ret;
14677 }
14678
14679 /* Expand string move (memcpy) operation. Use i386 string operations when
14680 profitable. expand_clrmem contains similar code. The code depends upon
14681 architecture, block size and alignment, but always has the same
14682 overall structure:
14683
14684 1) Prologue guard: Conditional that jumps up to the epilogue for small
14685 blocks that can be handled by the epilogue alone. This is faster but
14686 also needed for correctness, since the prologue assumes the block is larger
14687 than the desired alignment.
14688
14689 Optional dynamic check for size and libcall for large
14690 blocks is emitted here too, with -minline-stringops-dynamically.
14691
14692 2) Prologue: copy the first few bytes in order to get the destination aligned
14693 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14694 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14695 We emit either a jump tree on power of two sized blocks, or a byte loop.
14696
14697 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14698 with specified algorithm.
14699
14700 4) Epilogue: code copying tail of the block that is too small to be
14701 handled by main body (or up to size guarded by prologue guard). */
14702
14703 int
14704 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14705 rtx expected_align_exp, rtx expected_size_exp)
14706 {
14707 rtx destreg;
14708 rtx srcreg;
14709 rtx label = NULL;
14710 rtx tmp;
14711 rtx jump_around_label = NULL;
14712 HOST_WIDE_INT align = 1;
14713 unsigned HOST_WIDE_INT count = 0;
14714 HOST_WIDE_INT expected_size = -1;
14715 int size_needed = 0, epilogue_size_needed;
14716 int desired_align = 0;
14717 enum stringop_alg alg;
14718 int dynamic_check;
14719
14720 if (CONST_INT_P (align_exp))
14721 align = INTVAL (align_exp);
14722   /* i386 can do misaligned access at reasonably increased cost.  */
14723 if (CONST_INT_P (expected_align_exp)
14724 && INTVAL (expected_align_exp) > align)
14725 align = INTVAL (expected_align_exp);
14726 if (CONST_INT_P (count_exp))
14727 count = expected_size = INTVAL (count_exp);
14728 if (CONST_INT_P (expected_size_exp) && count == 0)
14729 expected_size = INTVAL (expected_size_exp);
14730
14731 /* Step 0: Decide on preferred algorithm, desired alignment and
14732 size of chunks to be copied by main loop. */
14733
14734 alg = decide_alg (count, expected_size, false, &dynamic_check);
14735 desired_align = decide_alignment (align, alg, expected_size);
14736
14737 if (!TARGET_ALIGN_STRINGOPS)
14738 align = desired_align;
14739
14740 if (alg == libcall)
14741 return 0;
14742 gcc_assert (alg != no_stringop);
14743 if (!count)
14744 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14745 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14746 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14747 switch (alg)
14748 {
14749 case libcall:
14750 case no_stringop:
14751 gcc_unreachable ();
14752 case loop:
14753 size_needed = GET_MODE_SIZE (Pmode);
14754 break;
14755 case unrolled_loop:
14756 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14757 break;
14758 case rep_prefix_8_byte:
14759 size_needed = 8;
14760 break;
14761 case rep_prefix_4_byte:
14762 size_needed = 4;
14763 break;
14764 case rep_prefix_1_byte:
14765 case loop_1_byte:
14766 size_needed = 1;
14767 break;
14768 }
14769
14770 epilogue_size_needed = size_needed;
14771
14772 /* Step 1: Prologue guard. */
14773
14774 /* Alignment code needs count to be in register. */
14775 if (CONST_INT_P (count_exp) && desired_align > align)
14776 {
14777 enum machine_mode mode = SImode;
14778 if (TARGET_64BIT && (count & ~0xffffffff))
14779 mode = DImode;
14780 count_exp = force_reg (mode, count_exp);
14781 }
14782 gcc_assert (desired_align >= 1 && align >= 1);
14783
14784 /* Ensure that alignment prologue won't copy past end of block. */
14785 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14786 {
14787 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14788 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14789 Make sure it is power of 2. */
14790 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14791
14792 label = gen_label_rtx ();
14793 emit_cmp_and_jump_insns (count_exp,
14794 GEN_INT (epilogue_size_needed),
14795 LTU, 0, counter_mode (count_exp), 1, label);
14796 if (GET_CODE (count_exp) == CONST_INT)
14797 ;
14798 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14799 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14800 else
14801 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14802 }
14803   /* Emit code to decide at runtime whether a library call or inline code should be
14804 used. */
14805 if (dynamic_check != -1)
14806 {
14807 rtx hot_label = gen_label_rtx ();
14808 jump_around_label = gen_label_rtx ();
14809 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14810 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14811 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14812 emit_block_move_via_libcall (dst, src, count_exp, false);
14813 emit_jump (jump_around_label);
14814 emit_label (hot_label);
14815 }
14816
14817 /* Step 2: Alignment prologue. */
14818
14819 if (desired_align > align)
14820 {
14821 /* Except for the first move in epilogue, we no longer know
14822          constant offset in aliasing info.  It does not seem worth
14823          the pain to maintain it for the first move, so throw away
14824 the info early. */
14825 src = change_address (src, BLKmode, srcreg);
14826 dst = change_address (dst, BLKmode, destreg);
14827 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14828 desired_align);
14829 }
14830 if (label && size_needed == 1)
14831 {
14832 emit_label (label);
14833 LABEL_NUSES (label) = 1;
14834 label = NULL;
14835 }
14836
14837 /* Step 3: Main loop. */
14838
14839 switch (alg)
14840 {
14841 case libcall:
14842 case no_stringop:
14843 gcc_unreachable ();
14844 case loop_1_byte:
14845 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14846 count_exp, QImode, 1, expected_size);
14847 break;
14848 case loop:
14849 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14850 count_exp, Pmode, 1, expected_size);
14851 break;
14852 case unrolled_loop:
14853 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14854 registers for 4 temporaries anyway. */
14855 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14856 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14857 expected_size);
14858 break;
14859 case rep_prefix_8_byte:
14860 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14861 DImode);
14862 break;
14863 case rep_prefix_4_byte:
14864 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14865 SImode);
14866 break;
14867 case rep_prefix_1_byte:
14868 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14869 QImode);
14870 break;
14871 }
14872   /* Properly adjust the offset of the src and dest memory for aliasing.  */
14873 if (CONST_INT_P (count_exp))
14874 {
14875 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14876 (count / size_needed) * size_needed);
14877 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14878 (count / size_needed) * size_needed);
14879 }
14880 else
14881 {
14882 src = change_address (src, BLKmode, srcreg);
14883 dst = change_address (dst, BLKmode, destreg);
14884 }
14885
14886 /* Step 4: Epilogue to copy the remaining bytes. */
14887
14888 if (label)
14889 {
14890 /* When the main loop is done, COUNT_EXP might hold original count,
14891 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14892 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14893 bytes. Compensate if needed. */
14894
14895 if (size_needed < epilogue_size_needed)
14896 {
14897 tmp =
14898 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14899 GEN_INT (size_needed - 1), count_exp, 1,
14900 OPTAB_DIRECT);
14901 if (tmp != count_exp)
14902 emit_move_insn (count_exp, tmp);
14903 }
14904 emit_label (label);
14905 LABEL_NUSES (label) = 1;
14906 }
14907
14908 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14909 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14910 epilogue_size_needed);
14911 if (jump_around_label)
14912 emit_label (jump_around_label);
14913 return 1;
14914 }
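/* The block below is an editorial sketch, kept under #if 0 so it does not
   affect the build: a hand-written C rendering of the four-step structure
   described in the comment before ix86_expand_movmem, assuming the simple
   word-sized loop algorithm and no dynamic libcall check.  The function name
   and the exact loop shapes are illustrative assumptions, not code used
   elsewhere in this file.  */
#if 0
static void
movmem_expansion_sketch (char *dst, const char *src, unsigned long n)
{
  const unsigned long chunk = sizeof (long);  /* SIZE_NEEDED for alg == loop.  */

  /* Step 1: prologue guard -- small blocks go straight to the epilogue.  */
  if (n >= chunk)
    {
      /* Step 2: alignment prologue -- copy a few bytes until DST reaches the
	 desired alignment; the guard above ensures this cannot run past the
	 end of the block.  */
      while (((unsigned long) dst % chunk) != 0)
	{
	  *dst++ = *src++;
	  n--;
	}
      /* Step 3: main loop -- copy in SIZE_NEEDED chunks.  */
      while (n >= chunk)
	{
	  *(long *) dst = *(const long *) src;
	  dst += chunk, src += chunk, n -= chunk;
	}
    }
  /* Step 4: epilogue -- copy the remaining tail byte by byte.  */
  while (n--)
    *dst++ = *src++;
}
#endif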
14915
14916 /* Helper function for memset (ix86_expand_setmem).  For QImode value 0xXY
14917    produce 0xXYXYXYXY of the width specified by MODE.  This is essentially
14918    a multiplication by 0x01010101, but we can do slightly better than
14919 synth_mult by unwinding the sequence by hand on CPUs with
14920 slow multiply. */
14921 static rtx
14922 promote_duplicated_reg (enum machine_mode mode, rtx val)
14923 {
14924 enum machine_mode valmode = GET_MODE (val);
14925 rtx tmp;
14926 int nops = mode == DImode ? 3 : 2;
14927
14928 gcc_assert (mode == SImode || mode == DImode);
14929 if (val == const0_rtx)
14930 return copy_to_mode_reg (mode, const0_rtx);
14931 if (CONST_INT_P (val))
14932 {
14933 HOST_WIDE_INT v = INTVAL (val) & 255;
14934
14935 v |= v << 8;
14936 v |= v << 16;
14937 if (mode == DImode)
14938 v |= (v << 16) << 16;
14939 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14940 }
14941
14942 if (valmode == VOIDmode)
14943 valmode = QImode;
14944 if (valmode != QImode)
14945 val = gen_lowpart (QImode, val);
14946 if (mode == QImode)
14947 return val;
14948 if (!TARGET_PARTIAL_REG_STALL)
14949 nops--;
14950 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14951 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14952 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14953 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14954 {
14955 rtx reg = convert_modes (mode, QImode, val, true);
14956 tmp = promote_duplicated_reg (mode, const1_rtx);
14957 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14958 OPTAB_DIRECT);
14959 }
14960 else
14961 {
14962 rtx reg = convert_modes (mode, QImode, val, true);
14963
14964 if (!TARGET_PARTIAL_REG_STALL)
14965 if (mode == SImode)
14966 emit_insn (gen_movsi_insv_1 (reg, reg));
14967 else
14968 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14969 else
14970 {
14971 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14972 NULL, 1, OPTAB_DIRECT);
14973 reg =
14974 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14975 }
14976 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14977 NULL, 1, OPTAB_DIRECT);
14978 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14979 if (mode == SImode)
14980 return reg;
14981 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14982 NULL, 1, OPTAB_DIRECT);
14983 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14984 return reg;
14985 }
14986 }
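/* Editorial sketch, under #if 0: the value promotion above written out as
   plain C for the shift-and-or path.  The helper name is an assumption used
   only for illustration.  */
#if 0
static unsigned long long
promote_duplicated_reg_sketch (unsigned char val)
{
  unsigned long long v = val;	/* 0x00000000000000XY */

  v |= v << 8;			/* 0x000000000000XYXY */
  v |= v << 16;			/* 0x00000000XYXYXYXY */
  v |= v << 32;			/* 0xXYXYXYXYXYXYXYXY */
  return v;			/* Same as val * 0x0101010101010101.  */
}
#endif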
14987
14988 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14989 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14990 alignment from ALIGN to DESIRED_ALIGN. */
14991 static rtx
14992 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14993 {
14994 rtx promoted_val;
14995
14996 if (TARGET_64BIT
14997 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14998 promoted_val = promote_duplicated_reg (DImode, val);
14999 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15000 promoted_val = promote_duplicated_reg (SImode, val);
15001 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15002 promoted_val = promote_duplicated_reg (HImode, val);
15003 else
15004 promoted_val = val;
15005
15006 return promoted_val;
15007 }
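/* Worked examples of the mode choice above (editorial illustration): a
   64-bit clear with SIZE_NEEDED == 8 promotes VAL to DImode; SIZE_NEEDED == 4,
   or a desired alignment above 2 bytes that exceeds the known ALIGN, promotes
   to SImode; SIZE_NEEDED == 2 promotes to HImode; and when both SIZE_NEEDED
   and the alignment gap are 1 the original QImode value is used as-is.  */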
15008
15009 /* Expand string set (memset) operation.  Use i386 string operations when
15010    profitable.  See the ix86_expand_movmem comment for an explanation of the individual
15011 steps performed. */
15012 int
15013 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15014 rtx expected_align_exp, rtx expected_size_exp)
15015 {
15016 rtx destreg;
15017 rtx label = NULL;
15018 rtx tmp;
15019 rtx jump_around_label = NULL;
15020 HOST_WIDE_INT align = 1;
15021 unsigned HOST_WIDE_INT count = 0;
15022 HOST_WIDE_INT expected_size = -1;
15023 int size_needed = 0, epilogue_size_needed;
15024 int desired_align = 0;
15025 enum stringop_alg alg;
15026 rtx promoted_val = NULL;
15027 bool force_loopy_epilogue = false;
15028 int dynamic_check;
15029
15030 if (CONST_INT_P (align_exp))
15031 align = INTVAL (align_exp);
15032   /* i386 can do misaligned access at reasonably increased cost.  */
15033 if (CONST_INT_P (expected_align_exp)
15034 && INTVAL (expected_align_exp) > align)
15035 align = INTVAL (expected_align_exp);
15036 if (CONST_INT_P (count_exp))
15037 count = expected_size = INTVAL (count_exp);
15038 if (CONST_INT_P (expected_size_exp) && count == 0)
15039 expected_size = INTVAL (expected_size_exp);
15040
15041 /* Step 0: Decide on preferred algorithm, desired alignment and
15042 size of chunks to be copied by main loop. */
15043
15044 alg = decide_alg (count, expected_size, true, &dynamic_check);
15045 desired_align = decide_alignment (align, alg, expected_size);
15046
15047 if (!TARGET_ALIGN_STRINGOPS)
15048 align = desired_align;
15049
15050 if (alg == libcall)
15051 return 0;
15052 gcc_assert (alg != no_stringop);
15053 if (!count)
15054 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15055 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15056 switch (alg)
15057 {
15058 case libcall:
15059 case no_stringop:
15060 gcc_unreachable ();
15061 case loop:
15062 size_needed = GET_MODE_SIZE (Pmode);
15063 break;
15064 case unrolled_loop:
15065 size_needed = GET_MODE_SIZE (Pmode) * 4;
15066 break;
15067 case rep_prefix_8_byte:
15068 size_needed = 8;
15069 break;
15070 case rep_prefix_4_byte:
15071 size_needed = 4;
15072 break;
15073 case rep_prefix_1_byte:
15074 case loop_1_byte:
15075 size_needed = 1;
15076 break;
15077 }
15078 epilogue_size_needed = size_needed;
15079
15080 /* Step 1: Prologue guard. */
15081
15082 /* Alignment code needs count to be in register. */
15083 if (CONST_INT_P (count_exp) && desired_align > align)
15084 {
15085 enum machine_mode mode = SImode;
15086 if (TARGET_64BIT && (count & ~0xffffffff))
15087 mode = DImode;
15088 count_exp = force_reg (mode, count_exp);
15089 }
15090 /* Do the cheap promotion to allow better CSE across the
15091      main loop and epilogue (i.e. one load of the big constant in
15092      front of all the code).  */
15093 if (CONST_INT_P (val_exp))
15094 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15095 desired_align, align);
15096 /* Ensure that alignment prologue won't copy past end of block. */
15097 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15098 {
15099 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15100 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15101 Make sure it is power of 2. */
15102 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15103
15104 /* To improve performance of small blocks, we jump around the VAL
15105          promoting code.  This means that if the promoted VAL is not constant,
15106          we might not use it in the epilogue and have to use the byte
15107          loop variant.  */
15108 if (epilogue_size_needed > 2 && !promoted_val)
15109 force_loopy_epilogue = true;
15110 label = gen_label_rtx ();
15111 emit_cmp_and_jump_insns (count_exp,
15112 GEN_INT (epilogue_size_needed),
15113 LTU, 0, counter_mode (count_exp), 1, label);
15114 if (GET_CODE (count_exp) == CONST_INT)
15115 ;
15116 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15117 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15118 else
15119 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15120 }
15121 if (dynamic_check != -1)
15122 {
15123 rtx hot_label = gen_label_rtx ();
15124 jump_around_label = gen_label_rtx ();
15125 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15126 LEU, 0, counter_mode (count_exp), 1, hot_label);
15127 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15128 set_storage_via_libcall (dst, count_exp, val_exp, false);
15129 emit_jump (jump_around_label);
15130 emit_label (hot_label);
15131 }
15132
15133 /* Step 2: Alignment prologue. */
15134
15135 /* Do the expensive promotion once we branched off the small blocks. */
15136 if (!promoted_val)
15137 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15138 desired_align, align);
15139 gcc_assert (desired_align >= 1 && align >= 1);
15140
15141 if (desired_align > align)
15142 {
15143 /* Except for the first move in epilogue, we no longer know
15144          constant offset in aliasing info.  It does not seem worth
15145          the pain to maintain it for the first move, so throw away
15146 the info early. */
15147 dst = change_address (dst, BLKmode, destreg);
15148 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15149 desired_align);
15150 }
15151 if (label && size_needed == 1)
15152 {
15153 emit_label (label);
15154 LABEL_NUSES (label) = 1;
15155 label = NULL;
15156 }
15157
15158 /* Step 3: Main loop. */
15159
15160 switch (alg)
15161 {
15162 case libcall:
15163 case no_stringop:
15164 gcc_unreachable ();
15165 case loop_1_byte:
15166 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15167 count_exp, QImode, 1, expected_size);
15168 break;
15169 case loop:
15170 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15171 count_exp, Pmode, 1, expected_size);
15172 break;
15173 case unrolled_loop:
15174 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15175 count_exp, Pmode, 4, expected_size);
15176 break;
15177 case rep_prefix_8_byte:
15178 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15179 DImode);
15180 break;
15181 case rep_prefix_4_byte:
15182 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15183 SImode);
15184 break;
15185 case rep_prefix_1_byte:
15186 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15187 QImode);
15188 break;
15189 }
15190   /* Properly adjust the offset of the dest memory for aliasing.  */
15191 if (CONST_INT_P (count_exp))
15192 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15193 (count / size_needed) * size_needed);
15194 else
15195 dst = change_address (dst, BLKmode, destreg);
15196
15197 /* Step 4: Epilogue to copy the remaining bytes. */
15198
15199 if (label)
15200 {
15201 /* When the main loop is done, COUNT_EXP might hold original count,
15202 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15203 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15204 bytes. Compensate if needed. */
15205
15206 if (size_needed < desired_align - align)
15207 {
15208 tmp =
15209 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15210 GEN_INT (size_needed - 1), count_exp, 1,
15211 OPTAB_DIRECT);
15212 size_needed = desired_align - align + 1;
15213 if (tmp != count_exp)
15214 emit_move_insn (count_exp, tmp);
15215 }
15216 emit_label (label);
15217 LABEL_NUSES (label) = 1;
15218 }
15219 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15220 {
15221 if (force_loopy_epilogue)
15222 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15223 size_needed);
15224 else
15225 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15226 size_needed);
15227 }
15228 if (jump_around_label)
15229 emit_label (jump_around_label);
15230 return 1;
15231 }
15232
15233 /* Expand the appropriate insns for doing strlen if not just doing
15234 repnz; scasb
15235
15236 out = result, initialized with the start address
15237 align_rtx = alignment of the address.
15238    scratch = scratch register, initialized with the start address when
15239 not aligned, otherwise undefined
15240
15241 This is just the body. It needs the initializations mentioned above and
15242 some address computing at the end. These things are done in i386.md. */
15243
15244 static void
15245 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15246 {
15247 int align;
15248 rtx tmp;
15249 rtx align_2_label = NULL_RTX;
15250 rtx align_3_label = NULL_RTX;
15251 rtx align_4_label = gen_label_rtx ();
15252 rtx end_0_label = gen_label_rtx ();
15253 rtx mem;
15254 rtx tmpreg = gen_reg_rtx (SImode);
15255 rtx scratch = gen_reg_rtx (SImode);
15256 rtx cmp;
15257
15258 align = 0;
15259 if (CONST_INT_P (align_rtx))
15260 align = INTVAL (align_rtx);
15261
15262 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15263
15264 /* Is there a known alignment and is it less than 4? */
15265 if (align < 4)
15266 {
15267 rtx scratch1 = gen_reg_rtx (Pmode);
15268 emit_move_insn (scratch1, out);
15269 /* Is there a known alignment and is it not 2? */
15270 if (align != 2)
15271 {
15272 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15273 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15274
15275 /* Leave just the 3 lower bits. */
15276 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15277 NULL_RTX, 0, OPTAB_WIDEN);
15278
15279 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15280 Pmode, 1, align_4_label);
15281 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15282 Pmode, 1, align_2_label);
15283 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15284 Pmode, 1, align_3_label);
15285 }
15286 else
15287 {
15288 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15289              check whether it is aligned to a 4-byte boundary.  */
15290
15291 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15292 NULL_RTX, 0, OPTAB_WIDEN);
15293
15294 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15295 Pmode, 1, align_4_label);
15296 }
15297
15298 mem = change_address (src, QImode, out);
15299
15300 /* Now compare the bytes. */
15301
15302       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
15303 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15304 QImode, 1, end_0_label);
15305
15306 /* Increment the address. */
15307 if (TARGET_64BIT)
15308 emit_insn (gen_adddi3 (out, out, const1_rtx));
15309 else
15310 emit_insn (gen_addsi3 (out, out, const1_rtx));
15311
15312 /* Not needed with an alignment of 2 */
15313 if (align != 2)
15314 {
15315 emit_label (align_2_label);
15316
15317 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15318 end_0_label);
15319
15320 if (TARGET_64BIT)
15321 emit_insn (gen_adddi3 (out, out, const1_rtx));
15322 else
15323 emit_insn (gen_addsi3 (out, out, const1_rtx));
15324
15325 emit_label (align_3_label);
15326 }
15327
15328 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15329 end_0_label);
15330
15331 if (TARGET_64BIT)
15332 emit_insn (gen_adddi3 (out, out, const1_rtx));
15333 else
15334 emit_insn (gen_addsi3 (out, out, const1_rtx));
15335 }
15336
15337 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15338      align this loop; it only makes the program bigger and does not
15339      speed it up.  */
15340 emit_label (align_4_label);
15341
15342 mem = change_address (src, SImode, out);
15343 emit_move_insn (scratch, mem);
15344 if (TARGET_64BIT)
15345 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15346 else
15347 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15348
15349 /* This formula yields a nonzero result iff one of the bytes is zero.
15350      This saves three branches inside the loop and many cycles.  */
15351
15352 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15353 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15354 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15355 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15356 gen_int_mode (0x80808080, SImode)));
15357 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15358 align_4_label);
15359
15360 if (TARGET_CMOVE)
15361 {
15362 rtx reg = gen_reg_rtx (SImode);
15363 rtx reg2 = gen_reg_rtx (Pmode);
15364 emit_move_insn (reg, tmpreg);
15365 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15366
15367 /* If zero is not in the first two bytes, move two bytes forward. */
15368 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15369 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15370 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15371 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15372 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15373 reg,
15374 tmpreg)));
15375 /* Emit lea manually to avoid clobbering of flags. */
15376 emit_insn (gen_rtx_SET (SImode, reg2,
15377 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15378
15379 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15380 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15381 emit_insn (gen_rtx_SET (VOIDmode, out,
15382 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15383 reg2,
15384 out)));
15385
15386 }
15387 else
15388 {
15389 rtx end_2_label = gen_label_rtx ();
15390 /* Is zero in the first two bytes? */
15391
15392 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15393 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15394 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15395 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15396 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15397 pc_rtx);
15398 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15399 JUMP_LABEL (tmp) = end_2_label;
15400
15401 /* Not in the first two. Move two bytes forward. */
15402 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15403 if (TARGET_64BIT)
15404 emit_insn (gen_adddi3 (out, out, const2_rtx));
15405 else
15406 emit_insn (gen_addsi3 (out, out, const2_rtx));
15407
15408 emit_label (end_2_label);
15409
15410 }
15411
15412 /* Avoid branch in fixing the byte. */
15413 tmpreg = gen_lowpart (QImode, tmpreg);
15414 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15415 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15416 if (TARGET_64BIT)
15417 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15418 else
15419 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15420
15421 emit_label (end_0_label);
15422 }
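/* Editorial sketch, under #if 0: the zero-byte test used by the word loop
   above is the classic (x - 0x01010101) & ~x & 0x80808080 trick.
   Subtracting one from every byte sets bit 7 of a byte if that byte was zero
   (or was >= 0x81); masking with ~x discards the bytes whose bit 7 was
   already set in X, so the result is nonzero exactly when X contains a zero
   byte.  The helper name is an assumption used only for illustration.  */
#if 0
static int
word_has_zero_byte_sketch (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
#endif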
15423
15424 /* Expand strlen. */
15425
15426 int
15427 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15428 {
15429 rtx addr, scratch1, scratch2, scratch3, scratch4;
15430
15431   /* The generic case of the strlen expander is long.  Avoid expanding
15432      it unless TARGET_INLINE_ALL_STRINGOPS.  */
15433
15434 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15435 && !TARGET_INLINE_ALL_STRINGOPS
15436 && !optimize_size
15437 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15438 return 0;
15439
15440 addr = force_reg (Pmode, XEXP (src, 0));
15441 scratch1 = gen_reg_rtx (Pmode);
15442
15443 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15444 && !optimize_size)
15445 {
15446 /* Well it seems that some optimizer does not combine a call like
15447 foo(strlen(bar), strlen(bar));
15448          when the move and the subtraction are done here.  It does calculate
15449 the length just once when these instructions are done inside of
15450 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15451 often used and I use one fewer register for the lifetime of
15452 output_strlen_unroll() this is better. */
15453
15454 emit_move_insn (out, addr);
15455
15456 ix86_expand_strlensi_unroll_1 (out, src, align);
15457
15458 /* strlensi_unroll_1 returns the address of the zero at the end of
15459 the string, like memchr(), so compute the length by subtracting
15460 the start address. */
15461 if (TARGET_64BIT)
15462 emit_insn (gen_subdi3 (out, out, addr));
15463 else
15464 emit_insn (gen_subsi3 (out, out, addr));
15465 }
15466 else
15467 {
15468 rtx unspec;
15469 scratch2 = gen_reg_rtx (Pmode);
15470 scratch3 = gen_reg_rtx (Pmode);
15471 scratch4 = force_reg (Pmode, constm1_rtx);
15472
15473 emit_move_insn (scratch3, addr);
15474 eoschar = force_reg (QImode, eoschar);
15475
15476 src = replace_equiv_address_nv (src, scratch3);
15477
15478 /* If .md starts supporting :P, this can be done in .md. */
15479 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15480 scratch4), UNSPEC_SCAS);
15481 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15482 if (TARGET_64BIT)
15483 {
15484 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15485 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15486 }
15487 else
15488 {
15489 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15490 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15491 }
15492 }
15493 return 1;
15494 }
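/* Editorial sketch, under #if 0, of the arithmetic in the "repnz; scasb"
   branch above: the count register starts at -1 (scratch4) and, as far as
   this expander is concerned, is decremented once for every byte examined
   including the terminating zero, so it ends up holding -(len + 2) and the
   final one's complement plus -1 recovers len.  The helper name is an
   assumption used only for illustration.  */
#if 0
static unsigned int
strlen_from_scas_counter_sketch (const char *s)
{
  unsigned int counter = (unsigned int) -1;
  const char *p = s;

  do
    counter--;			/* One decrement per byte scanned.  */
  while (*p++ != 0);

  return ~counter - 1;		/* ~counter == len + 1, so this is len.  */
}
#endif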
15495
15496 /* For a given symbol (function) construct code to compute the address of its
15497    PLT entry in the large x86-64 PIC model.  */
15498 rtx
15499 construct_plt_address (rtx symbol)
15500 {
15501 rtx tmp = gen_reg_rtx (Pmode);
15502 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15503
15504 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15505 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15506
15507 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15508 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15509 return tmp;
15510 }
15511
15512 void
15513 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15514 rtx callarg2 ATTRIBUTE_UNUSED,
15515 rtx pop, int sibcall)
15516 {
15517 rtx use = NULL, call;
15518
15519 if (pop == const0_rtx)
15520 pop = NULL;
15521 gcc_assert (!TARGET_64BIT || !pop);
15522
15523 if (TARGET_MACHO && !TARGET_64BIT)
15524 {
15525 #if TARGET_MACHO
15526 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15527 fnaddr = machopic_indirect_call_target (fnaddr);
15528 #endif
15529 }
15530 else
15531 {
15532 /* Static functions and indirect calls don't need the pic register. */
15533 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15534 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15535 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15536 use_reg (&use, pic_offset_table_rtx);
15537 }
15538
15539 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15540 {
15541 rtx al = gen_rtx_REG (QImode, 0);
15542 emit_move_insn (al, callarg2);
15543 use_reg (&use, al);
15544 }
15545
15546 if (ix86_cmodel == CM_LARGE_PIC
15547 && GET_CODE (fnaddr) == MEM
15548 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15549 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15550 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15551 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15552 {
15553 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15554 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15555 }
15556 if (sibcall && TARGET_64BIT
15557 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15558 {
15559 rtx addr;
15560 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15561 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15562 emit_move_insn (fnaddr, addr);
15563 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15564 }
15565
15566 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15567 if (retval)
15568 call = gen_rtx_SET (VOIDmode, retval, call);
15569 if (pop)
15570 {
15571 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15572 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15573 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15574 }
15575
15576 call = emit_call_insn (call);
15577 if (use)
15578 CALL_INSN_FUNCTION_USAGE (call) = use;
15579 }
15580
15581 \f
15582 /* Clear stack slot assignments remembered from previous functions.
15583 This is called from INIT_EXPANDERS once before RTL is emitted for each
15584 function. */
15585
15586 static struct machine_function *
15587 ix86_init_machine_status (void)
15588 {
15589 struct machine_function *f;
15590
15591 f = GGC_CNEW (struct machine_function);
15592 f->use_fast_prologue_epilogue_nregs = -1;
15593 f->tls_descriptor_call_expanded_p = 0;
15594
15595 return f;
15596 }
15597
15598 /* Return a MEM corresponding to a stack slot with mode MODE.
15599 Allocate a new slot if necessary.
15600
15601 The RTL for a function can have several slots available: N is
15602 which slot to use. */
15603
15604 rtx
15605 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15606 {
15607 struct stack_local_entry *s;
15608
15609 gcc_assert (n < MAX_386_STACK_LOCALS);
15610
15611 /* Virtual slot is valid only before vregs are instantiated. */
15612 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15613
15614 for (s = ix86_stack_locals; s; s = s->next)
15615 if (s->mode == mode && s->n == n)
15616 return copy_rtx (s->rtl);
15617
15618 s = (struct stack_local_entry *)
15619 ggc_alloc (sizeof (struct stack_local_entry));
15620 s->n = n;
15621 s->mode = mode;
15622 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15623
15624 s->next = ix86_stack_locals;
15625 ix86_stack_locals = s;
15626 return s->rtl;
15627 }
15628
15629 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15630
15631 static GTY(()) rtx ix86_tls_symbol;
15632 rtx
15633 ix86_tls_get_addr (void)
15634 {
15635
15636 if (!ix86_tls_symbol)
15637 {
15638 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15639 (TARGET_ANY_GNU_TLS
15640 && !TARGET_64BIT)
15641 ? "___tls_get_addr"
15642 : "__tls_get_addr");
15643 }
15644
15645 return ix86_tls_symbol;
15646 }
15647
15648 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15649
15650 static GTY(()) rtx ix86_tls_module_base_symbol;
15651 rtx
15652 ix86_tls_module_base (void)
15653 {
15654
15655 if (!ix86_tls_module_base_symbol)
15656 {
15657 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15658 "_TLS_MODULE_BASE_");
15659 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15660 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15661 }
15662
15663 return ix86_tls_module_base_symbol;
15664 }
15665 \f
15666 /* Calculate the length of the memory address in the instruction
15667 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15668
15669 int
15670 memory_address_length (rtx addr)
15671 {
15672 struct ix86_address parts;
15673 rtx base, index, disp;
15674 int len;
15675 int ok;
15676
15677 if (GET_CODE (addr) == PRE_DEC
15678 || GET_CODE (addr) == POST_INC
15679 || GET_CODE (addr) == PRE_MODIFY
15680 || GET_CODE (addr) == POST_MODIFY)
15681 return 0;
15682
15683 ok = ix86_decompose_address (addr, &parts);
15684 gcc_assert (ok);
15685
15686 if (parts.base && GET_CODE (parts.base) == SUBREG)
15687 parts.base = SUBREG_REG (parts.base);
15688 if (parts.index && GET_CODE (parts.index) == SUBREG)
15689 parts.index = SUBREG_REG (parts.index);
15690
15691 base = parts.base;
15692 index = parts.index;
15693 disp = parts.disp;
15694 len = 0;
15695
15696 /* Rule of thumb:
15697 - esp as the base always wants an index,
15698 - ebp as the base always wants a displacement. */
15699
15700 /* Register Indirect. */
15701 if (base && !index && !disp)
15702 {
15703 /* esp (for its index) and ebp (for its displacement) need
15704 the two-byte modrm form. */
15705 if (addr == stack_pointer_rtx
15706 || addr == arg_pointer_rtx
15707 || addr == frame_pointer_rtx
15708 || addr == hard_frame_pointer_rtx)
15709 len = 1;
15710 }
15711
15712 /* Direct Addressing. */
15713 else if (disp && !base && !index)
15714 len = 4;
15715
15716 else
15717 {
15718 /* Find the length of the displacement constant. */
15719 if (disp)
15720 {
15721 if (base && satisfies_constraint_K (disp))
15722 len = 1;
15723 else
15724 len = 4;
15725 }
15726 /* ebp always wants a displacement. */
15727 else if (base == hard_frame_pointer_rtx)
15728 len = 1;
15729
15730 /* An index requires the two-byte modrm form.... */
15731 if (index
15732 /* ...like esp, which always wants an index. */
15733 || base == stack_pointer_rtx
15734 || base == arg_pointer_rtx
15735 || base == frame_pointer_rtx)
15736 len += 1;
15737 }
15738
15739 return len;
15740 }
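/* A few worked examples of the extra-byte count returned above, assuming
   32-bit addressing (editorial illustration of the ModRM/SIB rules the
   routine models):

     (%eax)          -> 0  plain register indirect
     (%esp)          -> 1  needs a SIB byte
     (%ebp)          -> 1  needs a zero 8-bit displacement
     8(%eax)         -> 1  8-bit displacement
     1024(%eax)      -> 4  32-bit displacement
     symbol          -> 4  32-bit absolute address
     8(%eax,%ebx,4)  -> 2  SIB byte plus 8-bit displacement  */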
15741
15742 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15743    is set, expect that the insn has an 8-bit immediate alternative.  */
15744 int
15745 ix86_attr_length_immediate_default (rtx insn, int shortform)
15746 {
15747 int len = 0;
15748 int i;
15749 extract_insn_cached (insn);
15750 for (i = recog_data.n_operands - 1; i >= 0; --i)
15751 if (CONSTANT_P (recog_data.operand[i]))
15752 {
15753 gcc_assert (!len);
15754 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15755 len = 1;
15756 else
15757 {
15758 switch (get_attr_mode (insn))
15759 {
15760 case MODE_QI:
15761 len+=1;
15762 break;
15763 case MODE_HI:
15764 len+=2;
15765 break;
15766 case MODE_SI:
15767 len+=4;
15768 break;
15769 	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
15770 case MODE_DI:
15771 len+=4;
15772 break;
15773 default:
15774 fatal_insn ("unknown insn mode", insn);
15775 }
15776 }
15777 }
15778 return len;
15779 }
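/* Worked examples for the immediate-size computation above (editorial
   illustration): with SHORTFORM set, "addl $3, %eax" satisfies constraint K
   (a signed 8-bit constant) and counts 1 byte, while "addl $100000, %eax"
   needs the full 32-bit immediate and counts 4; without a short form the
   mode alone decides -- 1 (QI), 2 (HI), 4 (SI) and 4 (DI), since 64-bit
   instructions take a sign-extended 32-bit immediate.  */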
15780 /* Compute default value for "length_address" attribute. */
15781 int
15782 ix86_attr_length_address_default (rtx insn)
15783 {
15784 int i;
15785
15786 if (get_attr_type (insn) == TYPE_LEA)
15787 {
15788 rtx set = PATTERN (insn);
15789
15790 if (GET_CODE (set) == PARALLEL)
15791 set = XVECEXP (set, 0, 0);
15792
15793 gcc_assert (GET_CODE (set) == SET);
15794
15795 return memory_address_length (SET_SRC (set));
15796 }
15797
15798 extract_insn_cached (insn);
15799 for (i = recog_data.n_operands - 1; i >= 0; --i)
15800 if (MEM_P (recog_data.operand[i]))
15801 {
15802 return memory_address_length (XEXP (recog_data.operand[i], 0));
15803 break;
15804 }
15805 return 0;
15806 }
15807 \f
15808 /* Return the maximum number of instructions a cpu can issue. */
15809
15810 static int
15811 ix86_issue_rate (void)
15812 {
15813 switch (ix86_tune)
15814 {
15815 case PROCESSOR_PENTIUM:
15816 case PROCESSOR_K6:
15817 return 2;
15818
15819 case PROCESSOR_PENTIUMPRO:
15820 case PROCESSOR_PENTIUM4:
15821 case PROCESSOR_ATHLON:
15822 case PROCESSOR_K8:
15823 case PROCESSOR_AMDFAM10:
15824 case PROCESSOR_NOCONA:
15825 case PROCESSOR_GENERIC32:
15826 case PROCESSOR_GENERIC64:
15827 return 3;
15828
15829 case PROCESSOR_CORE2:
15830 return 4;
15831
15832 default:
15833 return 1;
15834 }
15835 }
15836
15837 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15838    by DEP_INSN and nothing else set by DEP_INSN.  */
15839
15840 static int
15841 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15842 {
15843 rtx set, set2;
15844
15845 /* Simplify the test for uninteresting insns. */
15846 if (insn_type != TYPE_SETCC
15847 && insn_type != TYPE_ICMOV
15848 && insn_type != TYPE_FCMOV
15849 && insn_type != TYPE_IBR)
15850 return 0;
15851
15852 if ((set = single_set (dep_insn)) != 0)
15853 {
15854 set = SET_DEST (set);
15855 set2 = NULL_RTX;
15856 }
15857 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15858 && XVECLEN (PATTERN (dep_insn), 0) == 2
15859 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15860 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15861 {
15862 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15863       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15864 }
15865 else
15866 return 0;
15867
15868 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15869 return 0;
15870
15871 /* This test is true if the dependent insn reads the flags but
15872 not any other potentially set register. */
15873 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15874 return 0;
15875
15876 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15877 return 0;
15878
15879 return 1;
15880 }
15881
15882 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15883 address with operands set by DEP_INSN. */
15884
15885 static int
15886 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15887 {
15888 rtx addr;
15889
15890 if (insn_type == TYPE_LEA
15891 && TARGET_PENTIUM)
15892 {
15893 addr = PATTERN (insn);
15894
15895 if (GET_CODE (addr) == PARALLEL)
15896 addr = XVECEXP (addr, 0, 0);
15897
15898 gcc_assert (GET_CODE (addr) == SET);
15899
15900 addr = SET_SRC (addr);
15901 }
15902 else
15903 {
15904 int i;
15905 extract_insn_cached (insn);
15906 for (i = recog_data.n_operands - 1; i >= 0; --i)
15907 if (MEM_P (recog_data.operand[i]))
15908 {
15909 addr = XEXP (recog_data.operand[i], 0);
15910 goto found;
15911 }
15912 return 0;
15913 found:;
15914 }
15915
15916 return modified_in_p (addr, dep_insn);
15917 }
15918
15919 static int
15920 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15921 {
15922 enum attr_type insn_type, dep_insn_type;
15923 enum attr_memory memory;
15924 rtx set, set2;
15925 int dep_insn_code_number;
15926
15927 /* Anti and output dependencies have zero cost on all CPUs. */
15928 if (REG_NOTE_KIND (link) != 0)
15929 return 0;
15930
15931 dep_insn_code_number = recog_memoized (dep_insn);
15932
15933 /* If we can't recognize the insns, we can't really do anything. */
15934 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15935 return cost;
15936
15937 insn_type = get_attr_type (insn);
15938 dep_insn_type = get_attr_type (dep_insn);
15939
15940 switch (ix86_tune)
15941 {
15942 case PROCESSOR_PENTIUM:
15943 /* Address Generation Interlock adds a cycle of latency. */
15944 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15945 cost += 1;
15946
15947 /* ??? Compares pair with jump/setcc. */
15948 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15949 cost = 0;
15950
15951 /* Floating point stores require value to be ready one cycle earlier. */
15952 if (insn_type == TYPE_FMOV
15953 && get_attr_memory (insn) == MEMORY_STORE
15954 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15955 cost += 1;
15956 break;
15957
15958 case PROCESSOR_PENTIUMPRO:
15959 memory = get_attr_memory (insn);
15960
15961 /* INT->FP conversion is expensive. */
15962 if (get_attr_fp_int_src (dep_insn))
15963 cost += 5;
15964
15965 /* There is one cycle extra latency between an FP op and a store. */
15966 if (insn_type == TYPE_FMOV
15967 && (set = single_set (dep_insn)) != NULL_RTX
15968 && (set2 = single_set (insn)) != NULL_RTX
15969 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15970 && MEM_P (SET_DEST (set2)))
15971 cost += 1;
15972
15973       /* Show the ability of the reorder buffer to hide the latency of a load by
15974 	 executing it in parallel with the previous instruction when the
15975 	 previous instruction is not needed to compute the address.  */
15976 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15977 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15978 {
15979 	  /* Claim moves to take one cycle, as the core can issue one load
15980 	     at a time and the next load can start a cycle later.  */
15981 if (dep_insn_type == TYPE_IMOV
15982 || dep_insn_type == TYPE_FMOV)
15983 cost = 1;
15984 else if (cost > 1)
15985 cost--;
15986 }
15987 break;
15988
15989 case PROCESSOR_K6:
15990 memory = get_attr_memory (insn);
15991
15992 /* The esp dependency is resolved before the instruction is really
15993 finished. */
15994 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15995 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15996 return 1;
15997
15998 /* INT->FP conversion is expensive. */
15999 if (get_attr_fp_int_src (dep_insn))
16000 cost += 5;
16001
16002       /* Show the ability of the reorder buffer to hide the latency of a load by
16003 	 executing it in parallel with the previous instruction when the
16004 	 previous instruction is not needed to compute the address.  */
16005 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16006 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16007 {
16008 	  /* Claim moves to take one cycle, as the core can issue one load
16009 	     at a time and the next load can start a cycle later.  */
16010 if (dep_insn_type == TYPE_IMOV
16011 || dep_insn_type == TYPE_FMOV)
16012 cost = 1;
16013 else if (cost > 2)
16014 cost -= 2;
16015 else
16016 cost = 1;
16017 }
16018 break;
16019
16020 case PROCESSOR_ATHLON:
16021 case PROCESSOR_K8:
16022 case PROCESSOR_AMDFAM10:
16023 case PROCESSOR_GENERIC32:
16024 case PROCESSOR_GENERIC64:
16025 memory = get_attr_memory (insn);
16026
16027       /* Show the ability of the reorder buffer to hide the latency of a load by
16028 	 executing it in parallel with the previous instruction when the
16029 	 previous instruction is not needed to compute the address.  */
16030 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16031 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16032 {
16033 enum attr_unit unit = get_attr_unit (insn);
16034 int loadcost = 3;
16035
16036 /* Because of the difference between the length of integer and
16037 floating unit pipeline preparation stages, the memory operands
16038 for floating point are cheaper.
16039
16040 	     ??? For Athlon the difference is most probably 2.  */
16041 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16042 loadcost = 3;
16043 else
16044 loadcost = TARGET_ATHLON ? 2 : 0;
16045
16046 if (cost >= loadcost)
16047 cost -= loadcost;
16048 else
16049 cost = 0;
16050 }
16051
16052 default:
16053 break;
16054 }
16055
16056 return cost;
16057 }
16058
16059 /* How many alternative schedules to try. This should be as wide as the
16060 scheduling freedom in the DFA, but no wider. Making this value too
16061    large results in extra work for the scheduler.  */
16062
16063 static int
16064 ia32_multipass_dfa_lookahead (void)
16065 {
16066 if (ix86_tune == PROCESSOR_PENTIUM)
16067 return 2;
16068
16069 if (ix86_tune == PROCESSOR_PENTIUMPRO
16070 || ix86_tune == PROCESSOR_K6)
16071 return 1;
16072
16073 else
16074 return 0;
16075 }
16076
16077 \f
16078 /* Compute the alignment given to a constant that is being placed in memory.
16079 EXP is the constant and ALIGN is the alignment that the object would
16080 ordinarily have.
16081 The value of this function is used instead of that alignment to align
16082 the object. */
16083
16084 int
16085 ix86_constant_alignment (tree exp, int align)
16086 {
16087 if (TREE_CODE (exp) == REAL_CST)
16088 {
16089 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16090 return 64;
16091 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16092 return 128;
16093 }
16094 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16095 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16096 return BITS_PER_WORD;
16097
16098 return align;
16099 }
16100
16101 /* Compute the alignment for a static variable.
16102 TYPE is the data type, and ALIGN is the alignment that
16103 the object would ordinarily have. The value of this function is used
16104 instead of that alignment to align the object. */
16105
16106 int
16107 ix86_data_alignment (tree type, int align)
16108 {
16109 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16110
16111 if (AGGREGATE_TYPE_P (type)
16112 && TYPE_SIZE (type)
16113 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16114 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16115 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16116 && align < max_align)
16117 align = max_align;
16118
16119 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16120      to a 16-byte boundary.  */
16121 if (TARGET_64BIT)
16122 {
16123 if (AGGREGATE_TYPE_P (type)
16124 && TYPE_SIZE (type)
16125 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16126 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16127 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16128 return 128;
16129 }
16130
16131 if (TREE_CODE (type) == ARRAY_TYPE)
16132 {
16133 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16134 return 64;
16135 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16136 return 128;
16137 }
16138 else if (TREE_CODE (type) == COMPLEX_TYPE)
16139 {
16140
16141 if (TYPE_MODE (type) == DCmode && align < 64)
16142 return 64;
16143 if (TYPE_MODE (type) == XCmode && align < 128)
16144 return 128;
16145 }
16146 else if ((TREE_CODE (type) == RECORD_TYPE
16147 || TREE_CODE (type) == UNION_TYPE
16148 || TREE_CODE (type) == QUAL_UNION_TYPE)
16149 && TYPE_FIELDS (type))
16150 {
16151 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16152 return 64;
16153 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16154 return 128;
16155 }
16156 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16157 || TREE_CODE (type) == INTEGER_TYPE)
16158 {
16159 if (TYPE_MODE (type) == DFmode && align < 64)
16160 return 64;
16161 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16162 return 128;
16163 }
16164
16165 return align;
16166 }
16167
16168 /* Compute the alignment for a local variable.
16169 TYPE is the data type, and ALIGN is the alignment that
16170 the object would ordinarily have. The value of this macro is used
16171 instead of that alignment to align the object. */
16172
16173 int
16174 ix86_local_alignment (tree type, int align)
16175 {
16176 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16177      to a 16-byte boundary.  */
16178 if (TARGET_64BIT)
16179 {
16180 if (AGGREGATE_TYPE_P (type)
16181 && TYPE_SIZE (type)
16182 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16183 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16184 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16185 return 128;
16186 }
16187 if (TREE_CODE (type) == ARRAY_TYPE)
16188 {
16189 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16190 return 64;
16191 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16192 return 128;
16193 }
16194 else if (TREE_CODE (type) == COMPLEX_TYPE)
16195 {
16196 if (TYPE_MODE (type) == DCmode && align < 64)
16197 return 64;
16198 if (TYPE_MODE (type) == XCmode && align < 128)
16199 return 128;
16200 }
16201 else if ((TREE_CODE (type) == RECORD_TYPE
16202 || TREE_CODE (type) == UNION_TYPE
16203 || TREE_CODE (type) == QUAL_UNION_TYPE)
16204 && TYPE_FIELDS (type))
16205 {
16206 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16207 return 64;
16208 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16209 return 128;
16210 }
16211 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16212 || TREE_CODE (type) == INTEGER_TYPE)
16213 {
16214
16215 if (TYPE_MODE (type) == DFmode && align < 64)
16216 return 64;
16217 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16218 return 128;
16219 }
16220 return align;
16221 }
16222 \f
16223 /* Emit RTL insns to initialize the variable parts of a trampoline.
16224 FNADDR is an RTX for the address of the function's pure code.
16225 CXT is an RTX for the static chain value for the function. */
16226 void
16227 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16228 {
16229 if (!TARGET_64BIT)
16230 {
16231 /* Compute offset from the end of the jmp to the target function. */
16232 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16233 plus_constant (tramp, 10),
16234 NULL_RTX, 1, OPTAB_DIRECT);
16235 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16236 gen_int_mode (0xb9, QImode));
16237 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16238 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16239 gen_int_mode (0xe9, QImode));
16240 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16241 }
16242 else
16243 {
16244 int offset = 0;
16245 /* Try to load address using shorter movl instead of movabs.
16246 We may want to support movq for kernel mode, but kernel does not use
16247 trampolines at the moment. */
16248 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16249 {
16250 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16251 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16252 gen_int_mode (0xbb41, HImode));
16253 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16254 gen_lowpart (SImode, fnaddr));
16255 offset += 6;
16256 }
16257 else
16258 {
16259 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16260 gen_int_mode (0xbb49, HImode));
16261 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16262 fnaddr);
16263 offset += 10;
16264 }
16265 /* Load static chain using movabs to r10. */
16266 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16267 gen_int_mode (0xba49, HImode));
16268 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16269 cxt);
16270 offset += 10;
16271       /* Jump to r11.  */
16272 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16273 gen_int_mode (0xff49, HImode));
16274 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16275 gen_int_mode (0xe3, QImode));
16276 offset += 3;
16277 gcc_assert (offset <= TRAMPOLINE_SIZE);
16278 }
16279
16280 #ifdef ENABLE_EXECUTE_STACK
16281 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16282 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16283 #endif
16284 }
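/* Editorial note on the byte layout written above.  For the 32-bit
   trampoline the ten bytes are

     offset 0:  b9 <cxt:4>    movl  $CXT, %ecx     (static chain register)
     offset 5:  e9 <disp:4>   jmp   FNADDR

   where disp = FNADDR - (TRAMP + 10), i.e. the displacement is relative to
   the end of the trampoline.  The 64-bit variant loads FNADDR into %r11
   (41 bb imm32 or 49 bb imm64), loads the static chain into %r10 with
   49 ba imm64, and finishes with 49 ff e3, "jmp *%r11".  */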
16285 \f
16286 /* Codes for all the SSE/MMX builtins. */
16287 enum ix86_builtins
16288 {
16289 IX86_BUILTIN_ADDPS,
16290 IX86_BUILTIN_ADDSS,
16291 IX86_BUILTIN_DIVPS,
16292 IX86_BUILTIN_DIVSS,
16293 IX86_BUILTIN_MULPS,
16294 IX86_BUILTIN_MULSS,
16295 IX86_BUILTIN_SUBPS,
16296 IX86_BUILTIN_SUBSS,
16297
16298 IX86_BUILTIN_CMPEQPS,
16299 IX86_BUILTIN_CMPLTPS,
16300 IX86_BUILTIN_CMPLEPS,
16301 IX86_BUILTIN_CMPGTPS,
16302 IX86_BUILTIN_CMPGEPS,
16303 IX86_BUILTIN_CMPNEQPS,
16304 IX86_BUILTIN_CMPNLTPS,
16305 IX86_BUILTIN_CMPNLEPS,
16306 IX86_BUILTIN_CMPNGTPS,
16307 IX86_BUILTIN_CMPNGEPS,
16308 IX86_BUILTIN_CMPORDPS,
16309 IX86_BUILTIN_CMPUNORDPS,
16310 IX86_BUILTIN_CMPEQSS,
16311 IX86_BUILTIN_CMPLTSS,
16312 IX86_BUILTIN_CMPLESS,
16313 IX86_BUILTIN_CMPNEQSS,
16314 IX86_BUILTIN_CMPNLTSS,
16315 IX86_BUILTIN_CMPNLESS,
16316 IX86_BUILTIN_CMPNGTSS,
16317 IX86_BUILTIN_CMPNGESS,
16318 IX86_BUILTIN_CMPORDSS,
16319 IX86_BUILTIN_CMPUNORDSS,
16320
16321 IX86_BUILTIN_COMIEQSS,
16322 IX86_BUILTIN_COMILTSS,
16323 IX86_BUILTIN_COMILESS,
16324 IX86_BUILTIN_COMIGTSS,
16325 IX86_BUILTIN_COMIGESS,
16326 IX86_BUILTIN_COMINEQSS,
16327 IX86_BUILTIN_UCOMIEQSS,
16328 IX86_BUILTIN_UCOMILTSS,
16329 IX86_BUILTIN_UCOMILESS,
16330 IX86_BUILTIN_UCOMIGTSS,
16331 IX86_BUILTIN_UCOMIGESS,
16332 IX86_BUILTIN_UCOMINEQSS,
16333
16334 IX86_BUILTIN_CVTPI2PS,
16335 IX86_BUILTIN_CVTPS2PI,
16336 IX86_BUILTIN_CVTSI2SS,
16337 IX86_BUILTIN_CVTSI642SS,
16338 IX86_BUILTIN_CVTSS2SI,
16339 IX86_BUILTIN_CVTSS2SI64,
16340 IX86_BUILTIN_CVTTPS2PI,
16341 IX86_BUILTIN_CVTTSS2SI,
16342 IX86_BUILTIN_CVTTSS2SI64,
16343
16344 IX86_BUILTIN_MAXPS,
16345 IX86_BUILTIN_MAXSS,
16346 IX86_BUILTIN_MINPS,
16347 IX86_BUILTIN_MINSS,
16348
16349 IX86_BUILTIN_LOADUPS,
16350 IX86_BUILTIN_STOREUPS,
16351 IX86_BUILTIN_MOVSS,
16352
16353 IX86_BUILTIN_MOVHLPS,
16354 IX86_BUILTIN_MOVLHPS,
16355 IX86_BUILTIN_LOADHPS,
16356 IX86_BUILTIN_LOADLPS,
16357 IX86_BUILTIN_STOREHPS,
16358 IX86_BUILTIN_STORELPS,
16359
16360 IX86_BUILTIN_MASKMOVQ,
16361 IX86_BUILTIN_MOVMSKPS,
16362 IX86_BUILTIN_PMOVMSKB,
16363
16364 IX86_BUILTIN_MOVNTPS,
16365 IX86_BUILTIN_MOVNTQ,
16366
16367 IX86_BUILTIN_LOADDQU,
16368 IX86_BUILTIN_STOREDQU,
16369
16370 IX86_BUILTIN_PACKSSWB,
16371 IX86_BUILTIN_PACKSSDW,
16372 IX86_BUILTIN_PACKUSWB,
16373
16374 IX86_BUILTIN_PADDB,
16375 IX86_BUILTIN_PADDW,
16376 IX86_BUILTIN_PADDD,
16377 IX86_BUILTIN_PADDQ,
16378 IX86_BUILTIN_PADDSB,
16379 IX86_BUILTIN_PADDSW,
16380 IX86_BUILTIN_PADDUSB,
16381 IX86_BUILTIN_PADDUSW,
16382 IX86_BUILTIN_PSUBB,
16383 IX86_BUILTIN_PSUBW,
16384 IX86_BUILTIN_PSUBD,
16385 IX86_BUILTIN_PSUBQ,
16386 IX86_BUILTIN_PSUBSB,
16387 IX86_BUILTIN_PSUBSW,
16388 IX86_BUILTIN_PSUBUSB,
16389 IX86_BUILTIN_PSUBUSW,
16390
16391 IX86_BUILTIN_PAND,
16392 IX86_BUILTIN_PANDN,
16393 IX86_BUILTIN_POR,
16394 IX86_BUILTIN_PXOR,
16395
16396 IX86_BUILTIN_PAVGB,
16397 IX86_BUILTIN_PAVGW,
16398
16399 IX86_BUILTIN_PCMPEQB,
16400 IX86_BUILTIN_PCMPEQW,
16401 IX86_BUILTIN_PCMPEQD,
16402 IX86_BUILTIN_PCMPGTB,
16403 IX86_BUILTIN_PCMPGTW,
16404 IX86_BUILTIN_PCMPGTD,
16405
16406 IX86_BUILTIN_PMADDWD,
16407
16408 IX86_BUILTIN_PMAXSW,
16409 IX86_BUILTIN_PMAXUB,
16410 IX86_BUILTIN_PMINSW,
16411 IX86_BUILTIN_PMINUB,
16412
16413 IX86_BUILTIN_PMULHUW,
16414 IX86_BUILTIN_PMULHW,
16415 IX86_BUILTIN_PMULLW,
16416
16417 IX86_BUILTIN_PSADBW,
16418 IX86_BUILTIN_PSHUFW,
16419
16420 IX86_BUILTIN_PSLLW,
16421 IX86_BUILTIN_PSLLD,
16422 IX86_BUILTIN_PSLLQ,
16423 IX86_BUILTIN_PSRAW,
16424 IX86_BUILTIN_PSRAD,
16425 IX86_BUILTIN_PSRLW,
16426 IX86_BUILTIN_PSRLD,
16427 IX86_BUILTIN_PSRLQ,
16428 IX86_BUILTIN_PSLLWI,
16429 IX86_BUILTIN_PSLLDI,
16430 IX86_BUILTIN_PSLLQI,
16431 IX86_BUILTIN_PSRAWI,
16432 IX86_BUILTIN_PSRADI,
16433 IX86_BUILTIN_PSRLWI,
16434 IX86_BUILTIN_PSRLDI,
16435 IX86_BUILTIN_PSRLQI,
16436
16437 IX86_BUILTIN_PUNPCKHBW,
16438 IX86_BUILTIN_PUNPCKHWD,
16439 IX86_BUILTIN_PUNPCKHDQ,
16440 IX86_BUILTIN_PUNPCKLBW,
16441 IX86_BUILTIN_PUNPCKLWD,
16442 IX86_BUILTIN_PUNPCKLDQ,
16443
16444 IX86_BUILTIN_SHUFPS,
16445
16446 IX86_BUILTIN_RCPPS,
16447 IX86_BUILTIN_RCPSS,
16448 IX86_BUILTIN_RSQRTPS,
16449 IX86_BUILTIN_RSQRTSS,
16450 IX86_BUILTIN_RSQRTF,
16451 IX86_BUILTIN_SQRTPS,
16452 IX86_BUILTIN_SQRTSS,
16453
16454 IX86_BUILTIN_UNPCKHPS,
16455 IX86_BUILTIN_UNPCKLPS,
16456
16457 IX86_BUILTIN_ANDPS,
16458 IX86_BUILTIN_ANDNPS,
16459 IX86_BUILTIN_ORPS,
16460 IX86_BUILTIN_XORPS,
16461
16462 IX86_BUILTIN_EMMS,
16463 IX86_BUILTIN_LDMXCSR,
16464 IX86_BUILTIN_STMXCSR,
16465 IX86_BUILTIN_SFENCE,
16466
16467 /* 3DNow! Original */
16468 IX86_BUILTIN_FEMMS,
16469 IX86_BUILTIN_PAVGUSB,
16470 IX86_BUILTIN_PF2ID,
16471 IX86_BUILTIN_PFACC,
16472 IX86_BUILTIN_PFADD,
16473 IX86_BUILTIN_PFCMPEQ,
16474 IX86_BUILTIN_PFCMPGE,
16475 IX86_BUILTIN_PFCMPGT,
16476 IX86_BUILTIN_PFMAX,
16477 IX86_BUILTIN_PFMIN,
16478 IX86_BUILTIN_PFMUL,
16479 IX86_BUILTIN_PFRCP,
16480 IX86_BUILTIN_PFRCPIT1,
16481 IX86_BUILTIN_PFRCPIT2,
16482 IX86_BUILTIN_PFRSQIT1,
16483 IX86_BUILTIN_PFRSQRT,
16484 IX86_BUILTIN_PFSUB,
16485 IX86_BUILTIN_PFSUBR,
16486 IX86_BUILTIN_PI2FD,
16487 IX86_BUILTIN_PMULHRW,
16488
16489 /* 3DNow! Athlon Extensions */
16490 IX86_BUILTIN_PF2IW,
16491 IX86_BUILTIN_PFNACC,
16492 IX86_BUILTIN_PFPNACC,
16493 IX86_BUILTIN_PI2FW,
16494 IX86_BUILTIN_PSWAPDSI,
16495 IX86_BUILTIN_PSWAPDSF,
16496
16497 /* SSE2 */
16498 IX86_BUILTIN_ADDPD,
16499 IX86_BUILTIN_ADDSD,
16500 IX86_BUILTIN_DIVPD,
16501 IX86_BUILTIN_DIVSD,
16502 IX86_BUILTIN_MULPD,
16503 IX86_BUILTIN_MULSD,
16504 IX86_BUILTIN_SUBPD,
16505 IX86_BUILTIN_SUBSD,
16506
16507 IX86_BUILTIN_CMPEQPD,
16508 IX86_BUILTIN_CMPLTPD,
16509 IX86_BUILTIN_CMPLEPD,
16510 IX86_BUILTIN_CMPGTPD,
16511 IX86_BUILTIN_CMPGEPD,
16512 IX86_BUILTIN_CMPNEQPD,
16513 IX86_BUILTIN_CMPNLTPD,
16514 IX86_BUILTIN_CMPNLEPD,
16515 IX86_BUILTIN_CMPNGTPD,
16516 IX86_BUILTIN_CMPNGEPD,
16517 IX86_BUILTIN_CMPORDPD,
16518 IX86_BUILTIN_CMPUNORDPD,
16519 IX86_BUILTIN_CMPEQSD,
16520 IX86_BUILTIN_CMPLTSD,
16521 IX86_BUILTIN_CMPLESD,
16522 IX86_BUILTIN_CMPNEQSD,
16523 IX86_BUILTIN_CMPNLTSD,
16524 IX86_BUILTIN_CMPNLESD,
16525 IX86_BUILTIN_CMPORDSD,
16526 IX86_BUILTIN_CMPUNORDSD,
16527
16528 IX86_BUILTIN_COMIEQSD,
16529 IX86_BUILTIN_COMILTSD,
16530 IX86_BUILTIN_COMILESD,
16531 IX86_BUILTIN_COMIGTSD,
16532 IX86_BUILTIN_COMIGESD,
16533 IX86_BUILTIN_COMINEQSD,
16534 IX86_BUILTIN_UCOMIEQSD,
16535 IX86_BUILTIN_UCOMILTSD,
16536 IX86_BUILTIN_UCOMILESD,
16537 IX86_BUILTIN_UCOMIGTSD,
16538 IX86_BUILTIN_UCOMIGESD,
16539 IX86_BUILTIN_UCOMINEQSD,
16540
16541 IX86_BUILTIN_MAXPD,
16542 IX86_BUILTIN_MAXSD,
16543 IX86_BUILTIN_MINPD,
16544 IX86_BUILTIN_MINSD,
16545
16546 IX86_BUILTIN_ANDPD,
16547 IX86_BUILTIN_ANDNPD,
16548 IX86_BUILTIN_ORPD,
16549 IX86_BUILTIN_XORPD,
16550
16551 IX86_BUILTIN_SQRTPD,
16552 IX86_BUILTIN_SQRTSD,
16553
16554 IX86_BUILTIN_UNPCKHPD,
16555 IX86_BUILTIN_UNPCKLPD,
16556
16557 IX86_BUILTIN_SHUFPD,
16558
16559 IX86_BUILTIN_LOADUPD,
16560 IX86_BUILTIN_STOREUPD,
16561 IX86_BUILTIN_MOVSD,
16562
16563 IX86_BUILTIN_LOADHPD,
16564 IX86_BUILTIN_LOADLPD,
16565
16566 IX86_BUILTIN_CVTDQ2PD,
16567 IX86_BUILTIN_CVTDQ2PS,
16568
16569 IX86_BUILTIN_CVTPD2DQ,
16570 IX86_BUILTIN_CVTPD2PI,
16571 IX86_BUILTIN_CVTPD2PS,
16572 IX86_BUILTIN_CVTTPD2DQ,
16573 IX86_BUILTIN_CVTTPD2PI,
16574
16575 IX86_BUILTIN_CVTPI2PD,
16576 IX86_BUILTIN_CVTSI2SD,
16577 IX86_BUILTIN_CVTSI642SD,
16578
16579 IX86_BUILTIN_CVTSD2SI,
16580 IX86_BUILTIN_CVTSD2SI64,
16581 IX86_BUILTIN_CVTSD2SS,
16582 IX86_BUILTIN_CVTSS2SD,
16583 IX86_BUILTIN_CVTTSD2SI,
16584 IX86_BUILTIN_CVTTSD2SI64,
16585
16586 IX86_BUILTIN_CVTPS2DQ,
16587 IX86_BUILTIN_CVTPS2PD,
16588 IX86_BUILTIN_CVTTPS2DQ,
16589
16590 IX86_BUILTIN_MOVNTI,
16591 IX86_BUILTIN_MOVNTPD,
16592 IX86_BUILTIN_MOVNTDQ,
16593
16594 /* SSE2 MMX */
16595 IX86_BUILTIN_MASKMOVDQU,
16596 IX86_BUILTIN_MOVMSKPD,
16597 IX86_BUILTIN_PMOVMSKB128,
16598
16599 IX86_BUILTIN_PACKSSWB128,
16600 IX86_BUILTIN_PACKSSDW128,
16601 IX86_BUILTIN_PACKUSWB128,
16602
16603 IX86_BUILTIN_PADDB128,
16604 IX86_BUILTIN_PADDW128,
16605 IX86_BUILTIN_PADDD128,
16606 IX86_BUILTIN_PADDQ128,
16607 IX86_BUILTIN_PADDSB128,
16608 IX86_BUILTIN_PADDSW128,
16609 IX86_BUILTIN_PADDUSB128,
16610 IX86_BUILTIN_PADDUSW128,
16611 IX86_BUILTIN_PSUBB128,
16612 IX86_BUILTIN_PSUBW128,
16613 IX86_BUILTIN_PSUBD128,
16614 IX86_BUILTIN_PSUBQ128,
16615 IX86_BUILTIN_PSUBSB128,
16616 IX86_BUILTIN_PSUBSW128,
16617 IX86_BUILTIN_PSUBUSB128,
16618 IX86_BUILTIN_PSUBUSW128,
16619
16620 IX86_BUILTIN_PAND128,
16621 IX86_BUILTIN_PANDN128,
16622 IX86_BUILTIN_POR128,
16623 IX86_BUILTIN_PXOR128,
16624
16625 IX86_BUILTIN_PAVGB128,
16626 IX86_BUILTIN_PAVGW128,
16627
16628 IX86_BUILTIN_PCMPEQB128,
16629 IX86_BUILTIN_PCMPEQW128,
16630 IX86_BUILTIN_PCMPEQD128,
16631 IX86_BUILTIN_PCMPGTB128,
16632 IX86_BUILTIN_PCMPGTW128,
16633 IX86_BUILTIN_PCMPGTD128,
16634
16635 IX86_BUILTIN_PMADDWD128,
16636
16637 IX86_BUILTIN_PMAXSW128,
16638 IX86_BUILTIN_PMAXUB128,
16639 IX86_BUILTIN_PMINSW128,
16640 IX86_BUILTIN_PMINUB128,
16641
16642 IX86_BUILTIN_PMULUDQ,
16643 IX86_BUILTIN_PMULUDQ128,
16644 IX86_BUILTIN_PMULHUW128,
16645 IX86_BUILTIN_PMULHW128,
16646 IX86_BUILTIN_PMULLW128,
16647
16648 IX86_BUILTIN_PSADBW128,
16649 IX86_BUILTIN_PSHUFHW,
16650 IX86_BUILTIN_PSHUFLW,
16651 IX86_BUILTIN_PSHUFD,
16652
16653 IX86_BUILTIN_PSLLDQI128,
16654 IX86_BUILTIN_PSLLWI128,
16655 IX86_BUILTIN_PSLLDI128,
16656 IX86_BUILTIN_PSLLQI128,
16657 IX86_BUILTIN_PSRAWI128,
16658 IX86_BUILTIN_PSRADI128,
16659 IX86_BUILTIN_PSRLDQI128,
16660 IX86_BUILTIN_PSRLWI128,
16661 IX86_BUILTIN_PSRLDI128,
16662 IX86_BUILTIN_PSRLQI128,
16663
16664 IX86_BUILTIN_PSLLDQ128,
16665 IX86_BUILTIN_PSLLW128,
16666 IX86_BUILTIN_PSLLD128,
16667 IX86_BUILTIN_PSLLQ128,
16668 IX86_BUILTIN_PSRAW128,
16669 IX86_BUILTIN_PSRAD128,
16670 IX86_BUILTIN_PSRLW128,
16671 IX86_BUILTIN_PSRLD128,
16672 IX86_BUILTIN_PSRLQ128,
16673
16674 IX86_BUILTIN_PUNPCKHBW128,
16675 IX86_BUILTIN_PUNPCKHWD128,
16676 IX86_BUILTIN_PUNPCKHDQ128,
16677 IX86_BUILTIN_PUNPCKHQDQ128,
16678 IX86_BUILTIN_PUNPCKLBW128,
16679 IX86_BUILTIN_PUNPCKLWD128,
16680 IX86_BUILTIN_PUNPCKLDQ128,
16681 IX86_BUILTIN_PUNPCKLQDQ128,
16682
16683 IX86_BUILTIN_CLFLUSH,
16684 IX86_BUILTIN_MFENCE,
16685 IX86_BUILTIN_LFENCE,
16686
16687 /* Prescott New Instructions. */
16688 IX86_BUILTIN_ADDSUBPS,
16689 IX86_BUILTIN_HADDPS,
16690 IX86_BUILTIN_HSUBPS,
16691 IX86_BUILTIN_MOVSHDUP,
16692 IX86_BUILTIN_MOVSLDUP,
16693 IX86_BUILTIN_ADDSUBPD,
16694 IX86_BUILTIN_HADDPD,
16695 IX86_BUILTIN_HSUBPD,
16696 IX86_BUILTIN_LDDQU,
16697
16698 IX86_BUILTIN_MONITOR,
16699 IX86_BUILTIN_MWAIT,
16700
16701 /* SSSE3. */
16702 IX86_BUILTIN_PHADDW,
16703 IX86_BUILTIN_PHADDD,
16704 IX86_BUILTIN_PHADDSW,
16705 IX86_BUILTIN_PHSUBW,
16706 IX86_BUILTIN_PHSUBD,
16707 IX86_BUILTIN_PHSUBSW,
16708 IX86_BUILTIN_PMADDUBSW,
16709 IX86_BUILTIN_PMULHRSW,
16710 IX86_BUILTIN_PSHUFB,
16711 IX86_BUILTIN_PSIGNB,
16712 IX86_BUILTIN_PSIGNW,
16713 IX86_BUILTIN_PSIGND,
16714 IX86_BUILTIN_PALIGNR,
16715 IX86_BUILTIN_PABSB,
16716 IX86_BUILTIN_PABSW,
16717 IX86_BUILTIN_PABSD,
16718
16719 IX86_BUILTIN_PHADDW128,
16720 IX86_BUILTIN_PHADDD128,
16721 IX86_BUILTIN_PHADDSW128,
16722 IX86_BUILTIN_PHSUBW128,
16723 IX86_BUILTIN_PHSUBD128,
16724 IX86_BUILTIN_PHSUBSW128,
16725 IX86_BUILTIN_PMADDUBSW128,
16726 IX86_BUILTIN_PMULHRSW128,
16727 IX86_BUILTIN_PSHUFB128,
16728 IX86_BUILTIN_PSIGNB128,
16729 IX86_BUILTIN_PSIGNW128,
16730 IX86_BUILTIN_PSIGND128,
16731 IX86_BUILTIN_PALIGNR128,
16732 IX86_BUILTIN_PABSB128,
16733 IX86_BUILTIN_PABSW128,
16734 IX86_BUILTIN_PABSD128,
16735
16736 /* AMDFAM10 - SSE4A New Instructions. */
16737 IX86_BUILTIN_MOVNTSD,
16738 IX86_BUILTIN_MOVNTSS,
16739 IX86_BUILTIN_EXTRQI,
16740 IX86_BUILTIN_EXTRQ,
16741 IX86_BUILTIN_INSERTQI,
16742 IX86_BUILTIN_INSERTQ,
16743
16744 /* SSE4.1. */
16745 IX86_BUILTIN_BLENDPD,
16746 IX86_BUILTIN_BLENDPS,
16747 IX86_BUILTIN_BLENDVPD,
16748 IX86_BUILTIN_BLENDVPS,
16749 IX86_BUILTIN_PBLENDVB128,
16750 IX86_BUILTIN_PBLENDW128,
16751
16752 IX86_BUILTIN_DPPD,
16753 IX86_BUILTIN_DPPS,
16754
16755 IX86_BUILTIN_INSERTPS128,
16756
16757 IX86_BUILTIN_MOVNTDQA,
16758 IX86_BUILTIN_MPSADBW128,
16759 IX86_BUILTIN_PACKUSDW128,
16760 IX86_BUILTIN_PCMPEQQ,
16761 IX86_BUILTIN_PHMINPOSUW128,
16762
16763 IX86_BUILTIN_PMAXSB128,
16764 IX86_BUILTIN_PMAXSD128,
16765 IX86_BUILTIN_PMAXUD128,
16766 IX86_BUILTIN_PMAXUW128,
16767
16768 IX86_BUILTIN_PMINSB128,
16769 IX86_BUILTIN_PMINSD128,
16770 IX86_BUILTIN_PMINUD128,
16771 IX86_BUILTIN_PMINUW128,
16772
16773 IX86_BUILTIN_PMOVSXBW128,
16774 IX86_BUILTIN_PMOVSXBD128,
16775 IX86_BUILTIN_PMOVSXBQ128,
16776 IX86_BUILTIN_PMOVSXWD128,
16777 IX86_BUILTIN_PMOVSXWQ128,
16778 IX86_BUILTIN_PMOVSXDQ128,
16779
16780 IX86_BUILTIN_PMOVZXBW128,
16781 IX86_BUILTIN_PMOVZXBD128,
16782 IX86_BUILTIN_PMOVZXBQ128,
16783 IX86_BUILTIN_PMOVZXWD128,
16784 IX86_BUILTIN_PMOVZXWQ128,
16785 IX86_BUILTIN_PMOVZXDQ128,
16786
16787 IX86_BUILTIN_PMULDQ128,
16788 IX86_BUILTIN_PMULLD128,
16789
16790 IX86_BUILTIN_ROUNDPD,
16791 IX86_BUILTIN_ROUNDPS,
16792 IX86_BUILTIN_ROUNDSD,
16793 IX86_BUILTIN_ROUNDSS,
16794
16795 IX86_BUILTIN_PTESTZ,
16796 IX86_BUILTIN_PTESTC,
16797 IX86_BUILTIN_PTESTNZC,
16798
16799 IX86_BUILTIN_VEC_INIT_V2SI,
16800 IX86_BUILTIN_VEC_INIT_V4HI,
16801 IX86_BUILTIN_VEC_INIT_V8QI,
16802 IX86_BUILTIN_VEC_EXT_V2DF,
16803 IX86_BUILTIN_VEC_EXT_V2DI,
16804 IX86_BUILTIN_VEC_EXT_V4SF,
16805 IX86_BUILTIN_VEC_EXT_V4SI,
16806 IX86_BUILTIN_VEC_EXT_V8HI,
16807 IX86_BUILTIN_VEC_EXT_V2SI,
16808 IX86_BUILTIN_VEC_EXT_V4HI,
16809 IX86_BUILTIN_VEC_EXT_V16QI,
16810 IX86_BUILTIN_VEC_SET_V2DI,
16811 IX86_BUILTIN_VEC_SET_V4SF,
16812 IX86_BUILTIN_VEC_SET_V4SI,
16813 IX86_BUILTIN_VEC_SET_V8HI,
16814 IX86_BUILTIN_VEC_SET_V4HI,
16815 IX86_BUILTIN_VEC_SET_V16QI,
16816
16817 IX86_BUILTIN_VEC_PACK_SFIX,
16818
16819 /* SSE4.2. */
16820 IX86_BUILTIN_CRC32QI,
16821 IX86_BUILTIN_CRC32HI,
16822 IX86_BUILTIN_CRC32SI,
16823 IX86_BUILTIN_CRC32DI,
16824
16825 IX86_BUILTIN_PCMPESTRI128,
16826 IX86_BUILTIN_PCMPESTRM128,
16827 IX86_BUILTIN_PCMPESTRA128,
16828 IX86_BUILTIN_PCMPESTRC128,
16829 IX86_BUILTIN_PCMPESTRO128,
16830 IX86_BUILTIN_PCMPESTRS128,
16831 IX86_BUILTIN_PCMPESTRZ128,
16832 IX86_BUILTIN_PCMPISTRI128,
16833 IX86_BUILTIN_PCMPISTRM128,
16834 IX86_BUILTIN_PCMPISTRA128,
16835 IX86_BUILTIN_PCMPISTRC128,
16836 IX86_BUILTIN_PCMPISTRO128,
16837 IX86_BUILTIN_PCMPISTRS128,
16838 IX86_BUILTIN_PCMPISTRZ128,
16839
16840 IX86_BUILTIN_PCMPGTQ,
16841
16842 /* TFmode support builtins. */
16843 IX86_BUILTIN_INFQ,
16844 IX86_BUILTIN_FABSQ,
16845 IX86_BUILTIN_COPYSIGNQ,
16846
16847 IX86_BUILTIN_MAX
16848 };
16849
16850 /* Table for the ix86 builtin decls. */
16851 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16852
16853 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16854 * only if MASK includes bits that are enabled in ix86_isa_flags. Stores
16855 * the function decl in the ix86_builtins array.
16856 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16857
16858 static inline tree
16859 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16860 {
16861 tree decl = NULL_TREE;
16862
16863 if (mask & ix86_isa_flags
16864 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16865 {
16866 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16867 NULL, NULL_TREE);
16868 ix86_builtins[(int) code] = decl;
16869 }
16870
16871 return decl;
16872 }
16873
16874 /* Like def_builtin, but also marks the function decl "const". */
16875
16876 static inline tree
16877 def_builtin_const (int mask, const char *name, tree type,
16878 enum ix86_builtins code)
16879 {
16880 tree decl = def_builtin (mask, name, type, code);
16881 if (decl)
16882 TREE_READONLY (decl) = 1;
16883 return decl;
16884 }
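
/* Illustrative sketch (added for exposition, not a literal call from this
   file): a builtin is registered by handing these helpers the ISA mask that
   guards it, the user-visible name, a function type node and the
   IX86_BUILTIN_* code, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf",
                        float_ftype_float, IX86_BUILTIN_RSQRTF);

   where float_ftype_float is assumed to be a previously built
   float (*)(float) function type node.  */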
16885
16886 /* Bits for builtin_description.flag. */
16887
16888 /* Set when we don't support the comparison natively, and should instead
16889 swap the comparison operands in order to support it. */
16890 #define BUILTIN_DESC_SWAP_OPERANDS 1
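
/* Exposition: when an entry sets this flag the builtin is expanded with its
   two operands exchanged; e.g. the cmpgtps entries in bdesc_2arg below are
   described as LT plus BUILTIN_DESC_SWAP_OPERANDS.  */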
16891
16892 struct builtin_description
16893 {
16894 const unsigned int mask;
16895 const enum insn_code icode;
16896 const char *const name;
16897 const enum ix86_builtins code;
16898 const enum rtx_code comparison;
16899 const int flag;
16900 };
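
/* Reading an entry (exposition): in the bdesc_comi entry below

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   MASK gates the builtin on the SSE ISA, ICODE is the insn pattern used to
   expand it, NAME is the user-visible spelling, CODE indexes ix86_builtins,
   COMPARISON is the rtx comparison code used during expansion, and FLAG
   holds BUILTIN_DESC_* bits.  */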
16901
16902 static const struct builtin_description bdesc_comi[] =
16903 {
16904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16928 };
16929
16930 static const struct builtin_description bdesc_ptest[] =
16931 {
16932 /* SSE4.1 */
16933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16934 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16935 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16936 };
16937
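/* Exposition: in the SSE4.2 string-compare tables below, the FLAG field is
   reused to carry a CC mode (cast to int) for the EFLAGS-testing variants,
   while the index/mask-returning pcmpestri128/pcmpestrm128 and
   pcmpistri128/pcmpistrm128 entries leave it 0.  */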
16938 static const struct builtin_description bdesc_pcmpestr[] =
16939 {
16940 /* SSE4.2 */
16941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16942 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16943 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16944 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16946 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16947 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
16948 };
16949
16950 static const struct builtin_description bdesc_pcmpistr[] =
16951 {
16952 /* SSE4.2 */
16953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16954 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16955 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
16960 };
16961
16962 static const struct builtin_description bdesc_crc32[] =
16963 {
16964 /* SSE4.2 */
16965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16968 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16969 };
16970
16971 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0. */
16972 static const struct builtin_description bdesc_sse_3arg[] =
16973 {
16974 /* SSE4.1 */
16975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16979 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16980 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16987 };
16988
16989 static const struct builtin_description bdesc_2arg[] =
16990 {
16991 /* SSE */
16992 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16993 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
16994 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
16995 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
16996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
16997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17000
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17023
17024 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17025 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17028
17029 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17031 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17032 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17033
17034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17039
17040 /* MMX */
17041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17049
17050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17058
17059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17062
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17067
17068 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17069 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17070
17071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17077
17078 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17081 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17082
17083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17086 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17088 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17089
17090 /* Special. */
17091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17094
17095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17097 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17098
17099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17105
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17112
17113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17117
17118 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17120
17121 /* SSE2 */
17122 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17130
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17151
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17156
17157 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17161
17162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17165
17166 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17167
17168 /* SSE2 MMX */
17169 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17177
17178 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17184 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17185 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17186
17187 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17189
17190 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17194
17195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17197
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17204
17205 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17209
17210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17218
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17222
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17225
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17228
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17232
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17236
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17239
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17241
17242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17243 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17246
17247 /* SSE3 MMX */
17248 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17254
17255 /* SSSE3 */
17256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17276 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17280
17281 /* SSE4.1 */
17282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17294
17295 /* SSE4.2 */
17296 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17297 };
17298
17299 static const struct builtin_description bdesc_1arg[] =
17300 {
17301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17303
17304 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17307
17308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17314
17315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17317
17318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17319
17320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17322
17323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17328
17329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17330
17331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17334 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17335
17336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17339
17340 /* SSE3 */
17341 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17343
17344 /* SSSE3 */
17345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17351
17352 /* SSE4.1 */
17353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17366
17367 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17370 };
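
/* Exposition: the bdesc_* tables above drive builtin registration.  For each
   entry whose MASK is enabled, ix86_init_mmx_sse_builtins below is expected
   to call def_builtin with the entry's name, a matching function type and
   its IX86_BUILTIN_* code; the registration loops themselves come later in
   that function.  */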
17371
17372 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17373 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17374 builtins. */
17375 static void
17376 ix86_init_mmx_sse_builtins (void)
17377 {
17378 const struct builtin_description * d;
17379 size_t i;
17380
17381 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17382 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17383 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17384 tree V2DI_type_node
17385 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17386 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17387 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17388 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17389 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17390 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17391 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
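
/* Exposition: each *_type_node above is the tree type for one vector machine
   mode; e.g. V4SF_type_node is a vector of four floats (V4SFmode) and
   V2DI_type_node a vector of two 64-bit integers (V2DImode).  They are
   combined below into the function types handed to def_builtin.  */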
17392
17393 tree pchar_type_node = build_pointer_type (char_type_node);
17394 tree pcchar_type_node = build_pointer_type (
17395 build_type_variant (char_type_node, 1, 0));
17396 tree pfloat_type_node = build_pointer_type (float_type_node);
17397 tree pcfloat_type_node = build_pointer_type (
17398 build_type_variant (float_type_node, 1, 0));
17399 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17400 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17401 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17402
17403 /* Comparisons. */
17404 tree int_ftype_v4sf_v4sf
17405 = build_function_type_list (integer_type_node,
17406 V4SF_type_node, V4SF_type_node, NULL_TREE);
17407 tree v4si_ftype_v4sf_v4sf
17408 = build_function_type_list (V4SI_type_node,
17409 V4SF_type_node, V4SF_type_node, NULL_TREE);
17410 /* MMX/SSE/integer conversions. */
17411 tree int_ftype_v4sf
17412 = build_function_type_list (integer_type_node,
17413 V4SF_type_node, NULL_TREE);
17414 tree int64_ftype_v4sf
17415 = build_function_type_list (long_long_integer_type_node,
17416 V4SF_type_node, NULL_TREE);
17417 tree int_ftype_v8qi
17418 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17419 tree v4sf_ftype_v4sf_int
17420 = build_function_type_list (V4SF_type_node,
17421 V4SF_type_node, integer_type_node, NULL_TREE);
17422 tree v4sf_ftype_v4sf_int64
17423 = build_function_type_list (V4SF_type_node,
17424 V4SF_type_node, long_long_integer_type_node,
17425 NULL_TREE);
17426 tree v4sf_ftype_v4sf_v2si
17427 = build_function_type_list (V4SF_type_node,
17428 V4SF_type_node, V2SI_type_node, NULL_TREE);
17429
17430 /* Miscellaneous. */
17431 tree v8qi_ftype_v4hi_v4hi
17432 = build_function_type_list (V8QI_type_node,
17433 V4HI_type_node, V4HI_type_node, NULL_TREE);
17434 tree v4hi_ftype_v2si_v2si
17435 = build_function_type_list (V4HI_type_node,
17436 V2SI_type_node, V2SI_type_node, NULL_TREE);
17437 tree v4sf_ftype_v4sf_v4sf_int
17438 = build_function_type_list (V4SF_type_node,
17439 V4SF_type_node, V4SF_type_node,
17440 integer_type_node, NULL_TREE);
17441 tree v2si_ftype_v4hi_v4hi
17442 = build_function_type_list (V2SI_type_node,
17443 V4HI_type_node, V4HI_type_node, NULL_TREE);
17444 tree v4hi_ftype_v4hi_int
17445 = build_function_type_list (V4HI_type_node,
17446 V4HI_type_node, integer_type_node, NULL_TREE);
17447 tree v4hi_ftype_v4hi_di
17448 = build_function_type_list (V4HI_type_node,
17449 V4HI_type_node, long_long_unsigned_type_node,
17450 NULL_TREE);
17451 tree v2si_ftype_v2si_di
17452 = build_function_type_list (V2SI_type_node,
17453 V2SI_type_node, long_long_unsigned_type_node,
17454 NULL_TREE);
17455 tree void_ftype_void
17456 = build_function_type (void_type_node, void_list_node);
17457 tree void_ftype_unsigned
17458 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17459 tree void_ftype_unsigned_unsigned
17460 = build_function_type_list (void_type_node, unsigned_type_node,
17461 unsigned_type_node, NULL_TREE);
17462 tree void_ftype_pcvoid_unsigned_unsigned
17463 = build_function_type_list (void_type_node, const_ptr_type_node,
17464 unsigned_type_node, unsigned_type_node,
17465 NULL_TREE);
17466 tree unsigned_ftype_void
17467 = build_function_type (unsigned_type_node, void_list_node);
17468 tree v2si_ftype_v4sf
17469 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17470 /* Loads/stores. */
17471 tree void_ftype_v8qi_v8qi_pchar
17472 = build_function_type_list (void_type_node,
17473 V8QI_type_node, V8QI_type_node,
17474 pchar_type_node, NULL_TREE);
17475 tree v4sf_ftype_pcfloat
17476 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17477 /* @@@ the type is bogus */
17478 tree v4sf_ftype_v4sf_pv2si
17479 = build_function_type_list (V4SF_type_node,
17480 V4SF_type_node, pv2si_type_node, NULL_TREE);
17481 tree void_ftype_pv2si_v4sf
17482 = build_function_type_list (void_type_node,
17483 pv2si_type_node, V4SF_type_node, NULL_TREE);
17484 tree void_ftype_pfloat_v4sf
17485 = build_function_type_list (void_type_node,
17486 pfloat_type_node, V4SF_type_node, NULL_TREE);
17487 tree void_ftype_pdi_di
17488 = build_function_type_list (void_type_node,
17489 pdi_type_node, long_long_unsigned_type_node,
17490 NULL_TREE);
17491 tree void_ftype_pv2di_v2di
17492 = build_function_type_list (void_type_node,
17493 pv2di_type_node, V2DI_type_node, NULL_TREE);
17494 /* Normal vector unops. */
17495 tree v4sf_ftype_v4sf
17496 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17497 tree v16qi_ftype_v16qi
17498 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17499 tree v8hi_ftype_v8hi
17500 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17501 tree v4si_ftype_v4si
17502 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17503 tree v8qi_ftype_v8qi
17504 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17505 tree v4hi_ftype_v4hi
17506 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17507
17508 /* Normal vector binops. */
17509 tree v4sf_ftype_v4sf_v4sf
17510 = build_function_type_list (V4SF_type_node,
17511 V4SF_type_node, V4SF_type_node, NULL_TREE);
17512 tree v8qi_ftype_v8qi_v8qi
17513 = build_function_type_list (V8QI_type_node,
17514 V8QI_type_node, V8QI_type_node, NULL_TREE);
17515 tree v4hi_ftype_v4hi_v4hi
17516 = build_function_type_list (V4HI_type_node,
17517 V4HI_type_node, V4HI_type_node, NULL_TREE);
17518 tree v2si_ftype_v2si_v2si
17519 = build_function_type_list (V2SI_type_node,
17520 V2SI_type_node, V2SI_type_node, NULL_TREE);
17521 tree di_ftype_di_di
17522 = build_function_type_list (long_long_unsigned_type_node,
17523 long_long_unsigned_type_node,
17524 long_long_unsigned_type_node, NULL_TREE);
17525
17526 tree di_ftype_di_di_int
17527 = build_function_type_list (long_long_unsigned_type_node,
17528 long_long_unsigned_type_node,
17529 long_long_unsigned_type_node,
17530 integer_type_node, NULL_TREE);
17531
17532 tree v2si_ftype_v2sf
17533 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17534 tree v2sf_ftype_v2si
17535 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17536 tree v2si_ftype_v2si
17537 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17538 tree v2sf_ftype_v2sf
17539 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17540 tree v2sf_ftype_v2sf_v2sf
17541 = build_function_type_list (V2SF_type_node,
17542 V2SF_type_node, V2SF_type_node, NULL_TREE);
17543 tree v2si_ftype_v2sf_v2sf
17544 = build_function_type_list (V2SI_type_node,
17545 V2SF_type_node, V2SF_type_node, NULL_TREE);
17546 tree pint_type_node = build_pointer_type (integer_type_node);
17547 tree pdouble_type_node = build_pointer_type (double_type_node);
17548 tree pcdouble_type_node = build_pointer_type (
17549 build_type_variant (double_type_node, 1, 0));
17550 tree int_ftype_v2df_v2df
17551 = build_function_type_list (integer_type_node,
17552 V2DF_type_node, V2DF_type_node, NULL_TREE);
17553
17554 tree void_ftype_pcvoid
17555 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17556 tree v4sf_ftype_v4si
17557 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17558 tree v4si_ftype_v4sf
17559 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17560 tree v2df_ftype_v4si
17561 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17562 tree v4si_ftype_v2df
17563 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17564 tree v4si_ftype_v2df_v2df
17565 = build_function_type_list (V4SI_type_node,
17566 V2DF_type_node, V2DF_type_node, NULL_TREE);
17567 tree v2si_ftype_v2df
17568 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17569 tree v4sf_ftype_v2df
17570 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17571 tree v2df_ftype_v2si
17572 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17573 tree v2df_ftype_v4sf
17574 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17575 tree int_ftype_v2df
17576 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17577 tree int64_ftype_v2df
17578 = build_function_type_list (long_long_integer_type_node,
17579 V2DF_type_node, NULL_TREE);
17580 tree v2df_ftype_v2df_int
17581 = build_function_type_list (V2DF_type_node,
17582 V2DF_type_node, integer_type_node, NULL_TREE);
17583 tree v2df_ftype_v2df_int64
17584 = build_function_type_list (V2DF_type_node,
17585 V2DF_type_node, long_long_integer_type_node,
17586 NULL_TREE);
17587 tree v4sf_ftype_v4sf_v2df
17588 = build_function_type_list (V4SF_type_node,
17589 V4SF_type_node, V2DF_type_node, NULL_TREE);
17590 tree v2df_ftype_v2df_v4sf
17591 = build_function_type_list (V2DF_type_node,
17592 V2DF_type_node, V4SF_type_node, NULL_TREE);
17593 tree v2df_ftype_v2df_v2df_int
17594 = build_function_type_list (V2DF_type_node,
17595 V2DF_type_node, V2DF_type_node,
17596 integer_type_node,
17597 NULL_TREE);
17598 tree v2df_ftype_v2df_pcdouble
17599 = build_function_type_list (V2DF_type_node,
17600 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17601 tree void_ftype_pdouble_v2df
17602 = build_function_type_list (void_type_node,
17603 pdouble_type_node, V2DF_type_node, NULL_TREE);
17604 tree void_ftype_pint_int
17605 = build_function_type_list (void_type_node,
17606 pint_type_node, integer_type_node, NULL_TREE);
17607 tree void_ftype_v16qi_v16qi_pchar
17608 = build_function_type_list (void_type_node,
17609 V16QI_type_node, V16QI_type_node,
17610 pchar_type_node, NULL_TREE);
17611 tree v2df_ftype_pcdouble
17612 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17613 tree v2df_ftype_v2df_v2df
17614 = build_function_type_list (V2DF_type_node,
17615 V2DF_type_node, V2DF_type_node, NULL_TREE);
17616 tree v16qi_ftype_v16qi_v16qi
17617 = build_function_type_list (V16QI_type_node,
17618 V16QI_type_node, V16QI_type_node, NULL_TREE);
17619 tree v8hi_ftype_v8hi_v8hi
17620 = build_function_type_list (V8HI_type_node,
17621 V8HI_type_node, V8HI_type_node, NULL_TREE);
17622 tree v4si_ftype_v4si_v4si
17623 = build_function_type_list (V4SI_type_node,
17624 V4SI_type_node, V4SI_type_node, NULL_TREE);
17625 tree v2di_ftype_v2di_v2di
17626 = build_function_type_list (V2DI_type_node,
17627 V2DI_type_node, V2DI_type_node, NULL_TREE);
17628 tree v2di_ftype_v2df_v2df
17629 = build_function_type_list (V2DI_type_node,
17630 V2DF_type_node, V2DF_type_node, NULL_TREE);
17631 tree v2df_ftype_v2df
17632 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17633 tree v2di_ftype_v2di_int
17634 = build_function_type_list (V2DI_type_node,
17635 V2DI_type_node, integer_type_node, NULL_TREE);
17636 tree v2di_ftype_v2di_v2di_int
17637 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17638 V2DI_type_node, integer_type_node, NULL_TREE);
17639 tree v4si_ftype_v4si_int
17640 = build_function_type_list (V4SI_type_node,
17641 V4SI_type_node, integer_type_node, NULL_TREE);
17642 tree v8hi_ftype_v8hi_int
17643 = build_function_type_list (V8HI_type_node,
17644 V8HI_type_node, integer_type_node, NULL_TREE);
17645 tree v4si_ftype_v8hi_v8hi
17646 = build_function_type_list (V4SI_type_node,
17647 V8HI_type_node, V8HI_type_node, NULL_TREE);
17648 tree di_ftype_v8qi_v8qi
17649 = build_function_type_list (long_long_unsigned_type_node,
17650 V8QI_type_node, V8QI_type_node, NULL_TREE);
17651 tree di_ftype_v2si_v2si
17652 = build_function_type_list (long_long_unsigned_type_node,
17653 V2SI_type_node, V2SI_type_node, NULL_TREE);
17654 tree v2di_ftype_v16qi_v16qi
17655 = build_function_type_list (V2DI_type_node,
17656 V16QI_type_node, V16QI_type_node, NULL_TREE);
17657 tree v2di_ftype_v4si_v4si
17658 = build_function_type_list (V2DI_type_node,
17659 V4SI_type_node, V4SI_type_node, NULL_TREE);
17660 tree int_ftype_v16qi
17661 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17662 tree v16qi_ftype_pcchar
17663 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17664 tree void_ftype_pchar_v16qi
17665 = build_function_type_list (void_type_node,
17666 pchar_type_node, V16QI_type_node, NULL_TREE);
17667
17668 tree v2di_ftype_v2di_unsigned_unsigned
17669 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17670 unsigned_type_node, unsigned_type_node,
17671 NULL_TREE);
17672 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17673 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17674 unsigned_type_node, unsigned_type_node,
17675 NULL_TREE);
17676 tree v2di_ftype_v2di_v16qi
17677 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17678 NULL_TREE);
17679 tree v2df_ftype_v2df_v2df_v2df
17680 = build_function_type_list (V2DF_type_node,
17681 V2DF_type_node, V2DF_type_node,
17682 V2DF_type_node, NULL_TREE);
17683 tree v4sf_ftype_v4sf_v4sf_v4sf
17684 = build_function_type_list (V4SF_type_node,
17685 V4SF_type_node, V4SF_type_node,
17686 V4SF_type_node, NULL_TREE);
17687 tree v8hi_ftype_v16qi
17688 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17689 NULL_TREE);
17690 tree v4si_ftype_v16qi
17691 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17692 NULL_TREE);
17693 tree v2di_ftype_v16qi
17694 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17695 NULL_TREE);
17696 tree v4si_ftype_v8hi
17697 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17698 NULL_TREE);
17699 tree v2di_ftype_v8hi
17700 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17701 NULL_TREE);
17702 tree v2di_ftype_v4si
17703 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17704 NULL_TREE);
17705 tree v2di_ftype_pv2di
17706 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17707 NULL_TREE);
17708 tree v16qi_ftype_v16qi_v16qi_int
17709 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17710 V16QI_type_node, integer_type_node,
17711 NULL_TREE);
17712 tree v16qi_ftype_v16qi_v16qi_v16qi
17713 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17714 V16QI_type_node, V16QI_type_node,
17715 NULL_TREE);
17716 tree v8hi_ftype_v8hi_v8hi_int
17717 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17718 V8HI_type_node, integer_type_node,
17719 NULL_TREE);
17720 tree v4si_ftype_v4si_v4si_int
17721 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17722 V4SI_type_node, integer_type_node,
17723 NULL_TREE);
17724 tree int_ftype_v2di_v2di
17725 = build_function_type_list (integer_type_node,
17726 V2DI_type_node, V2DI_type_node,
17727 NULL_TREE);
17728 tree int_ftype_v16qi_int_v16qi_int_int
17729 = build_function_type_list (integer_type_node,
17730 V16QI_type_node,
17731 integer_type_node,
17732 V16QI_type_node,
17733 integer_type_node,
17734 integer_type_node,
17735 NULL_TREE);
17736 tree v16qi_ftype_v16qi_int_v16qi_int_int
17737 = build_function_type_list (V16QI_type_node,
17738 V16QI_type_node,
17739 integer_type_node,
17740 V16QI_type_node,
17741 integer_type_node,
17742 integer_type_node,
17743 NULL_TREE);
17744 tree int_ftype_v16qi_v16qi_int
17745 = build_function_type_list (integer_type_node,
17746 V16QI_type_node,
17747 V16QI_type_node,
17748 integer_type_node,
17749 NULL_TREE);
17750 tree ftype;
17751
17752 /* The __float80 type. */
17753 if (TYPE_MODE (long_double_type_node) == XFmode)
17754 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17755 "__float80");
17756 else
17757 {
17758 /* long double is not the 80-bit extended type here, so build one. */
17759 tree float80_type_node = make_node (REAL_TYPE);
17760
17761 TYPE_PRECISION (float80_type_node) = 80;
17762 layout_type (float80_type_node);
17763 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17764 "__float80");
17765 }
17766
17767 if (TARGET_64BIT)
17768 {
17769 tree float128_type_node = make_node (REAL_TYPE);
17770
17771 TYPE_PRECISION (float128_type_node) = 128;
17772 layout_type (float128_type_node);
17773 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17774 "__float128");
17775
17776 /* TFmode support builtins. */
17777 ftype = build_function_type (float128_type_node,
17778 void_list_node);
17779 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17780
17781 ftype = build_function_type_list (float128_type_node,
17782 float128_type_node,
17783 NULL_TREE);
17784 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17785
17786 ftype = build_function_type_list (float128_type_node,
17787 float128_type_node,
17788 float128_type_node,
17789 NULL_TREE);
17790 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17791 }
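  /* A minimal usage sketch of the types and builtins registered above
     (64-bit targets only; illustrative, not exercised by the compiler
     itself):

	__float128 inf = __builtin_infq ();
	__float128 mag = __builtin_fabsq (-inf);
	__float128 res = __builtin_copysignq (mag, inf);

     __float80 is simply the 80-bit extended type registered above; on most
     x86 configurations it is the same type as long double.  */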
17792
17793 /* Add all SSE builtins that are more or less simple operations on
17794 three operands. */
17795 for (i = 0, d = bdesc_sse_3arg;
17796 i < ARRAY_SIZE (bdesc_sse_3arg);
17797 i++, d++)
17798 {
17799 /* Use one of the operands; the target can have a different mode for
17800 mask-generating compares. */
17801 enum machine_mode mode;
17802 tree type;
17803
17804 if (d->name == 0)
17805 continue;
17806 mode = insn_data[d->icode].operand[1].mode;
17807
17808 switch (mode)
17809 {
17810 case V16QImode:
17811 type = v16qi_ftype_v16qi_v16qi_int;
17812 break;
17813 case V8HImode:
17814 type = v8hi_ftype_v8hi_v8hi_int;
17815 break;
17816 case V4SImode:
17817 type = v4si_ftype_v4si_v4si_int;
17818 break;
17819 case V2DImode:
17820 type = v2di_ftype_v2di_v2di_int;
17821 break;
17822 case V2DFmode:
17823 type = v2df_ftype_v2df_v2df_int;
17824 break;
17825 case V4SFmode:
17826 type = v4sf_ftype_v4sf_v4sf_int;
17827 break;
17828 default:
17829 gcc_unreachable ();
17830 }
17831
17832 /* Override for variable blends. */
17833 switch (d->icode)
17834 {
17835 case CODE_FOR_sse4_1_blendvpd:
17836 type = v2df_ftype_v2df_v2df_v2df;
17837 break;
17838 case CODE_FOR_sse4_1_blendvps:
17839 type = v4sf_ftype_v4sf_v4sf_v4sf;
17840 break;
17841 case CODE_FOR_sse4_1_pblendvb:
17842 type = v16qi_ftype_v16qi_v16qi_v16qi;
17843 break;
17844 default:
17845 break;
17846 }
17847
17848 def_builtin_const (d->mask, d->name, type, d->code);
17849 }
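  /* For example, assuming bdesc_sse_3arg contains an SSE4.1 entry along
     the lines of

	{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd,
	  "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },

     the loop above picks v2df_ftype_v2df_v2df_int from operand 1's
     V2DFmode, while the blendvpd/blendvps/pblendvb entries are overridden
     so that their third operand is a vector register rather than an
     immediate.  */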
17850
17851 /* Add all builtins that are more or less simple operations on two
17852 operands. */
17853 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17854 {
17855 /* Use one of the operands; the target can have a different mode for
17856 mask-generating compares. */
17857 enum machine_mode mode;
17858 tree type;
17859
17860 if (d->name == 0)
17861 continue;
17862 mode = insn_data[d->icode].operand[1].mode;
17863
17864 switch (mode)
17865 {
17866 case V16QImode:
17867 type = v16qi_ftype_v16qi_v16qi;
17868 break;
17869 case V8HImode:
17870 type = v8hi_ftype_v8hi_v8hi;
17871 break;
17872 case V4SImode:
17873 type = v4si_ftype_v4si_v4si;
17874 break;
17875 case V2DImode:
17876 type = v2di_ftype_v2di_v2di;
17877 break;
17878 case V2DFmode:
17879 type = v2df_ftype_v2df_v2df;
17880 break;
17881 case V4SFmode:
17882 type = v4sf_ftype_v4sf_v4sf;
17883 break;
17884 case V8QImode:
17885 type = v8qi_ftype_v8qi_v8qi;
17886 break;
17887 case V4HImode:
17888 type = v4hi_ftype_v4hi_v4hi;
17889 break;
17890 case V2SImode:
17891 type = v2si_ftype_v2si_v2si;
17892 break;
17893 case DImode:
17894 type = di_ftype_di_di;
17895 break;
17896
17897 default:
17898 gcc_unreachable ();
17899 }
17900
17901 /* Override for comparisons. */
17902 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17903 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17904 type = v4si_ftype_v4sf_v4sf;
17905
17906 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17907 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17908 type = v2di_ftype_v2df_v2df;
17909
17910 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
17911 type = v4si_ftype_v2df_v2df;
17912
17913 def_builtin_const (d->mask, d->name, type, d->code);
17914 }
17915
17916 /* Add all builtins that are more or less simple operations on one operand. */
17917 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17918 {
17919 enum machine_mode mode;
17920 tree type;
17921
17922 if (d->name == 0)
17923 continue;
17924 mode = insn_data[d->icode].operand[1].mode;
17925
17926 switch (mode)
17927 {
17928 case V16QImode:
17929 type = v16qi_ftype_v16qi;
17930 break;
17931 case V8HImode:
17932 type = v8hi_ftype_v8hi;
17933 break;
17934 case V4SImode:
17935 type = v4si_ftype_v4si;
17936 break;
17937 case V2DFmode:
17938 type = v2df_ftype_v2df;
17939 break;
17940 case V4SFmode:
17941 type = v4sf_ftype_v4sf;
17942 break;
17943 case V8QImode:
17944 type = v8qi_ftype_v8qi;
17945 break;
17946 case V4HImode:
17947 type = v4hi_ftype_v4hi;
17948 break;
17949 case V2SImode:
17950 type = v2si_ftype_v2si;
17951 break;
17952
17953 default:
17954 gcc_unreachable ();
17955 }
17956
17957 def_builtin_const (d->mask, d->name, type, d->code);
17958 }
17959
17960 /* pcmpestr[im] insns. */
17961 for (i = 0, d = bdesc_pcmpestr;
17962 i < ARRAY_SIZE (bdesc_pcmpestr);
17963 i++, d++)
17964 {
17965 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17966 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17967 else
17968 ftype = int_ftype_v16qi_int_v16qi_int_int;
17969 def_builtin_const (d->mask, d->name, ftype, d->code);
17970 }
17971
17972 /* pcmpistr[im] insns. */
17973 for (i = 0, d = bdesc_pcmpistr;
17974 i < ARRAY_SIZE (bdesc_pcmpistr);
17975 i++, d++)
17976 {
17977 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17978 ftype = v16qi_ftype_v16qi_v16qi_int;
17979 else
17980 ftype = int_ftype_v16qi_v16qi_int;
17981 def_builtin_const (d->mask, d->name, ftype, d->code);
17982 }
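  /* Assuming the descriptor tables provide the usual names, the
     index-returning and mask-returning forms end up with prototypes such as

	int     __builtin_ia32_pcmpistri128 (__v16qi, __v16qi, int);
	__v16qi __builtin_ia32_pcmpistrm128 (__v16qi, __v16qi, int);

     where the last argument is the immediate control byte.  */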
17983
17984 /* Add the remaining MMX insns with somewhat more complicated types. */
17985 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17986 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17987 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17988 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17989
17990 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17991 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17992 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17993
17994 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17995 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17996
17997 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17998 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17999
18000 /* comi/ucomi insns. */
18001 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18002 if (d->mask == OPTION_MASK_ISA_SSE2)
18003 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18004 else
18005 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18006
18007 /* ptest insns. */
18008 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18009 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18010
18011 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18012 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18013 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18014
18015 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18016 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18017 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18018 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18019 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18020 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18021 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18022 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18023 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18024 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18025 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18026
18027 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18028
18029 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18030 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18031
18032 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18033 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18034 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18035 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18036
18037 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18038 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18039 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18040 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18041
18042 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18043
18044 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18045
18046 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18047 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18048 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18049 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18050 ftype = build_function_type_list (float_type_node,
18051 float_type_node,
18052 NULL_TREE);
18053 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18054 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18055 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18056
18057 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18058
18059 /* Original 3DNow! */
18060 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18061 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18062 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18063 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18064 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18065 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18066 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18067 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18068 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18069 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18070 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18071 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18072 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18073 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18074 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18075 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18076 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18077 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18078 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18079 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18080
18081 /* 3DNow! extension as used in the Athlon CPU. */
18082 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18083 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18084 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18085 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18086 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18087 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18088
18089 /* SSE2 */
18090 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18091
18092 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18093 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18094
18095 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18096 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18097
18098 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18099 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18100 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18101 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18102 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18103
18104 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18105 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18106 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18107 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18108
18109 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18110 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18111
18112 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18113
18114 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18115 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18116
18117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18118 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18119 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18121 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18122
18123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18124
18125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18126 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18127 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18128 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18129
18130 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18132 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18133
18134 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18135 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18137 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18138
18139 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18140 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18141 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18142
18143 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18144 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18145
18146 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18147 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18148
18149 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18150 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18151 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18152 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18153 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18154 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18155 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18156
18157 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18158 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18159 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18160 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18161 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18162 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18163 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18164
18165 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18166 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18167 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18168 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18169
18170 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18171
18172 /* Prescott New Instructions. */
18173 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18174 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18175 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18176
18177 /* SSSE3. */
18178 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18179 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18180
18181 /* SSE4.1. */
18182 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18183 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18184 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18185 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18186 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18187 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18188 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18189 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18190 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18191 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18192 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18193 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18194 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18195 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18196 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18197 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18198 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18199 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18200
18201 /* SSE4.2. */
18202 ftype = build_function_type_list (unsigned_type_node,
18203 unsigned_type_node,
18204 unsigned_char_type_node,
18205 NULL_TREE);
18206 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18207 ftype = build_function_type_list (unsigned_type_node,
18208 unsigned_type_node,
18209 short_unsigned_type_node,
18210 NULL_TREE);
18211 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18212 ftype = build_function_type_list (unsigned_type_node,
18213 unsigned_type_node,
18214 unsigned_type_node,
18215 NULL_TREE);
18216 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18217 ftype = build_function_type_list (long_long_unsigned_type_node,
18218 long_long_unsigned_type_node,
18219 long_long_unsigned_type_node,
18220 NULL_TREE);
18221 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
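  /* Illustrative use of the crc32 builtins defined above (SSE4.2 only):

	unsigned crc = 0xffffffffU;
	crc = __builtin_ia32_crc32qi (crc, (unsigned char) 'x');
	crc = __builtin_ia32_crc32si (crc, 0x12345678U);

     Each call accumulates the CRC-32C of the second argument into the
     running value passed as the first argument.  */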
18222
18223 /* AMDFAM10 SSE4A built-ins. */
18224 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18225 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18226 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18227 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18228 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18229 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18230
18231 /* Access to the vec_init patterns. */
18232 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18233 integer_type_node, NULL_TREE);
18234 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18235
18236 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18237 short_integer_type_node,
18238 short_integer_type_node,
18239 short_integer_type_node, NULL_TREE);
18240 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18241
18242 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18243 char_type_node, char_type_node,
18244 char_type_node, char_type_node,
18245 char_type_node, char_type_node,
18246 char_type_node, NULL_TREE);
18247 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
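  /* Illustrative use of the vec_init builtins defined above:

	typedef int __v2si __attribute__ ((__vector_size__ (8)));
	__v2si v = __builtin_ia32_vec_init_v2si (1, 2);

     i.e. each scalar argument supplies one element of the result vector.  */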
18248
18249 /* Access to the vec_extract patterns. */
18250 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18251 integer_type_node, NULL_TREE);
18252 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18253
18254 ftype = build_function_type_list (long_long_integer_type_node,
18255 V2DI_type_node, integer_type_node,
18256 NULL_TREE);
18257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18258
18259 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18260 integer_type_node, NULL_TREE);
18261 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18262
18263 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18264 integer_type_node, NULL_TREE);
18265 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18266
18267 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18268 integer_type_node, NULL_TREE);
18269 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18270
18271 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18272 integer_type_node, NULL_TREE);
18273 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18274
18275 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18276 integer_type_node, NULL_TREE);
18277 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18278
18279 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18280 integer_type_node, NULL_TREE);
18281 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
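  /* Illustrative use of the vec_extract builtins defined above; the element
     index is normally a compile-time constant:

	typedef double __v2df __attribute__ ((__vector_size__ (16)));
	double lo (__v2df x) { return __builtin_ia32_vec_ext_v2df (x, 0); }  */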
18282
18283 /* Access to the vec_set patterns. */
18284 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18285 intDI_type_node,
18286 integer_type_node, NULL_TREE);
18287 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18288
18289 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18290 float_type_node,
18291 integer_type_node, NULL_TREE);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18293
18294 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18295 intSI_type_node,
18296 integer_type_node, NULL_TREE);
18297 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18298
18299 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18300 intHI_type_node,
18301 integer_type_node, NULL_TREE);
18302 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18303
18304 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18305 intHI_type_node,
18306 integer_type_node, NULL_TREE);
18307 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18308
18309 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18310 intQI_type_node,
18311 integer_type_node, NULL_TREE);
18312 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
18313 }
18314
18315 static void
18316 ix86_init_builtins (void)
18317 {
18318 if (TARGET_MMX)
18319 ix86_init_mmx_sse_builtins ();
18320 }
18321
18322 /* Errors in the source file can cause expand_expr to return const0_rtx
18323 where we expect a vector. To avoid crashing, use one of the vector
18324 clear instructions. */
18325 static rtx
18326 safe_vector_operand (rtx x, enum machine_mode mode)
18327 {
18328 if (x == const0_rtx)
18329 x = CONST0_RTX (mode);
18330 return x;
18331 }
18332
18333 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18334 4 operands.  The third argument must be an 8-bit immediate (4-bit
18335 for roundsd/roundss), or xmm0 for the variable blend insns. */
18336
18337 static rtx
18338 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18339 rtx target)
18340 {
18341 rtx pat;
18342 tree arg0 = CALL_EXPR_ARG (exp, 0);
18343 tree arg1 = CALL_EXPR_ARG (exp, 1);
18344 tree arg2 = CALL_EXPR_ARG (exp, 2);
18345 rtx op0 = expand_normal (arg0);
18346 rtx op1 = expand_normal (arg1);
18347 rtx op2 = expand_normal (arg2);
18348 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18349 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18350 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18351 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18352
18353 if (VECTOR_MODE_P (mode1))
18354 op0 = safe_vector_operand (op0, mode1);
18355 if (VECTOR_MODE_P (mode2))
18356 op1 = safe_vector_operand (op1, mode2);
18357 if (VECTOR_MODE_P (mode3))
18358 op2 = safe_vector_operand (op2, mode3);
18359
18360 if (optimize
18361 || target == 0
18362 || GET_MODE (target) != tmode
18363 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18364 target = gen_reg_rtx (tmode);
18365
18366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18367 op0 = copy_to_mode_reg (mode1, op0);
18368 if ((optimize && !register_operand (op1, mode2))
18369 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18370 op1 = copy_to_mode_reg (mode2, op1);
18371
18372 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18373 switch (icode)
18374 {
18375 case CODE_FOR_sse4_1_blendvpd:
18376 case CODE_FOR_sse4_1_blendvps:
18377 case CODE_FOR_sse4_1_pblendvb:
18378 op2 = copy_to_mode_reg (mode3, op2);
18379 break;
18380
18381 case CODE_FOR_sse4_1_roundsd:
18382 case CODE_FOR_sse4_1_roundss:
18383 error ("the third argument must be a 4-bit immediate");
18384 return const0_rtx;
18385
18386 default:
18387 error ("the third argument must be an 8-bit immediate");
18388 return const0_rtx;
18389 }
18390
18391 pat = GEN_FCN (icode) (target, op0, op1, op2);
18392 if (! pat)
18393 return 0;
18394 emit_insn (pat);
18395 return target;
18396 }
18397
18398 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18399
18400 static rtx
18401 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18402 {
18403 rtx pat;
18404 tree arg0 = CALL_EXPR_ARG (exp, 0);
18405 tree arg1 = CALL_EXPR_ARG (exp, 1);
18406 rtx op0 = expand_normal (arg0);
18407 rtx op1 = expand_normal (arg1);
18408 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18409 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18410 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18411
18412 if (optimize
18413 || !target
18414 || GET_MODE (target) != tmode
18415 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18416 target = gen_reg_rtx (tmode);
18417
18418 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18419 op0 = copy_to_mode_reg (mode0, op0);
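  /* A narrow second operand (e.g. the QImode byte of crc32qi) may not
     satisfy the predicate directly; force it into a register and use a
     subreg of the expected mode.  */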
18420 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18421 {
18422 op1 = copy_to_reg (op1);
18423 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18424 }
18425
18426 pat = GEN_FCN (icode) (target, op0, op1);
18427 if (! pat)
18428 return 0;
18429 emit_insn (pat);
18430 return target;
18431 }
18432
18433 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18434
18435 static rtx
18436 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18437 {
18438 rtx pat, xops[3];
18439 tree arg0 = CALL_EXPR_ARG (exp, 0);
18440 tree arg1 = CALL_EXPR_ARG (exp, 1);
18441 rtx op0 = expand_normal (arg0);
18442 rtx op1 = expand_normal (arg1);
18443 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18444 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18445 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18446
18447 if (VECTOR_MODE_P (mode0))
18448 op0 = safe_vector_operand (op0, mode0);
18449 if (VECTOR_MODE_P (mode1))
18450 op1 = safe_vector_operand (op1, mode1);
18451
18452 if (optimize || !target
18453 || GET_MODE (target) != tmode
18454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18455 target = gen_reg_rtx (tmode);
18456
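  /* If the second argument arrived as a 32-bit integer but the insn wants a
     TImode operand, load it into the low element of a V4SImode register
     with sse2_loadd and view that register as TImode, instead of going
     through memory.  */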
18457 if (GET_MODE (op1) == SImode && mode1 == TImode)
18458 {
18459 rtx x = gen_reg_rtx (V4SImode);
18460 emit_insn (gen_sse2_loadd (x, op1));
18461 op1 = gen_lowpart (TImode, x);
18462 }
18463
18464 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18465 op0 = copy_to_mode_reg (mode0, op0);
18466 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18467 op1 = copy_to_mode_reg (mode1, op1);
18468
18469 /* ??? Using ix86_fixup_binary_operands is problematic when
18470 we've got mismatched modes. Fake it. */
18471
18472 xops[0] = target;
18473 xops[1] = op0;
18474 xops[2] = op1;
18475
18476 if (tmode == mode0 && tmode == mode1)
18477 {
18478 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18479 op0 = xops[1];
18480 op1 = xops[2];
18481 }
18482 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18483 {
18484 op0 = force_reg (mode0, op0);
18485 op1 = force_reg (mode1, op1);
18486 target = gen_reg_rtx (tmode);
18487 }
18488
18489 pat = GEN_FCN (icode) (target, op0, op1);
18490 if (! pat)
18491 return 0;
18492 emit_insn (pat);
18493 return target;
18494 }
18495
18496 /* Subroutine of ix86_expand_builtin to take care of stores. */
18497
18498 static rtx
18499 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18500 {
18501 rtx pat;
18502 tree arg0 = CALL_EXPR_ARG (exp, 0);
18503 tree arg1 = CALL_EXPR_ARG (exp, 1);
18504 rtx op0 = expand_normal (arg0);
18505 rtx op1 = expand_normal (arg1);
18506 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18507 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18508
18509 if (VECTOR_MODE_P (mode1))
18510 op1 = safe_vector_operand (op1, mode1);
18511
18512 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18513 op1 = copy_to_mode_reg (mode1, op1);
18514
18515 pat = GEN_FCN (icode) (op0, op1);
18516 if (pat)
18517 emit_insn (pat);
18518 return 0;
18519 }
18520
18521 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18522
18523 static rtx
18524 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18525 rtx target, int do_load)
18526 {
18527 rtx pat;
18528 tree arg0 = CALL_EXPR_ARG (exp, 0);
18529 rtx op0 = expand_normal (arg0);
18530 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18531 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18532
18533 if (optimize || !target
18534 || GET_MODE (target) != tmode
18535 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18536 target = gen_reg_rtx (tmode);
18537 if (do_load)
18538 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18539 else
18540 {
18541 if (VECTOR_MODE_P (mode0))
18542 op0 = safe_vector_operand (op0, mode0);
18543
18544 if ((optimize && !register_operand (op0, mode0))
18545 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18546 op0 = copy_to_mode_reg (mode0, op0);
18547 }
18548
18549 switch (icode)
18550 {
18551 case CODE_FOR_sse4_1_roundpd:
18552 case CODE_FOR_sse4_1_roundps:
18553 {
18554 tree arg1 = CALL_EXPR_ARG (exp, 1);
18555 rtx op1 = expand_normal (arg1);
18556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18557
18558 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18559 {
18560 error ("the second argument must be a 4-bit immediate");
18561 return const0_rtx;
18562 }
18563 pat = GEN_FCN (icode) (target, op0, op1);
18564 }
18565 break;
18566 default:
18567 pat = GEN_FCN (icode) (target, op0);
18568 break;
18569 }
18570
18571 if (! pat)
18572 return 0;
18573 emit_insn (pat);
18574 return target;
18575 }
18576
18577 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18578 sqrtss, rsqrtss, rcpss. */
18579
18580 static rtx
18581 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18582 {
18583 rtx pat;
18584 tree arg0 = CALL_EXPR_ARG (exp, 0);
18585 rtx op1, op0 = expand_normal (arg0);
18586 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18587 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18588
18589 if (optimize || !target
18590 || GET_MODE (target) != tmode
18591 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18592 target = gen_reg_rtx (tmode);
18593
18594 if (VECTOR_MODE_P (mode0))
18595 op0 = safe_vector_operand (op0, mode0);
18596
18597 if ((optimize && !register_operand (op0, mode0))
18598 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18599 op0 = copy_to_mode_reg (mode0, op0);
18600
18601 op1 = op0;
18602 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18603 op1 = copy_to_mode_reg (mode0, op1);
18604
18605 pat = GEN_FCN (icode) (target, op0, op1);
18606 if (! pat)
18607 return 0;
18608 emit_insn (pat);
18609 return target;
18610 }
18611
18612 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18613
18614 static rtx
18615 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18616 rtx target)
18617 {
18618 rtx pat;
18619 tree arg0 = CALL_EXPR_ARG (exp, 0);
18620 tree arg1 = CALL_EXPR_ARG (exp, 1);
18621 rtx op0 = expand_normal (arg0);
18622 rtx op1 = expand_normal (arg1);
18623 rtx op2;
18624 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18625 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18626 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18627 enum rtx_code comparison = d->comparison;
18628
18629 if (VECTOR_MODE_P (mode0))
18630 op0 = safe_vector_operand (op0, mode0);
18631 if (VECTOR_MODE_P (mode1))
18632 op1 = safe_vector_operand (op1, mode1);
18633
18634 /* Swap operands if we have a comparison that isn't available in
18635 hardware. */
18636 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18637 {
18638 rtx tmp = gen_reg_rtx (mode1);
18639 emit_move_insn (tmp, op1);
18640 op1 = op0;
18641 op0 = tmp;
18642 }
18643
18644 if (optimize || !target
18645 || GET_MODE (target) != tmode
18646 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18647 target = gen_reg_rtx (tmode);
18648
18649 if ((optimize && !register_operand (op0, mode0))
18650 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18651 op0 = copy_to_mode_reg (mode0, op0);
18652 if ((optimize && !register_operand (op1, mode1))
18653 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18654 op1 = copy_to_mode_reg (mode1, op1);
18655
18656 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18657 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18658 if (! pat)
18659 return 0;
18660 emit_insn (pat);
18661 return target;
18662 }
18663
18664 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18665
18666 static rtx
18667 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18668 rtx target)
18669 {
18670 rtx pat;
18671 tree arg0 = CALL_EXPR_ARG (exp, 0);
18672 tree arg1 = CALL_EXPR_ARG (exp, 1);
18673 rtx op0 = expand_normal (arg0);
18674 rtx op1 = expand_normal (arg1);
18675 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18676 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18677 enum rtx_code comparison = d->comparison;
18678
18679 if (VECTOR_MODE_P (mode0))
18680 op0 = safe_vector_operand (op0, mode0);
18681 if (VECTOR_MODE_P (mode1))
18682 op1 = safe_vector_operand (op1, mode1);
18683
18684 /* Swap operands if we have a comparison that isn't available in
18685 hardware. */
18686 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18687 {
18688 rtx tmp = op1;
18689 op1 = op0;
18690 op0 = tmp;
18691 }
18692
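/* Materialize the boolean result by zeroing an SImode pseudo and then
writing only its low byte (a QImode STRICT_LOW_PART) from the flags
comparison below; returning the SImode SUBREG_REG yields a
zero-extended 0/1 value without a separate extension. */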
18693 target = gen_reg_rtx (SImode);
18694 emit_move_insn (target, const0_rtx);
18695 target = gen_rtx_SUBREG (QImode, target, 0);
18696
18697 if ((optimize && !register_operand (op0, mode0))
18698 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18699 op0 = copy_to_mode_reg (mode0, op0);
18700 if ((optimize && !register_operand (op1, mode1))
18701 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18702 op1 = copy_to_mode_reg (mode1, op1);
18703
18704 pat = GEN_FCN (d->icode) (op0, op1);
18705 if (! pat)
18706 return 0;
18707 emit_insn (pat);
18708 emit_insn (gen_rtx_SET (VOIDmode,
18709 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18710 gen_rtx_fmt_ee (comparison, QImode,
18711 SET_DEST (pat),
18712 const0_rtx)));
18713
18714 return SUBREG_REG (target);
18715 }
18716
18717 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18718
18719 static rtx
18720 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18721 rtx target)
18722 {
18723 rtx pat;
18724 tree arg0 = CALL_EXPR_ARG (exp, 0);
18725 tree arg1 = CALL_EXPR_ARG (exp, 1);
18726 rtx op0 = expand_normal (arg0);
18727 rtx op1 = expand_normal (arg1);
18728 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18729 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18730 enum rtx_code comparison = d->comparison;
18731
18732 if (VECTOR_MODE_P (mode0))
18733 op0 = safe_vector_operand (op0, mode0);
18734 if (VECTOR_MODE_P (mode1))
18735 op1 = safe_vector_operand (op1, mode1);
18736
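/* Same zero-then-set-low-byte idiom as in ix86_expand_sse_comi above. */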
18737 target = gen_reg_rtx (SImode);
18738 emit_move_insn (target, const0_rtx);
18739 target = gen_rtx_SUBREG (QImode, target, 0);
18740
18741 if ((optimize && !register_operand (op0, mode0))
18742 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18743 op0 = copy_to_mode_reg (mode0, op0);
18744 if ((optimize && !register_operand (op1, mode1))
18745 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18746 op1 = copy_to_mode_reg (mode1, op1);
18747
18748 pat = GEN_FCN (d->icode) (op0, op1);
18749 if (! pat)
18750 return 0;
18751 emit_insn (pat);
18752 emit_insn (gen_rtx_SET (VOIDmode,
18753 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18754 gen_rtx_fmt_ee (comparison, QImode,
18755 SET_DEST (pat),
18756 const0_rtx)));
18757
18758 return SUBREG_REG (target);
18759 }
18760
18761 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18762
18763 static rtx
18764 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18765 tree exp, rtx target)
18766 {
18767 rtx pat;
18768 tree arg0 = CALL_EXPR_ARG (exp, 0);
18769 tree arg1 = CALL_EXPR_ARG (exp, 1);
18770 tree arg2 = CALL_EXPR_ARG (exp, 2);
18771 tree arg3 = CALL_EXPR_ARG (exp, 3);
18772 tree arg4 = CALL_EXPR_ARG (exp, 4);
18773 rtx scratch0, scratch1;
18774 rtx op0 = expand_normal (arg0);
18775 rtx op1 = expand_normal (arg1);
18776 rtx op2 = expand_normal (arg2);
18777 rtx op3 = expand_normal (arg3);
18778 rtx op4 = expand_normal (arg4);
18779 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18780
18781 tmode0 = insn_data[d->icode].operand[0].mode;
18782 tmode1 = insn_data[d->icode].operand[1].mode;
18783 modev2 = insn_data[d->icode].operand[2].mode;
18784 modei3 = insn_data[d->icode].operand[3].mode;
18785 modev4 = insn_data[d->icode].operand[4].mode;
18786 modei5 = insn_data[d->icode].operand[5].mode;
18787 modeimm = insn_data[d->icode].operand[6].mode;
18788
18789 if (VECTOR_MODE_P (modev2))
18790 op0 = safe_vector_operand (op0, modev2);
18791 if (VECTOR_MODE_P (modev4))
18792 op2 = safe_vector_operand (op2, modev4);
18793
18794 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18795 op0 = copy_to_mode_reg (modev2, op0);
18796 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18797 op1 = copy_to_mode_reg (modei3, op1);
18798 if ((optimize && !register_operand (op2, modev4))
18799 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18800 op2 = copy_to_mode_reg (modev4, op2);
18801 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18802 op3 = copy_to_mode_reg (modei5, op3);
18803
18804 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18805 {
18806 error ("the fifth argument must be a 8-bit immediate");
18807 return const0_rtx;
18808 }
18809
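/* PCMPESTRI-style builtins want the index result (operand 0),
PCMPESTRM-style builtins want the mask result (operand 1), and the
remaining builtins only test a flag: for those, both outputs go to
scratch registers and D->FLAG below names the flags mode to compare
against zero. */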
18810 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18811 {
18812 if (optimize || !target
18813 || GET_MODE (target) != tmode0
18814 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18815 target = gen_reg_rtx (tmode0);
18816
18817 scratch1 = gen_reg_rtx (tmode1);
18818
18819 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18820 }
18821 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18822 {
18823 if (optimize || !target
18824 || GET_MODE (target) != tmode1
18825 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18826 target = gen_reg_rtx (tmode1);
18827
18828 scratch0 = gen_reg_rtx (tmode0);
18829
18830 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18831 }
18832 else
18833 {
18834 gcc_assert (d->flag);
18835
18836 scratch0 = gen_reg_rtx (tmode0);
18837 scratch1 = gen_reg_rtx (tmode1);
18838
18839 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18840 }
18841
18842 if (! pat)
18843 return 0;
18844
18845 emit_insn (pat);
18846
18847 if (d->flag)
18848 {
18849 target = gen_reg_rtx (SImode);
18850 emit_move_insn (target, const0_rtx);
18851 target = gen_rtx_SUBREG (QImode, target, 0);
18852
18853 emit_insn
18854 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18855 gen_rtx_fmt_ee (EQ, QImode,
18856 gen_rtx_REG ((enum machine_mode) d->flag,
18857 FLAGS_REG),
18858 const0_rtx)));
18859 return SUBREG_REG (target);
18860 }
18861 else
18862 return target;
18863 }
18864
18865
18866 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18867
18868 static rtx
18869 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18870 tree exp, rtx target)
18871 {
18872 rtx pat;
18873 tree arg0 = CALL_EXPR_ARG (exp, 0);
18874 tree arg1 = CALL_EXPR_ARG (exp, 1);
18875 tree arg2 = CALL_EXPR_ARG (exp, 2);
18876 rtx scratch0, scratch1;
18877 rtx op0 = expand_normal (arg0);
18878 rtx op1 = expand_normal (arg1);
18879 rtx op2 = expand_normal (arg2);
18880 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18881
18882 tmode0 = insn_data[d->icode].operand[0].mode;
18883 tmode1 = insn_data[d->icode].operand[1].mode;
18884 modev2 = insn_data[d->icode].operand[2].mode;
18885 modev3 = insn_data[d->icode].operand[3].mode;
18886 modeimm = insn_data[d->icode].operand[4].mode;
18887
18888 if (VECTOR_MODE_P (modev2))
18889 op0 = safe_vector_operand (op0, modev2);
18890 if (VECTOR_MODE_P (modev3))
18891 op1 = safe_vector_operand (op1, modev3);
18892
18893 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18894 op0 = copy_to_mode_reg (modev2, op0);
18895 if ((optimize && !register_operand (op1, modev3))
18896 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18897 op1 = copy_to_mode_reg (modev3, op1);
18898
18899 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18900 {
18901 error ("the third argument must be a 8-bit immediate");
18902 return const0_rtx;
18903 }
18904
18905 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18906 {
18907 if (optimize || !target
18908 || GET_MODE (target) != tmode0
18909 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18910 target = gen_reg_rtx (tmode0);
18911
18912 scratch1 = gen_reg_rtx (tmode1);
18913
18914 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18915 }
18916 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18917 {
18918 if (optimize || !target
18919 || GET_MODE (target) != tmode1
18920 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18921 target = gen_reg_rtx (tmode1);
18922
18923 scratch0 = gen_reg_rtx (tmode0);
18924
18925 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
18926 }
18927 else
18928 {
18929 gcc_assert (d->flag);
18930
18931 scratch0 = gen_reg_rtx (tmode0);
18932 scratch1 = gen_reg_rtx (tmode1);
18933
18934 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
18935 }
18936
18937 if (! pat)
18938 return 0;
18939
18940 emit_insn (pat);
18941
18942 if (d->flag)
18943 {
18944 target = gen_reg_rtx (SImode);
18945 emit_move_insn (target, const0_rtx);
18946 target = gen_rtx_SUBREG (QImode, target, 0);
18947
18948 emit_insn
18949 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18950 gen_rtx_fmt_ee (EQ, QImode,
18951 gen_rtx_REG ((enum machine_mode) d->flag,
18952 FLAGS_REG),
18953 const0_rtx)));
18954 return SUBREG_REG (target);
18955 }
18956 else
18957 return target;
18958 }
18959
18960 /* Return the integer constant in ARG. Constrain it to be in the range
18961 of the subparts of VEC_TYPE; issue an error if not. */
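/* For example, for a V4SF vector type TYPE_VECTOR_SUBPARTS is 4, so only
the selectors 0, 1, 2 and 3 are accepted; anything else is diagnosed
and 0 is returned. */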
18962
18963 static int
18964 get_element_number (tree vec_type, tree arg)
18965 {
18966 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18967
18968 if (!host_integerp (arg, 1)
18969 || (elt = tree_low_cst (arg, 1), elt > max))
18970 {
18971 error ("selector must be an integer constant in the range 0..%wi", max);
18972 return 0;
18973 }
18974
18975 return elt;
18976 }
18977
18978 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18979 ix86_expand_vector_init. We DO have language-level syntax for this, in
18980 the form of (type){ init-list }. Except that since we can't place emms
18981 instructions from inside the compiler, we can't allow the use of MMX
18982 registers unless the user explicitly asks for it. So we do *not* define
18983 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18984 we have builtins invoked by mmintrin.h that give us license to emit
18985 these sorts of instructions. */
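
/* As an illustrative sketch (based on how the mmintrin.h wrappers are
expected to be written, not on code in this file), a user-level
construction such as

#include <mmintrin.h>
__m64 v = _mm_set_pi32 (hi, lo);

(with hi and lo standing for arbitrary int values) is assumed to expand
to a call to __builtin_ia32_vec_init_v2si and therefore to reach
ix86_expand_vec_init_builtin below through IX86_BUILTIN_VEC_INIT_V2SI. */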
18986
18987 static rtx
18988 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18989 {
18990 enum machine_mode tmode = TYPE_MODE (type);
18991 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18992 int i, n_elt = GET_MODE_NUNITS (tmode);
18993 rtvec v = rtvec_alloc (n_elt);
18994
18995 gcc_assert (VECTOR_MODE_P (tmode));
18996 gcc_assert (call_expr_nargs (exp) == n_elt);
18997
18998 for (i = 0; i < n_elt; ++i)
18999 {
19000 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19001 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19002 }
19003
19004 if (!target || !register_operand (target, tmode))
19005 target = gen_reg_rtx (tmode);
19006
19007 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19008 return target;
19009 }
19010
19011 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19012 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19013 had a language-level syntax for referencing vector elements. */
19014
19015 static rtx
19016 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19017 {
19018 enum machine_mode tmode, mode0;
19019 tree arg0, arg1;
19020 int elt;
19021 rtx op0;
19022
19023 arg0 = CALL_EXPR_ARG (exp, 0);
19024 arg1 = CALL_EXPR_ARG (exp, 1);
19025
19026 op0 = expand_normal (arg0);
19027 elt = get_element_number (TREE_TYPE (arg0), arg1);
19028
19029 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19030 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19031 gcc_assert (VECTOR_MODE_P (mode0));
19032
19033 op0 = force_reg (mode0, op0);
19034
19035 if (optimize || !target || !register_operand (target, tmode))
19036 target = gen_reg_rtx (tmode);
19037
19038 ix86_expand_vector_extract (true, target, op0, elt);
19039
19040 return target;
19041 }
19042
19043 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19044 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19045 a language-level syntax for referencing vector elements. */
19046
19047 static rtx
19048 ix86_expand_vec_set_builtin (tree exp)
19049 {
19050 enum machine_mode tmode, mode1;
19051 tree arg0, arg1, arg2;
19052 int elt;
19053 rtx op0, op1, target;
19054
19055 arg0 = CALL_EXPR_ARG (exp, 0);
19056 arg1 = CALL_EXPR_ARG (exp, 1);
19057 arg2 = CALL_EXPR_ARG (exp, 2);
19058
19059 tmode = TYPE_MODE (TREE_TYPE (arg0));
19060 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19061 gcc_assert (VECTOR_MODE_P (tmode));
19062
19063 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19064 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19065 elt = get_element_number (TREE_TYPE (arg0), arg2);
19066
19067 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19068 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19069
19070 op0 = force_reg (tmode, op0);
19071 op1 = force_reg (mode1, op1);
19072
19073 /* OP0 is the source of these builtin functions and shouldn't be
19074 modified. Create a copy, use it and return it as target. */
19075 target = gen_reg_rtx (tmode);
19076 emit_move_insn (target, op0);
19077 ix86_expand_vector_set (true, target, op1, elt);
19078
19079 return target;
19080 }
19081
19082 /* Expand an expression EXP that calls a built-in function,
19083 with result going to TARGET if that's convenient
19084 (and in mode MODE if that's convenient).
19085 SUBTARGET may be used as the target for computing one of EXP's operands.
19086 IGNORE is nonzero if the value is to be ignored. */
19087
19088 static rtx
19089 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19090 enum machine_mode mode ATTRIBUTE_UNUSED,
19091 int ignore ATTRIBUTE_UNUSED)
19092 {
19093 const struct builtin_description *d;
19094 size_t i;
19095 enum insn_code icode;
19096 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19097 tree arg0, arg1, arg2, arg3;
19098 rtx op0, op1, op2, op3, pat;
19099 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19100 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19101
19102 switch (fcode)
19103 {
19104 case IX86_BUILTIN_EMMS:
19105 emit_insn (gen_mmx_emms ());
19106 return 0;
19107
19108 case IX86_BUILTIN_SFENCE:
19109 emit_insn (gen_sse_sfence ());
19110 return 0;
19111
19112 case IX86_BUILTIN_MASKMOVQ:
19113 case IX86_BUILTIN_MASKMOVDQU:
19114 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19115 ? CODE_FOR_mmx_maskmovq
19116 : CODE_FOR_sse2_maskmovdqu);
19117 /* Note the arg order is different from the operand order. */
19118 arg1 = CALL_EXPR_ARG (exp, 0);
19119 arg2 = CALL_EXPR_ARG (exp, 1);
19120 arg0 = CALL_EXPR_ARG (exp, 2);
19121 op0 = expand_normal (arg0);
19122 op1 = expand_normal (arg1);
19123 op2 = expand_normal (arg2);
19124 mode0 = insn_data[icode].operand[0].mode;
19125 mode1 = insn_data[icode].operand[1].mode;
19126 mode2 = insn_data[icode].operand[2].mode;
19127
19128 op0 = force_reg (Pmode, op0);
19129 op0 = gen_rtx_MEM (mode1, op0);
19130
19131 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19132 op0 = copy_to_mode_reg (mode0, op0);
19133 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19134 op1 = copy_to_mode_reg (mode1, op1);
19135 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19136 op2 = copy_to_mode_reg (mode2, op2);
19137 pat = GEN_FCN (icode) (op0, op1, op2);
19138 if (! pat)
19139 return 0;
19140 emit_insn (pat);
19141 return 0;
19142
19143 case IX86_BUILTIN_RSQRTF:
19144 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19145
19146 case IX86_BUILTIN_SQRTSS:
19147 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19148 case IX86_BUILTIN_RSQRTSS:
19149 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19150 case IX86_BUILTIN_RCPSS:
19151 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19152
19153 case IX86_BUILTIN_LOADUPS:
19154 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19155
19156 case IX86_BUILTIN_STOREUPS:
19157 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19158
19159 case IX86_BUILTIN_LOADHPS:
19160 case IX86_BUILTIN_LOADLPS:
19161 case IX86_BUILTIN_LOADHPD:
19162 case IX86_BUILTIN_LOADLPD:
19163 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19164 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19165 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19166 : CODE_FOR_sse2_loadlpd);
19167 arg0 = CALL_EXPR_ARG (exp, 0);
19168 arg1 = CALL_EXPR_ARG (exp, 1);
19169 op0 = expand_normal (arg0);
19170 op1 = expand_normal (arg1);
19171 tmode = insn_data[icode].operand[0].mode;
19172 mode0 = insn_data[icode].operand[1].mode;
19173 mode1 = insn_data[icode].operand[2].mode;
19174
19175 op0 = force_reg (mode0, op0);
19176 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19177 if (optimize || target == 0
19178 || GET_MODE (target) != tmode
19179 || !register_operand (target, tmode))
19180 target = gen_reg_rtx (tmode);
19181 pat = GEN_FCN (icode) (target, op0, op1);
19182 if (! pat)
19183 return 0;
19184 emit_insn (pat);
19185 return target;
19186
19187 case IX86_BUILTIN_STOREHPS:
19188 case IX86_BUILTIN_STORELPS:
19189 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19190 : CODE_FOR_sse_storelps);
19191 arg0 = CALL_EXPR_ARG (exp, 0);
19192 arg1 = CALL_EXPR_ARG (exp, 1);
19193 op0 = expand_normal (arg0);
19194 op1 = expand_normal (arg1);
19195 mode0 = insn_data[icode].operand[0].mode;
19196 mode1 = insn_data[icode].operand[1].mode;
19197
19198 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19199 op1 = force_reg (mode1, op1);
19200
19201 pat = GEN_FCN (icode) (op0, op1);
19202 if (! pat)
19203 return 0;
19204 emit_insn (pat);
19205 return const0_rtx;
19206
19207 case IX86_BUILTIN_MOVNTPS:
19208 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19209 case IX86_BUILTIN_MOVNTQ:
19210 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19211
19212 case IX86_BUILTIN_LDMXCSR:
19213 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19214 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19215 emit_move_insn (target, op0);
19216 emit_insn (gen_sse_ldmxcsr (target));
19217 return 0;
19218
19219 case IX86_BUILTIN_STMXCSR:
19220 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19221 emit_insn (gen_sse_stmxcsr (target));
19222 return copy_to_mode_reg (SImode, target);
19223
19224 case IX86_BUILTIN_SHUFPS:
19225 case IX86_BUILTIN_SHUFPD:
19226 icode = (fcode == IX86_BUILTIN_SHUFPS
19227 ? CODE_FOR_sse_shufps
19228 : CODE_FOR_sse2_shufpd);
19229 arg0 = CALL_EXPR_ARG (exp, 0);
19230 arg1 = CALL_EXPR_ARG (exp, 1);
19231 arg2 = CALL_EXPR_ARG (exp, 2);
19232 op0 = expand_normal (arg0);
19233 op1 = expand_normal (arg1);
19234 op2 = expand_normal (arg2);
19235 tmode = insn_data[icode].operand[0].mode;
19236 mode0 = insn_data[icode].operand[1].mode;
19237 mode1 = insn_data[icode].operand[2].mode;
19238 mode2 = insn_data[icode].operand[3].mode;
19239
19240 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19241 op0 = copy_to_mode_reg (mode0, op0);
19242 if ((optimize && !register_operand (op1, mode1))
19243 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19244 op1 = copy_to_mode_reg (mode1, op1);
19245 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19246 {
19247 /* @@@ better error message */
19248 error ("mask must be an immediate");
19249 return gen_reg_rtx (tmode);
19250 }
19251 if (optimize || target == 0
19252 || GET_MODE (target) != tmode
19253 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19254 target = gen_reg_rtx (tmode);
19255 pat = GEN_FCN (icode) (target, op0, op1, op2);
19256 if (! pat)
19257 return 0;
19258 emit_insn (pat);
19259 return target;
19260
19261 case IX86_BUILTIN_PSHUFW:
19262 case IX86_BUILTIN_PSHUFD:
19263 case IX86_BUILTIN_PSHUFHW:
19264 case IX86_BUILTIN_PSHUFLW:
19265 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19266 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19267 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19268 : CODE_FOR_mmx_pshufw);
19269 arg0 = CALL_EXPR_ARG (exp, 0);
19270 arg1 = CALL_EXPR_ARG (exp, 1);
19271 op0 = expand_normal (arg0);
19272 op1 = expand_normal (arg1);
19273 tmode = insn_data[icode].operand[0].mode;
19274 mode1 = insn_data[icode].operand[1].mode;
19275 mode2 = insn_data[icode].operand[2].mode;
19276
19277 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19278 op0 = copy_to_mode_reg (mode1, op0);
19279 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19280 {
19281 /* @@@ better error message */
19282 error ("mask must be an immediate");
19283 return const0_rtx;
19284 }
19285 if (target == 0
19286 || GET_MODE (target) != tmode
19287 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19288 target = gen_reg_rtx (tmode);
19289 pat = GEN_FCN (icode) (target, op0, op1);
19290 if (! pat)
19291 return 0;
19292 emit_insn (pat);
19293 return target;
19294
19295 case IX86_BUILTIN_PSLLWI128:
19296 icode = CODE_FOR_ashlv8hi3;
19297 goto do_pshifti;
19298 case IX86_BUILTIN_PSLLDI128:
19299 icode = CODE_FOR_ashlv4si3;
19300 goto do_pshifti;
19301 case IX86_BUILTIN_PSLLQI128:
19302 icode = CODE_FOR_ashlv2di3;
19303 goto do_pshifti;
19304 case IX86_BUILTIN_PSRAWI128:
19305 icode = CODE_FOR_ashrv8hi3;
19306 goto do_pshifti;
19307 case IX86_BUILTIN_PSRADI128:
19308 icode = CODE_FOR_ashrv4si3;
19309 goto do_pshifti;
19310 case IX86_BUILTIN_PSRLWI128:
19311 icode = CODE_FOR_lshrv8hi3;
19312 goto do_pshifti;
19313 case IX86_BUILTIN_PSRLDI128:
19314 icode = CODE_FOR_lshrv4si3;
19315 goto do_pshifti;
19316 case IX86_BUILTIN_PSRLQI128:
19317 icode = CODE_FOR_lshrv2di3;
19318 goto do_pshifti;
19319 do_pshifti:
19320 arg0 = CALL_EXPR_ARG (exp, 0);
19321 arg1 = CALL_EXPR_ARG (exp, 1);
19322 op0 = expand_normal (arg0);
19323 op1 = expand_normal (arg1);
19324
19325 if (!CONST_INT_P (op1))
19326 {
19327 error ("shift must be an immediate");
19328 return const0_rtx;
19329 }
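/* Out-of-range counts are clamped to 255; the hardware treats any count
larger than the element width as a shift by "everything" (all zeros for
logical shifts, all sign bits for arithmetic shifts), so 255 reproduces
that behavior for every element size. */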
19330 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19331 op1 = GEN_INT (255);
19332
19333 tmode = insn_data[icode].operand[0].mode;
19334 mode1 = insn_data[icode].operand[1].mode;
19335 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19336 op0 = copy_to_reg (op0);
19337
19338 target = gen_reg_rtx (tmode);
19339 pat = GEN_FCN (icode) (target, op0, op1);
19340 if (!pat)
19341 return 0;
19342 emit_insn (pat);
19343 return target;
19344
19345 case IX86_BUILTIN_PSLLW128:
19346 icode = CODE_FOR_ashlv8hi3;
19347 goto do_pshift;
19348 case IX86_BUILTIN_PSLLD128:
19349 icode = CODE_FOR_ashlv4si3;
19350 goto do_pshift;
19351 case IX86_BUILTIN_PSLLQ128:
19352 icode = CODE_FOR_ashlv2di3;
19353 goto do_pshift;
19354 case IX86_BUILTIN_PSRAW128:
19355 icode = CODE_FOR_ashrv8hi3;
19356 goto do_pshift;
19357 case IX86_BUILTIN_PSRAD128:
19358 icode = CODE_FOR_ashrv4si3;
19359 goto do_pshift;
19360 case IX86_BUILTIN_PSRLW128:
19361 icode = CODE_FOR_lshrv8hi3;
19362 goto do_pshift;
19363 case IX86_BUILTIN_PSRLD128:
19364 icode = CODE_FOR_lshrv4si3;
19365 goto do_pshift;
19366 case IX86_BUILTIN_PSRLQ128:
19367 icode = CODE_FOR_lshrv2di3;
19368 goto do_pshift;
19369 do_pshift:
19370 arg0 = CALL_EXPR_ARG (exp, 0);
19371 arg1 = CALL_EXPR_ARG (exp, 1);
19372 op0 = expand_normal (arg0);
19373 op1 = expand_normal (arg1);
19374
19375 tmode = insn_data[icode].operand[0].mode;
19376 mode1 = insn_data[icode].operand[1].mode;
19377
19378 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19379 op0 = copy_to_reg (op0);
19380
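/* The shift-by-register patterns used here take the count as a full
128-bit (TImode) operand, so reinterpret the count argument as TImode
before matching it against the predicate. */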
19381 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19382 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19383 op1 = copy_to_reg (op1);
19384
19385 target = gen_reg_rtx (tmode);
19386 pat = GEN_FCN (icode) (target, op0, op1);
19387 if (!pat)
19388 return 0;
19389 emit_insn (pat);
19390 return target;
19391
19392 case IX86_BUILTIN_PSLLDQI128:
19393 case IX86_BUILTIN_PSRLDQI128:
19394 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19395 : CODE_FOR_sse2_lshrti3);
19396 arg0 = CALL_EXPR_ARG (exp, 0);
19397 arg1 = CALL_EXPR_ARG (exp, 1);
19398 op0 = expand_normal (arg0);
19399 op1 = expand_normal (arg1);
19400 tmode = insn_data[icode].operand[0].mode;
19401 mode1 = insn_data[icode].operand[1].mode;
19402 mode2 = insn_data[icode].operand[2].mode;
19403
19404 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19405 {
19406 op0 = copy_to_reg (op0);
19407 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19408 }
19409 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19410 {
19411 error ("shift must be an immediate");
19412 return const0_rtx;
19413 }
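/* The ashlti3/lshrti3 patterns work on TImode values, while the builtin's
result type is V2DI; allocate the result in V2DImode and hand the
pattern a TImode view of it through a subreg. */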
19414 target = gen_reg_rtx (V2DImode);
19415 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19416 op0, op1);
19417 if (! pat)
19418 return 0;
19419 emit_insn (pat);
19420 return target;
19421
19422 case IX86_BUILTIN_FEMMS:
19423 emit_insn (gen_mmx_femms ());
19424 return NULL_RTX;
19425
19426 case IX86_BUILTIN_PAVGUSB:
19427 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19428
19429 case IX86_BUILTIN_PF2ID:
19430 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19431
19432 case IX86_BUILTIN_PFACC:
19433 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19434
19435 case IX86_BUILTIN_PFADD:
19436 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19437
19438 case IX86_BUILTIN_PFCMPEQ:
19439 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19440
19441 case IX86_BUILTIN_PFCMPGE:
19442 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19443
19444 case IX86_BUILTIN_PFCMPGT:
19445 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19446
19447 case IX86_BUILTIN_PFMAX:
19448 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19449
19450 case IX86_BUILTIN_PFMIN:
19451 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19452
19453 case IX86_BUILTIN_PFMUL:
19454 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19455
19456 case IX86_BUILTIN_PFRCP:
19457 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19458
19459 case IX86_BUILTIN_PFRCPIT1:
19460 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19461
19462 case IX86_BUILTIN_PFRCPIT2:
19463 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19464
19465 case IX86_BUILTIN_PFRSQIT1:
19466 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19467
19468 case IX86_BUILTIN_PFRSQRT:
19469 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19470
19471 case IX86_BUILTIN_PFSUB:
19472 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19473
19474 case IX86_BUILTIN_PFSUBR:
19475 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19476
19477 case IX86_BUILTIN_PI2FD:
19478 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19479
19480 case IX86_BUILTIN_PMULHRW:
19481 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19482
19483 case IX86_BUILTIN_PF2IW:
19484 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19485
19486 case IX86_BUILTIN_PFNACC:
19487 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19488
19489 case IX86_BUILTIN_PFPNACC:
19490 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19491
19492 case IX86_BUILTIN_PI2FW:
19493 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19494
19495 case IX86_BUILTIN_PSWAPDSI:
19496 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19497
19498 case IX86_BUILTIN_PSWAPDSF:
19499 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19500
19501 case IX86_BUILTIN_SQRTSD:
19502 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19503 case IX86_BUILTIN_LOADUPD:
19504 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19505 case IX86_BUILTIN_STOREUPD:
19506 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19507
19508 case IX86_BUILTIN_MFENCE:
19509 emit_insn (gen_sse2_mfence ());
19510 return 0;
19511 case IX86_BUILTIN_LFENCE:
19512 emit_insn (gen_sse2_lfence ());
19513 return 0;
19514
19515 case IX86_BUILTIN_CLFLUSH:
19516 arg0 = CALL_EXPR_ARG (exp, 0);
19517 op0 = expand_normal (arg0);
19518 icode = CODE_FOR_sse2_clflush;
19519 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19520 op0 = copy_to_mode_reg (Pmode, op0);
19521
19522 emit_insn (gen_sse2_clflush (op0));
19523 return 0;
19524
19525 case IX86_BUILTIN_MOVNTPD:
19526 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19527 case IX86_BUILTIN_MOVNTDQ:
19528 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19529 case IX86_BUILTIN_MOVNTI:
19530 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19531
19532 case IX86_BUILTIN_LOADDQU:
19533 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19534 case IX86_BUILTIN_STOREDQU:
19535 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19536
19537 case IX86_BUILTIN_MONITOR:
19538 arg0 = CALL_EXPR_ARG (exp, 0);
19539 arg1 = CALL_EXPR_ARG (exp, 1);
19540 arg2 = CALL_EXPR_ARG (exp, 2);
19541 op0 = expand_normal (arg0);
19542 op1 = expand_normal (arg1);
19543 op2 = expand_normal (arg2);
19544 if (!REG_P (op0))
19545 op0 = copy_to_mode_reg (Pmode, op0);
19546 if (!REG_P (op1))
19547 op1 = copy_to_mode_reg (SImode, op1);
19548 if (!REG_P (op2))
19549 op2 = copy_to_mode_reg (SImode, op2);
19550 if (!TARGET_64BIT)
19551 emit_insn (gen_sse3_monitor (op0, op1, op2));
19552 else
19553 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19554 return 0;
19555
19556 case IX86_BUILTIN_MWAIT:
19557 arg0 = CALL_EXPR_ARG (exp, 0);
19558 arg1 = CALL_EXPR_ARG (exp, 1);
19559 op0 = expand_normal (arg0);
19560 op1 = expand_normal (arg1);
19561 if (!REG_P (op0))
19562 op0 = copy_to_mode_reg (SImode, op0);
19563 if (!REG_P (op1))
19564 op1 = copy_to_mode_reg (SImode, op1);
19565 emit_insn (gen_sse3_mwait (op0, op1));
19566 return 0;
19567
19568 case IX86_BUILTIN_LDDQU:
19569 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19570 target, 1);
19571
19572 case IX86_BUILTIN_PALIGNR:
19573 case IX86_BUILTIN_PALIGNR128:
19574 if (fcode == IX86_BUILTIN_PALIGNR)
19575 {
19576 icode = CODE_FOR_ssse3_palignrdi;
19577 mode = DImode;
19578 }
19579 else
19580 {
19581 icode = CODE_FOR_ssse3_palignrti;
19582 mode = V2DImode;
19583 }
19584 arg0 = CALL_EXPR_ARG (exp, 0);
19585 arg1 = CALL_EXPR_ARG (exp, 1);
19586 arg2 = CALL_EXPR_ARG (exp, 2);
19587 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19588 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19589 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19590 tmode = insn_data[icode].operand[0].mode;
19591 mode1 = insn_data[icode].operand[1].mode;
19592 mode2 = insn_data[icode].operand[2].mode;
19593 mode3 = insn_data[icode].operand[3].mode;
19594
19595 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19596 {
19597 op0 = copy_to_reg (op0);
19598 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19599 }
19600 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19601 {
19602 op1 = copy_to_reg (op1);
19603 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19604 }
19605 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19606 {
19607 error ("shift must be an immediate");
19608 return const0_rtx;
19609 }
19610 target = gen_reg_rtx (mode);
19611 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19612 op0, op1, op2);
19613 if (! pat)
19614 return 0;
19615 emit_insn (pat);
19616 return target;
19617
19618 case IX86_BUILTIN_MOVNTDQA:
19619 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19620 target, 1);
19621
19622 case IX86_BUILTIN_MOVNTSD:
19623 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19624
19625 case IX86_BUILTIN_MOVNTSS:
19626 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19627
19628 case IX86_BUILTIN_INSERTQ:
19629 case IX86_BUILTIN_EXTRQ:
19630 icode = (fcode == IX86_BUILTIN_EXTRQ
19631 ? CODE_FOR_sse4a_extrq
19632 : CODE_FOR_sse4a_insertq);
19633 arg0 = CALL_EXPR_ARG (exp, 0);
19634 arg1 = CALL_EXPR_ARG (exp, 1);
19635 op0 = expand_normal (arg0);
19636 op1 = expand_normal (arg1);
19637 tmode = insn_data[icode].operand[0].mode;
19638 mode1 = insn_data[icode].operand[1].mode;
19639 mode2 = insn_data[icode].operand[2].mode;
19640 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19641 op0 = copy_to_mode_reg (mode1, op0);
19642 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19643 op1 = copy_to_mode_reg (mode2, op1);
19644 if (optimize || target == 0
19645 || GET_MODE (target) != tmode
19646 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19647 target = gen_reg_rtx (tmode);
19648 pat = GEN_FCN (icode) (target, op0, op1);
19649 if (! pat)
19650 return NULL_RTX;
19651 emit_insn (pat);
19652 return target;
19653
19654 case IX86_BUILTIN_EXTRQI:
19655 icode = CODE_FOR_sse4a_extrqi;
19656 arg0 = CALL_EXPR_ARG (exp, 0);
19657 arg1 = CALL_EXPR_ARG (exp, 1);
19658 arg2 = CALL_EXPR_ARG (exp, 2);
19659 op0 = expand_normal (arg0);
19660 op1 = expand_normal (arg1);
19661 op2 = expand_normal (arg2);
19662 tmode = insn_data[icode].operand[0].mode;
19663 mode1 = insn_data[icode].operand[1].mode;
19664 mode2 = insn_data[icode].operand[2].mode;
19665 mode3 = insn_data[icode].operand[3].mode;
19666 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19667 op0 = copy_to_mode_reg (mode1, op0);
19668 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19669 {
19670 error ("index mask must be an immediate");
19671 return gen_reg_rtx (tmode);
19672 }
19673 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19674 {
19675 error ("length mask must be an immediate");
19676 return gen_reg_rtx (tmode);
19677 }
19678 if (optimize || target == 0
19679 || GET_MODE (target) != tmode
19680 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19681 target = gen_reg_rtx (tmode);
19682 pat = GEN_FCN (icode) (target, op0, op1, op2);
19683 if (! pat)
19684 return NULL_RTX;
19685 emit_insn (pat);
19686 return target;
19687
19688 case IX86_BUILTIN_INSERTQI:
19689 icode = CODE_FOR_sse4a_insertqi;
19690 arg0 = CALL_EXPR_ARG (exp, 0);
19691 arg1 = CALL_EXPR_ARG (exp, 1);
19692 arg2 = CALL_EXPR_ARG (exp, 2);
19693 arg3 = CALL_EXPR_ARG (exp, 3);
19694 op0 = expand_normal (arg0);
19695 op1 = expand_normal (arg1);
19696 op2 = expand_normal (arg2);
19697 op3 = expand_normal (arg3);
19698 tmode = insn_data[icode].operand[0].mode;
19699 mode1 = insn_data[icode].operand[1].mode;
19700 mode2 = insn_data[icode].operand[2].mode;
19701 mode3 = insn_data[icode].operand[3].mode;
19702 mode4 = insn_data[icode].operand[4].mode;
19703
19704 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19705 op0 = copy_to_mode_reg (mode1, op0);
19706
19707 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19708 op1 = copy_to_mode_reg (mode2, op1);
19709
19710 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19711 {
19712 error ("index mask must be an immediate");
19713 return gen_reg_rtx (tmode);
19714 }
19715 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19716 {
19717 error ("length mask must be an immediate");
19718 return gen_reg_rtx (tmode);
19719 }
19720 if (optimize || target == 0
19721 || GET_MODE (target) != tmode
19722 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19723 target = gen_reg_rtx (tmode);
19724 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19725 if (! pat)
19726 return NULL_RTX;
19727 emit_insn (pat);
19728 return target;
19729
19730 case IX86_BUILTIN_VEC_INIT_V2SI:
19731 case IX86_BUILTIN_VEC_INIT_V4HI:
19732 case IX86_BUILTIN_VEC_INIT_V8QI:
19733 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19734
19735 case IX86_BUILTIN_VEC_EXT_V2DF:
19736 case IX86_BUILTIN_VEC_EXT_V2DI:
19737 case IX86_BUILTIN_VEC_EXT_V4SF:
19738 case IX86_BUILTIN_VEC_EXT_V4SI:
19739 case IX86_BUILTIN_VEC_EXT_V8HI:
19740 case IX86_BUILTIN_VEC_EXT_V2SI:
19741 case IX86_BUILTIN_VEC_EXT_V4HI:
19742 case IX86_BUILTIN_VEC_EXT_V16QI:
19743 return ix86_expand_vec_ext_builtin (exp, target);
19744
19745 case IX86_BUILTIN_VEC_SET_V2DI:
19746 case IX86_BUILTIN_VEC_SET_V4SF:
19747 case IX86_BUILTIN_VEC_SET_V4SI:
19748 case IX86_BUILTIN_VEC_SET_V8HI:
19749 case IX86_BUILTIN_VEC_SET_V4HI:
19750 case IX86_BUILTIN_VEC_SET_V16QI:
19751 return ix86_expand_vec_set_builtin (exp);
19752
19753 case IX86_BUILTIN_INFQ:
19754 {
19755 REAL_VALUE_TYPE inf;
19756 rtx tmp;
19757
19758 real_inf (&inf);
19759 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19760
19761 tmp = validize_mem (force_const_mem (mode, tmp));
19762
19763 if (target == 0)
19764 target = gen_reg_rtx (mode);
19765
19766 emit_move_insn (target, tmp);
19767 return target;
19768 }
19769
19770 case IX86_BUILTIN_FABSQ:
19771 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19772
19773 case IX86_BUILTIN_COPYSIGNQ:
19774 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19775
19776 default:
19777 break;
19778 }
19779
19780 for (i = 0, d = bdesc_sse_3arg;
19781 i < ARRAY_SIZE (bdesc_sse_3arg);
19782 i++, d++)
19783 if (d->code == fcode)
19784 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19785 target);
19786
19787 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19788 if (d->code == fcode)
19789 {
19790 /* Compares are treated specially. */
19791 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19792 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19793 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19794 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19795 return ix86_expand_sse_compare (d, exp, target);
19796
19797 return ix86_expand_binop_builtin (d->icode, exp, target);
19798 }
19799
19800 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19801 if (d->code == fcode)
19802 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19803
19804 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19805 if (d->code == fcode)
19806 return ix86_expand_sse_comi (d, exp, target);
19807
19808 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19809 if (d->code == fcode)
19810 return ix86_expand_sse_ptest (d, exp, target);
19811
19812 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19813 if (d->code == fcode)
19814 return ix86_expand_crc32 (d->icode, exp, target);
19815
19816 for (i = 0, d = bdesc_pcmpestr;
19817 i < ARRAY_SIZE (bdesc_pcmpestr);
19818 i++, d++)
19819 if (d->code == fcode)
19820 return ix86_expand_sse_pcmpestr (d, exp, target);
19821
19822 for (i = 0, d = bdesc_pcmpistr;
19823 i < ARRAY_SIZE (bdesc_pcmpistr);
19824 i++, d++)
19825 if (d->code == fcode)
19826 return ix86_expand_sse_pcmpistr (d, exp, target);
19827
19828 gcc_unreachable ();
19829 }
19830
19831 /* Returns a function decl for a vectorized version of the builtin function
19832 with builtin function code FN, result vector type TYPE_OUT and argument
19833 vector type TYPE_IN, or NULL_TREE if it is not available. */
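/* For example (an illustration, not an exhaustive list), when the
vectorizer asks for BUILT_IN_SQRT with a V2DF result and a V2DF argument,
the decl recorded for IX86_BUILTIN_SQRTPD is returned, so the loop ends
up using the packed sqrtpd instruction. */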
19834
19835 static tree
19836 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19837 tree type_in)
19838 {
19839 enum machine_mode in_mode, out_mode;
19840 int in_n, out_n;
19841
19842 if (TREE_CODE (type_out) != VECTOR_TYPE
19843 || TREE_CODE (type_in) != VECTOR_TYPE)
19844 return NULL_TREE;
19845
19846 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19847 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19848 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19849 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19850
19851 switch (fn)
19852 {
19853 case BUILT_IN_SQRT:
19854 if (out_mode == DFmode && out_n == 2
19855 && in_mode == DFmode && in_n == 2)
19856 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19857 return NULL_TREE;
19858
19859 case BUILT_IN_SQRTF:
19860 if (out_mode == SFmode && out_n == 4
19861 && in_mode == SFmode && in_n == 4)
19862 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19863 return NULL_TREE;
19864
19865 case BUILT_IN_LRINT:
19866 if (out_mode == SImode && out_n == 4
19867 && in_mode == DFmode && in_n == 2)
19868 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
19869 return NULL_TREE;
19870
19871 case BUILT_IN_LRINTF:
19872 if (out_mode == SImode && out_n == 4
19873 && in_mode == SFmode && in_n == 4)
19874 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19875 return NULL_TREE;
19876
19877 default:
19878 ;
19879 }
19880
19881 return NULL_TREE;
19882 }
19883
19884 /* Returns a decl of a function that implements conversion of the
19885 input vector of type TYPE, or NULL_TREE if it is not available. */
19886
19887 static tree
19888 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
19889 {
19890 if (TREE_CODE (type) != VECTOR_TYPE)
19891 return NULL_TREE;
19892
19893 switch (code)
19894 {
19895 case FLOAT_EXPR:
19896 switch (TYPE_MODE (type))
19897 {
19898 case V4SImode:
19899 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19900 default:
19901 return NULL_TREE;
19902 }
19903
19904 case FIX_TRUNC_EXPR:
19905 switch (TYPE_MODE (type))
19906 {
19907 case V4SFmode:
19908 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19909 default:
19910 return NULL_TREE;
19911 }
19912 default:
19913 return NULL_TREE;
19914
19915 }
19916 }
19917
19918 /* Returns a decl of a target-specific builtin that implements the
19919 reciprocal of the function FN, or NULL_TREE if it is not available. */
19920
19921 static tree
19922 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
19923 bool sqrt ATTRIBUTE_UNUSED)
19924 {
19925 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
19926 && flag_finite_math_only && !flag_trapping_math
19927 && flag_unsafe_math_optimizations))
19928 return NULL_TREE;
19929
19930 if (md_fn)
19931 /* Machine dependent builtins. */
19932 switch (fn)
19933 {
19934 /* Vectorized version of sqrt to rsqrt conversion. */
19935 case IX86_BUILTIN_SQRTPS:
19936 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
19937
19938 default:
19939 return NULL_TREE;
19940 }
19941 else
19942 /* Normal builtins. */
19943 switch (fn)
19944 {
19945 /* Sqrt to rsqrt conversion. */
19946 case BUILT_IN_SQRTF:
19947 return ix86_builtins[IX86_BUILTIN_RSQRTF];
19948
19949 default:
19950 return NULL_TREE;
19951 }
19952 }
19953
19954 /* Store OPERAND to memory after reload is completed. This means
19955 that we can't easily use assign_stack_local. */
19956 rtx
19957 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19958 {
19959 rtx result;
19960
19961 gcc_assert (reload_completed);
19962 if (TARGET_RED_ZONE)
19963 {
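/* On targets with a red zone, the ABI guarantees that the area
immediately below the stack pointer can be written without adjusting
the stack pointer, so the operand can simply be stored there. */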
19964 result = gen_rtx_MEM (mode,
19965 gen_rtx_PLUS (Pmode,
19966 stack_pointer_rtx,
19967 GEN_INT (-RED_ZONE_SIZE)));
19968 emit_move_insn (result, operand);
19969 }
19970 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19971 {
19972 switch (mode)
19973 {
19974 case HImode:
19975 case SImode:
19976 operand = gen_lowpart (DImode, operand);
19977 /* FALLTHRU */
19978 case DImode:
19979 emit_insn (
19980 gen_rtx_SET (VOIDmode,
19981 gen_rtx_MEM (DImode,
19982 gen_rtx_PRE_DEC (DImode,
19983 stack_pointer_rtx)),
19984 operand));
19985 break;
19986 default:
19987 gcc_unreachable ();
19988 }
19989 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19990 }
19991 else
19992 {
19993 switch (mode)
19994 {
19995 case DImode:
19996 {
19997 rtx operands[2];
19998 split_di (&operand, 1, operands, operands + 1);
19999 emit_insn (
20000 gen_rtx_SET (VOIDmode,
20001 gen_rtx_MEM (SImode,
20002 gen_rtx_PRE_DEC (Pmode,
20003 stack_pointer_rtx)),
20004 operands[1]));
20005 emit_insn (
20006 gen_rtx_SET (VOIDmode,
20007 gen_rtx_MEM (SImode,
20008 gen_rtx_PRE_DEC (Pmode,
20009 stack_pointer_rtx)),
20010 operands[0]));
20011 }
20012 break;
20013 case HImode:
20014 /* Store HImode values as SImode. */
20015 operand = gen_lowpart (SImode, operand);
20016 /* FALLTHRU */
20017 case SImode:
20018 emit_insn (
20019 gen_rtx_SET (VOIDmode,
20020 gen_rtx_MEM (GET_MODE (operand),
20021 gen_rtx_PRE_DEC (SImode,
20022 stack_pointer_rtx)),
20023 operand));
20024 break;
20025 default:
20026 gcc_unreachable ();
20027 }
20028 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20029 }
20030 return result;
20031 }
20032
20033 /* Free the operand previously forced to memory by ix86_force_to_memory. */
20034 void
20035 ix86_free_from_memory (enum machine_mode mode)
20036 {
20037 if (!TARGET_RED_ZONE)
20038 {
20039 int size;
20040
20041 if (mode == DImode || TARGET_64BIT)
20042 size = 8;
20043 else
20044 size = 4;
20045 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20046 to a pop or add instruction if registers are available. */
20047 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20048 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20049 GEN_INT (size))));
20050 }
20051 }
20052
20053 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20054 QImode must go into class Q_REGS.
20055 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and
20056 movdf do mem-to-mem moves through integer regs. */
20057 enum reg_class
20058 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20059 {
20060 enum machine_mode mode = GET_MODE (x);
20061
20062 /* We're only allowed to return a subclass of REGCLASS. Many of the
20063 following checks fail for NO_REGS, so eliminate that early. */
20064 if (regclass == NO_REGS)
20065 return NO_REGS;
20066
20067 /* All classes can load zeros. */
20068 if (x == CONST0_RTX (mode))
20069 return regclass;
20070
20071 /* Force constants into memory if we are loading a (nonzero) constant into
20072 an MMX or SSE register. This is because there are no MMX/SSE instructions
20073 to load from a constant. */
20074 if (CONSTANT_P (x)
20075 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20076 return NO_REGS;
20077
20078 /* Prefer SSE regs only, if we can use them for math. */
20079 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20080 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20081
20082 /* Floating-point constants need more complex checks. */
20083 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20084 {
20085 /* General regs can load everything. */
20086 if (reg_class_subset_p (regclass, GENERAL_REGS))
20087 return regclass;
20088
20089 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20090 zero above. We only want to wind up preferring 80387 registers if
20091 we plan on doing computation with them. */
20092 if (TARGET_80387
20093 && standard_80387_constant_p (x))
20094 {
20095 /* Limit class to non-sse. */
20096 if (regclass == FLOAT_SSE_REGS)
20097 return FLOAT_REGS;
20098 if (regclass == FP_TOP_SSE_REGS)
20099 return FP_TOP_REG;
20100 if (regclass == FP_SECOND_SSE_REGS)
20101 return FP_SECOND_REG;
20102 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20103 return regclass;
20104 }
20105
20106 return NO_REGS;
20107 }
20108
20109 /* Generally when we see PLUS here, it's the function invariant
20110 (plus soft-fp const_int), which can only be computed into general
20111 regs. */
20112 if (GET_CODE (x) == PLUS)
20113 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20114
20115 /* QImode constants are easy to load, but non-constant QImode data
20116 must go into Q_REGS. */
20117 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20118 {
20119 if (reg_class_subset_p (regclass, Q_REGS))
20120 return regclass;
20121 if (reg_class_subset_p (Q_REGS, regclass))
20122 return Q_REGS;
20123 return NO_REGS;
20124 }
20125
20126 return regclass;
20127 }
20128
20129 /* Discourage putting floating-point values in SSE registers unless
20130 SSE math is being used, and likewise for the 387 registers. */
20131 enum reg_class
20132 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20133 {
20134 enum machine_mode mode = GET_MODE (x);
20135
20136 /* Restrict the output reload class to the register bank that we are doing
20137 math on. If the class we would prefer is not a subset of REGCLASS, reject
20138 this alternative: if reload cannot do this, it will still use its choice. */
20140 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20141 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20142
20143 if (X87_FLOAT_MODE_P (mode))
20144 {
20145 if (regclass == FP_TOP_SSE_REGS)
20146 return FP_TOP_REG;
20147 else if (regclass == FP_SECOND_SSE_REGS)
20148 return FP_SECOND_REG;
20149 else
20150 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20151 }
20152
20153 return regclass;
20154 }
20155
20156 /* If we are copying between general and FP registers, we need a memory
20157 location. The same is true for SSE and MMX registers.
20158
20159 To optimize register_move_cost performance, provide an inline variant.
20160
20161 The check can't work reliably when one of the classes is a class containing
20162 registers from multiple units (SSE, MMX, integer). We avoid this by never
20163 combining those units in a single alternative in the machine description.
20164 Ensure that this constraint holds to avoid unexpected surprises.
20165
20166 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20167 enforce these sanity checks. */
20168
20169 static inline int
20170 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20171 enum machine_mode mode, int strict)
20172 {
20173 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20174 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20175 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20176 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20177 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20178 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20179 {
20180 gcc_assert (!strict);
20181 return true;
20182 }
20183
20184 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20185 return true;
20186
20187 /* ??? This is a lie. We do have moves between mmx/general and between
20188 mmx/sse2. But by saying we need secondary memory we discourage the
20189 register allocator from using the mmx registers unless needed. */
20190 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20191 return true;
20192
20193 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20194 {
20195 /* SSE1 doesn't have any direct moves from other classes. */
20196 if (!TARGET_SSE2)
20197 return true;
20198
20199 /* If the target says that inter-unit moves are more expensive
20200 than moving through memory, then don't generate them. */
20201 if (!TARGET_INTER_UNIT_MOVES)
20202 return true;
20203
20204 /* Between SSE and general, we have moves no larger than word size. */
20205 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20206 return true;
20207 }
20208
20209 return false;
20210 }
20211
20212 int
20213 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20214 enum machine_mode mode, int strict)
20215 {
20216 return inline_secondary_memory_needed (class1, class2, mode, strict);
20217 }
20218
20219 /* Return true if the registers in CLASS cannot represent the change from
20220 modes FROM to TO. */
20221
20222 bool
20223 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20224 enum reg_class regclass)
20225 {
20226 if (from == to)
20227 return false;
20228
20229 /* x87 registers can't do subreg at all, as all values are reformatted
20230 to extended precision. */
20231 if (MAYBE_FLOAT_CLASS_P (regclass))
20232 return true;
20233
20234 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20235 {
20236 /* Vector registers do not support QI or HImode loads. If we don't
20237 disallow a change to these modes, reload will assume it's ok to
20238 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20239 the vec_dupv4hi pattern. */
20240 if (GET_MODE_SIZE (from) < 4)
20241 return true;
20242
20243 /* Vector registers do not support subreg with nonzero offsets, which
20244 are otherwise valid for integer registers. Since we can't see
20245 whether we have a nonzero offset from here, prohibit all
20246 nonparadoxical subregs changing size. */
20247 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20248 return true;
20249 }
20250
20251 return false;
20252 }
20253
20254 /* Return the cost of moving data of mode MODE between a
20255 register and memory. A value of 2 is the default; this cost is
20256 relative to those in `REGISTER_MOVE_COST'.
20257
20258 This function is used extensively by register_move_cost, which is used to
20259 build tables at startup; hence it is declared inline.
20260 When IN is 2, return the maximum of the in and out move costs.
20261
20262 If moving between registers and memory is more expensive than
20263 between two registers, this macro should be defined to express the
20264 relative cost.
20265
20266 Also model the increased cost of moving QImode values in register
20267 classes outside Q_REGS.
20268 */
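/* For example (a worked illustration, assuming a 32-bit target where
UNITS_PER_WORD is 4), a DImode load into a general register falls into
the default case below and is charged as two word-sized loads:
(8 + 4 - 1) / 4 == 2, giving 2 * int_load[2]. */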
20269 static inline int
20270 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
20271 int in)
20272 {
20273 int cost;
20274 if (FLOAT_CLASS_P (regclass))
20275 {
20276 int index;
20277 switch (mode)
20278 {
20279 case SFmode:
20280 index = 0;
20281 break;
20282 case DFmode:
20283 index = 1;
20284 break;
20285 case XFmode:
20286 index = 2;
20287 break;
20288 default:
20289 return 100;
20290 }
20291 if (in == 2)
20292 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20293 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20294 }
20295 if (SSE_CLASS_P (regclass))
20296 {
20297 int index;
20298 switch (GET_MODE_SIZE (mode))
20299 {
20300 case 4:
20301 index = 0;
20302 break;
20303 case 8:
20304 index = 1;
20305 break;
20306 case 16:
20307 index = 2;
20308 break;
20309 default:
20310 return 100;
20311 }
20312 if (in == 2)
20313 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20314 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20315 }
20316 if (MMX_CLASS_P (regclass))
20317 {
20318 int index;
20319 switch (GET_MODE_SIZE (mode))
20320 {
20321 case 4:
20322 index = 0;
20323 break;
20324 case 8:
20325 index = 1;
20326 break;
20327 default:
20328 return 100;
20329 }
20330 if (in == 2)
20331 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20332 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20333 }
20334 switch (GET_MODE_SIZE (mode))
20335 {
20336 case 1:
20337 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20338 {
20339 if (!in)
20340 return ix86_cost->int_store[0];
20341 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20342 cost = ix86_cost->movzbl_load;
20343 else
20344 cost = ix86_cost->int_load[0];
20345 if (in == 2)
20346 return MAX (cost, ix86_cost->int_store[0]);
20347 return cost;
20348 }
20349 else
20350 {
20351 if (in == 2)
20352 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20353 if (in)
20354 return ix86_cost->movzbl_load;
20355 else
20356 return ix86_cost->int_store[0] + 4;
20357 }
20358 break;
20359 case 2:
20360 if (in == 2)
20361 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20362 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20363 default:
20364 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20365 if (mode == TFmode)
20366 mode = XFmode;
20367 if (in == 2)
20368 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20369 else if (in)
20370 cost = ix86_cost->int_load[2];
20371 else
20372 cost = ix86_cost->int_store[2];
20373 return (cost * (((int) GET_MODE_SIZE (mode)
20374 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20375 }
20376 }
20377
20378 int
20379 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20380 {
20381 return inline_memory_move_cost (mode, regclass, in);
20382 }
20383
20384
20385 /* Return the cost of moving data from a register in class CLASS1 to
20386 one in class CLASS2.
20387
20388 It is not required that the cost always equal 2 when FROM is the same as TO;
20389 on some machines it is expensive to move between registers if they are not
20390 general registers. */
20391
20392 int
20393 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20394 enum reg_class class2)
20395 {
20396 /* In case we require secondary memory, compute cost of the store followed
20397 by load. In order to avoid bad register allocation choices, we need
20398 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20399
20400 if (inline_secondary_memory_needed (class1, class2, mode, 0))
20401 {
20402 int cost = 1;
20403
20404 #if 0
20405 cost += MAX (inline_memory_move_cost (mode, class1, 0),
20406 inline_memory_move_cost (mode, class1, 1));
20407 cost += MAX (inline_memory_move_cost (mode, class2, 0),
20408 inline_memory_move_cost (mode, class2, 1));
20409 #endif
20410 cost += inline_memory_move_cost (mode, class1, 2);
20411 cost += inline_memory_move_cost (mode, class2, 2);
20412
20413 /* In case of copying from a general purpose register we may emit multiple
20414 stores followed by a single load, causing a memory size mismatch stall.
20415 Count this as an arbitrarily high cost of 20. */
20416 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20417 cost += 20;
20418
20419 /* In the case of FP/MMX moves, the registers actually overlap, and we
20420 have to switch modes in order to treat them differently. */
20421 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20422 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20423 cost += 20;
20424
20425 return cost;
20426 }
20427
20428 /* Moves between SSE/MMX and integer unit are expensive. */
20429 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20430 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20431
20432 /* ??? By keeping the returned value relatively high, we limit the number
20433 of moves between integer and MMX/SSE registers for all targets.
20434 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
20435 where integer modes in MMX/SSE registers are not tieable
20436 because of missing QImode and HImode moves to, from or between
20437 MMX/SSE registers. */
20438 return MAX (ix86_cost->mmxsse_to_integer, 8);
20439
20440 if (MAYBE_FLOAT_CLASS_P (class1))
20441 return ix86_cost->fp_move;
20442 if (MAYBE_SSE_CLASS_P (class1))
20443 return ix86_cost->sse_move;
20444 if (MAYBE_MMX_CLASS_P (class1))
20445 return ix86_cost->mmx_move;
20446 return 2;
20447 }
20448
20449 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20450
20451 bool
20452 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20453 {
20454 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
20455 if (CC_REGNO_P (regno))
20456 return GET_MODE_CLASS (mode) == MODE_CC;
20457 if (GET_MODE_CLASS (mode) == MODE_CC
20458 || GET_MODE_CLASS (mode) == MODE_RANDOM
20459 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20460 return 0;
20461 if (FP_REGNO_P (regno))
20462 return VALID_FP_MODE_P (mode);
20463 if (SSE_REGNO_P (regno))
20464 {
20465 /* We implement the move patterns for all vector modes into and
20466 out of SSE registers, even when no operation instructions
20467 are available. */
20468 return (VALID_SSE_REG_MODE (mode)
20469 || VALID_SSE2_REG_MODE (mode)
20470 || VALID_MMX_REG_MODE (mode)
20471 || VALID_MMX_REG_MODE_3DNOW (mode));
20472 }
20473 if (MMX_REGNO_P (regno))
20474 {
20475 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20476 so if the register is available at all, then we can move data of
20477 the given mode into or out of it. */
20478 return (VALID_MMX_REG_MODE (mode)
20479 || VALID_MMX_REG_MODE_3DNOW (mode));
20480 }
20481
20482 if (mode == QImode)
20483 {
20484 /* Take care with QImode values - they can live in non-QI regs,
20485 but then they cause partial register stalls. */
20486 if (regno < 4 || TARGET_64BIT)
20487 return 1;
20488 if (!TARGET_PARTIAL_REG_STALL)
20489 return 1;
20490 return reload_in_progress || reload_completed;
20491 }
20492 /* We handle both integers and floats in the general purpose registers. */
20493 else if (VALID_INT_MODE_P (mode))
20494 return 1;
20495 else if (VALID_FP_MODE_P (mode))
20496 return 1;
20497 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20498 on to use that value in smaller contexts, this can easily force a
20499 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20500 supporting DImode, allow it. */
20501 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20502 return 1;
20503
20504 return 0;
20505 }
20506
20507 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20508 tieable integer mode. */
20509
20510 static bool
20511 ix86_tieable_integer_mode_p (enum machine_mode mode)
20512 {
20513 switch (mode)
20514 {
20515 case HImode:
20516 case SImode:
20517 return true;
20518
20519 case QImode:
20520 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20521
20522 case DImode:
20523 return TARGET_64BIT;
20524
20525 default:
20526 return false;
20527 }
20528 }
20529
20530 /* Return true if MODE1 is accessible in a register that can hold MODE2
20531 without copying. That is, all register classes that can hold MODE2
20532 can also hold MODE1. */
20533
20534 bool
20535 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20536 {
20537 if (mode1 == mode2)
20538 return true;
20539
20540 if (ix86_tieable_integer_mode_p (mode1)
20541 && ix86_tieable_integer_mode_p (mode2))
20542 return true;
20543
20544 /* MODE2 being XFmode implies fp stack or general regs, which means we
20545 can tie any smaller floating point modes to it. Note that we do not
20546 tie this with TFmode. */
20547 if (mode2 == XFmode)
20548 return mode1 == SFmode || mode1 == DFmode;
20549
20550 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20551 that we can tie it with SFmode. */
20552 if (mode2 == DFmode)
20553 return mode1 == SFmode;
20554
20555 /* If MODE2 is only appropriate for an SSE register, then tie with
20556 any other mode acceptable to SSE registers. */
20557 if (GET_MODE_SIZE (mode2) == 16
20558 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20559 return (GET_MODE_SIZE (mode1) == 16
20560 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20561
20562 /* If MODE2 is appropriate for an MMX register, then tie
20563 with any other mode acceptable to MMX registers. */
20564 if (GET_MODE_SIZE (mode2) == 8
20565 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20566 return (GET_MODE_SIZE (mode1) == 8
20567 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20568
20569 return false;
20570 }
20571
20572 /* Compute a (partial) cost for rtx X. Return true if the complete
20573 cost has been computed, and false if subexpressions should be
20574 scanned. In either case, *TOTAL contains the cost result. */
20575
20576 static bool
20577 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20578 {
20579 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20580 enum machine_mode mode = GET_MODE (x);
20581
20582 switch (code)
20583 {
20584 case CONST_INT:
20585 case CONST:
20586 case LABEL_REF:
20587 case SYMBOL_REF:
20588 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20589 *total = 3;
20590 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20591 *total = 2;
20592 else if (flag_pic && SYMBOLIC_CONST (x)
20593 && (!TARGET_64BIT
20594 || (GET_CODE (x) != LABEL_REF
20595 && (GET_CODE (x) != SYMBOL_REF
20596 || !SYMBOL_REF_LOCAL_P (x)))))
20597 *total = 1;
20598 else
20599 *total = 0;
20600 return true;
20601
20602 case CONST_DOUBLE:
20603 if (mode == VOIDmode)
20604 *total = 0;
20605 else
20606 switch (standard_80387_constant_p (x))
20607 {
20608 case 1: /* 0.0 */
20609 *total = 1;
20610 break;
20611 default: /* Other constants */
20612 *total = 2;
20613 break;
20614 case 0:
20615 case -1:
20616 /* Start with (MEM (SYMBOL_REF)), since that's where
20617 it'll probably end up. Add a penalty for size. */
20618 *total = (COSTS_N_INSNS (1)
20619 + (flag_pic != 0 && !TARGET_64BIT)
20620 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20621 break;
20622 }
20623 return true;
20624
20625 case ZERO_EXTEND:
20626 /* The zero extension is often completely free on x86_64, so make
20627 it as cheap as possible. */
20628 if (TARGET_64BIT && mode == DImode
20629 && GET_MODE (XEXP (x, 0)) == SImode)
20630 *total = 1;
20631 else if (TARGET_ZERO_EXTEND_WITH_AND)
20632 *total = ix86_cost->add;
20633 else
20634 *total = ix86_cost->movzx;
20635 return false;
20636
20637 case SIGN_EXTEND:
20638 *total = ix86_cost->movsx;
20639 return false;
20640
20641 case ASHIFT:
20642 if (CONST_INT_P (XEXP (x, 1))
20643 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20644 {
20645 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
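/* A left shift by 1 can be done with an add; shifts by 2 or 3 can
   use an lea when that is no more expensive than a constant shift.  */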
20646 if (value == 1)
20647 {
20648 *total = ix86_cost->add;
20649 return false;
20650 }
20651 if ((value == 2 || value == 3)
20652 && ix86_cost->lea <= ix86_cost->shift_const)
20653 {
20654 *total = ix86_cost->lea;
20655 return false;
20656 }
20657 }
20658 /* FALLTHRU */
20659
20660 case ROTATE:
20661 case ASHIFTRT:
20662 case LSHIFTRT:
20663 case ROTATERT:
20664 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20665 {
20666 if (CONST_INT_P (XEXP (x, 1)))
20667 {
20668 if (INTVAL (XEXP (x, 1)) > 32)
20669 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20670 else
20671 *total = ix86_cost->shift_const * 2;
20672 }
20673 else
20674 {
20675 if (GET_CODE (XEXP (x, 1)) == AND)
20676 *total = ix86_cost->shift_var * 2;
20677 else
20678 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20679 }
20680 }
20681 else
20682 {
20683 if (CONST_INT_P (XEXP (x, 1)))
20684 *total = ix86_cost->shift_const;
20685 else
20686 *total = ix86_cost->shift_var;
20687 }
20688 return false;
20689
20690 case MULT:
20691 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20692 {
20693 /* ??? SSE scalar cost should be used here. */
20694 *total = ix86_cost->fmul;
20695 return false;
20696 }
20697 else if (X87_FLOAT_MODE_P (mode))
20698 {
20699 *total = ix86_cost->fmul;
20700 return false;
20701 }
20702 else if (FLOAT_MODE_P (mode))
20703 {
20704 /* ??? SSE vector cost should be used here. */
20705 *total = ix86_cost->fmul;
20706 return false;
20707 }
20708 else
20709 {
20710 rtx op0 = XEXP (x, 0);
20711 rtx op1 = XEXP (x, 1);
20712 int nbits;
20713 if (CONST_INT_P (XEXP (x, 1)))
20714 {
20715 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
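/* Count the bits set in the constant multiplier; each set bit
   contributes mult_bit to the total cost computed below.  */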
20716 for (nbits = 0; value != 0; value &= value - 1)
20717 nbits++;
20718 }
20719 else
20720 /* This is arbitrary. */
20721 nbits = 7;
20722
20723 /* Compute costs correctly for widening multiplication. */
20724 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20725 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20726 == GET_MODE_SIZE (mode))
20727 {
20728 int is_mulwiden = 0;
20729 enum machine_mode inner_mode = GET_MODE (op0);
20730
20731 if (GET_CODE (op0) == GET_CODE (op1))
20732 is_mulwiden = 1, op1 = XEXP (op1, 0);
20733 else if (CONST_INT_P (op1))
20734 {
20735 if (GET_CODE (op0) == SIGN_EXTEND)
20736 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20737 == INTVAL (op1);
20738 else
20739 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20740 }
20741
20742 if (is_mulwiden)
20743 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20744 }
20745
20746 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20747 + nbits * ix86_cost->mult_bit
20748 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20749
20750 return true;
20751 }
20752
20753 case DIV:
20754 case UDIV:
20755 case MOD:
20756 case UMOD:
20757 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20758 /* ??? SSE cost should be used here. */
20759 *total = ix86_cost->fdiv;
20760 else if (X87_FLOAT_MODE_P (mode))
20761 *total = ix86_cost->fdiv;
20762 else if (FLOAT_MODE_P (mode))
20763 /* ??? SSE vector cost should be used here. */
20764 *total = ix86_cost->fdiv;
20765 else
20766 *total = ix86_cost->divide[MODE_INDEX (mode)];
20767 return false;
20768
20769 case PLUS:
20770 if (GET_MODE_CLASS (mode) == MODE_INT
20771 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20772 {
20773 if (GET_CODE (XEXP (x, 0)) == PLUS
20774 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20775 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20776 && CONSTANT_P (XEXP (x, 1)))
20777 {
20778 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20779 if (val == 2 || val == 4 || val == 8)
20780 {
20781 *total = ix86_cost->lea;
20782 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20783 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20784 outer_code);
20785 *total += rtx_cost (XEXP (x, 1), outer_code);
20786 return true;
20787 }
20788 }
20789 else if (GET_CODE (XEXP (x, 0)) == MULT
20790 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20791 {
20792 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20793 if (val == 2 || val == 4 || val == 8)
20794 {
20795 *total = ix86_cost->lea;
20796 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20797 *total += rtx_cost (XEXP (x, 1), outer_code);
20798 return true;
20799 }
20800 }
20801 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20802 {
20803 *total = ix86_cost->lea;
20804 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20805 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20806 *total += rtx_cost (XEXP (x, 1), outer_code);
20807 return true;
20808 }
20809 }
20810 /* FALLTHRU */
20811
20812 case MINUS:
20813 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20814 {
20815 /* ??? SSE cost should be used here. */
20816 *total = ix86_cost->fadd;
20817 return false;
20818 }
20819 else if (X87_FLOAT_MODE_P (mode))
20820 {
20821 *total = ix86_cost->fadd;
20822 return false;
20823 }
20824 else if (FLOAT_MODE_P (mode))
20825 {
20826 /* ??? SSE vector cost should be used here. */
20827 *total = ix86_cost->fadd;
20828 return false;
20829 }
20830 /* FALLTHRU */
20831
20832 case AND:
20833 case IOR:
20834 case XOR:
20835 if (!TARGET_64BIT && mode == DImode)
20836 {
20837 *total = (ix86_cost->add * 2
20838 + (rtx_cost (XEXP (x, 0), outer_code)
20839 << (GET_MODE (XEXP (x, 0)) != DImode))
20840 + (rtx_cost (XEXP (x, 1), outer_code)
20841 << (GET_MODE (XEXP (x, 1)) != DImode)));
20842 return true;
20843 }
20844 /* FALLTHRU */
20845
20846 case NEG:
20847 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20848 {
20849 /* ??? SSE cost should be used here. */
20850 *total = ix86_cost->fchs;
20851 return false;
20852 }
20853 else if (X87_FLOAT_MODE_P (mode))
20854 {
20855 *total = ix86_cost->fchs;
20856 return false;
20857 }
20858 else if (FLOAT_MODE_P (mode))
20859 {
20860 /* ??? SSE vector cost should be used here. */
20861 *total = ix86_cost->fchs;
20862 return false;
20863 }
20864 /* FALLTHRU */
20865
20866 case NOT:
20867 if (!TARGET_64BIT && mode == DImode)
20868 *total = ix86_cost->add * 2;
20869 else
20870 *total = ix86_cost->add;
20871 return false;
20872
20873 case COMPARE:
20874 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20875 && XEXP (XEXP (x, 0), 1) == const1_rtx
20876 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20877 && XEXP (x, 1) == const0_rtx)
20878 {
20879 /* This kind of construct is implemented using test[bwl].
20880 Treat it as if we had an AND. */
20881 *total = (ix86_cost->add
20882 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20883 + rtx_cost (const1_rtx, outer_code));
20884 return true;
20885 }
20886 return false;
20887
20888 case FLOAT_EXTEND:
20889 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20890 *total = 0;
20891 return false;
20892
20893 case ABS:
20894 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20895 /* ??? SSE cost should be used here. */
20896 *total = ix86_cost->fabs;
20897 else if (X87_FLOAT_MODE_P (mode))
20898 *total = ix86_cost->fabs;
20899 else if (FLOAT_MODE_P (mode))
20900 /* ??? SSE vector cost should be used here. */
20901 *total = ix86_cost->fabs;
20902 return false;
20903
20904 case SQRT:
20905 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20906 /* ??? SSE cost should be used here. */
20907 *total = ix86_cost->fsqrt;
20908 else if (X87_FLOAT_MODE_P (mode))
20909 *total = ix86_cost->fsqrt;
20910 else if (FLOAT_MODE_P (mode))
20911 /* ??? SSE vector cost should be used here. */
20912 *total = ix86_cost->fsqrt;
20913 return false;
20914
20915 case UNSPEC:
20916 if (XINT (x, 1) == UNSPEC_TP)
20917 *total = 0;
20918 return false;
20919
20920 default:
20921 return false;
20922 }
20923 }
20924
20925 #if TARGET_MACHO
20926
20927 static int current_machopic_label_num;
20928
20929 /* Given a symbol name and its associated stub, write out the
20930 definition of the stub. */
20931
20932 void
20933 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20934 {
20935 unsigned int length;
20936 char *binder_name, *symbol_name, lazy_ptr_name[32];
20937 int label = ++current_machopic_label_num;
20938
20939 /* For 64-bit we shouldn't get here. */
20940 gcc_assert (!TARGET_64BIT);
20941
20942 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20943 symb = (*targetm.strip_name_encoding) (symb);
20944
20945 length = strlen (stub);
20946 binder_name = alloca (length + 32);
20947 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20948
20949 length = strlen (symb);
20950 symbol_name = alloca (length + 32);
20951 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20952
20953 sprintf (lazy_ptr_name, "L%d$lz", label);
20954
20955 if (MACHOPIC_PURE)
20956 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20957 else
20958 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20959
20960 fprintf (file, "%s:\n", stub);
20961 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20962
20963 if (MACHOPIC_PURE)
20964 {
20965 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20966 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20967 fprintf (file, "\tjmp\t*%%edx\n");
20968 }
20969 else
20970 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20971
20972 fprintf (file, "%s:\n", binder_name);
20973
20974 if (MACHOPIC_PURE)
20975 {
20976 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20977 fprintf (file, "\tpushl\t%%eax\n");
20978 }
20979 else
20980 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20981
20982 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20983
20984 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20985 fprintf (file, "%s:\n", lazy_ptr_name);
20986 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20987 fprintf (file, "\t.long %s\n", binder_name);
20988 }
20989
20990 void
20991 darwin_x86_file_end (void)
20992 {
20993 darwin_file_end ();
20994 ix86_file_end ();
20995 }
20996 #endif /* TARGET_MACHO */
20997
20998 /* Order the registers for register allocator. */
20999
21000 void
21001 x86_order_regs_for_local_alloc (void)
21002 {
21003 int pos = 0;
21004 int i;
21005
21006 /* First allocate the local general purpose registers. */
21007 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21008 if (GENERAL_REGNO_P (i) && call_used_regs[i])
21009 reg_alloc_order [pos++] = i;
21010
21011 /* Global general purpose registers. */
21012 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21013 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21014 reg_alloc_order [pos++] = i;
21015
21016 /* x87 registers come first in case we are doing FP math
21017 using them. */
21018 if (!TARGET_SSE_MATH)
21019 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21020 reg_alloc_order [pos++] = i;
21021
21022 /* SSE registers. */
21023 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21024 reg_alloc_order [pos++] = i;
21025 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21026 reg_alloc_order [pos++] = i;
21027
21028 /* x87 registers. */
21029 if (TARGET_SSE_MATH)
21030 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21031 reg_alloc_order [pos++] = i;
21032
21033 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21034 reg_alloc_order [pos++] = i;
21035
21036 /* Initialize the rest of the array, as we do not allocate some registers
21037 at all. */
21038 while (pos < FIRST_PSEUDO_REGISTER)
21039 reg_alloc_order [pos++] = 0;
21040 }
21041
21042 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21043 struct attribute_spec.handler. */
21044 static tree
21045 ix86_handle_struct_attribute (tree *node, tree name,
21046 tree args ATTRIBUTE_UNUSED,
21047 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
21048 {
21049 tree *type = NULL;
21050 if (DECL_P (*node))
21051 {
21052 if (TREE_CODE (*node) == TYPE_DECL)
21053 type = &TREE_TYPE (*node);
21054 }
21055 else
21056 type = node;
21057
21058 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21059 || TREE_CODE (*type) == UNION_TYPE)))
21060 {
21061 warning (OPT_Wattributes, "%qs attribute ignored",
21062 IDENTIFIER_POINTER (name));
21063 *no_add_attrs = true;
21064 }
21065
21066 else if ((is_attribute_p ("ms_struct", name)
21067 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21068 || ((is_attribute_p ("gcc_struct", name)
21069 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21070 {
21071 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21072 IDENTIFIER_POINTER (name));
21073 *no_add_attrs = true;
21074 }
21075
21076 return NULL_TREE;
21077 }
21078
21079 static bool
21080 ix86_ms_bitfield_layout_p (tree record_type)
21081 {
21082 return (TARGET_MS_BITFIELD_LAYOUT &&
21083 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21084 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21085 }
21086
21087 /* Returns an expression indicating where the this parameter is
21088 located on entry to the FUNCTION. */
21089
21090 static rtx
21091 x86_this_parameter (tree function)
21092 {
21093 tree type = TREE_TYPE (function);
21094 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21095
21096 if (TARGET_64BIT)
21097 {
21098 const int *parm_regs;
21099
21100 if (TARGET_64BIT_MS_ABI)
21101 parm_regs = x86_64_ms_abi_int_parameter_registers;
21102 else
21103 parm_regs = x86_64_int_parameter_registers;
21104 return gen_rtx_REG (DImode, parm_regs[aggr]);
21105 }
21106
21107 if (ix86_function_regparm (type, function) > 0
21108 && !type_has_variadic_args_p (type))
21109 {
21110 int regno = 0;
21111 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21112 regno = 2;
21113 return gen_rtx_REG (SImode, regno);
21114 }
21115
21116 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21117 }
21118
21119 /* Determine whether x86_output_mi_thunk can succeed. */
21120
21121 static bool
21122 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21123 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21124 HOST_WIDE_INT vcall_offset, tree function)
21125 {
21126 /* 64-bit can handle anything. */
21127 if (TARGET_64BIT)
21128 return true;
21129
21130 /* For 32-bit, everything's fine if we have one free register. */
21131 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21132 return true;
21133
21134 /* Need a free register for vcall_offset. */
21135 if (vcall_offset)
21136 return false;
21137
21138 /* Need a free register for GOT references. */
21139 if (flag_pic && !(*targetm.binds_local_p) (function))
21140 return false;
21141
21142 /* Otherwise ok. */
21143 return true;
21144 }
21145
21146 /* Output the assembler code for a thunk function. THUNK_DECL is the
21147 declaration for the thunk function itself, FUNCTION is the decl for
21148 the target function. DELTA is an immediate constant offset to be
21149 added to THIS. If VCALL_OFFSET is nonzero, the word at
21150 *(*this + vcall_offset) should be added to THIS. */
21151
21152 static void
21153 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21154 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21155 HOST_WIDE_INT vcall_offset, tree function)
21156 {
21157 rtx xops[3];
21158 rtx this_param = x86_this_parameter (function);
21159 rtx this_reg, tmp;
21160
21161 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21162 pull it in now and let DELTA benefit. */
21163 if (REG_P (this_param))
21164 this_reg = this_param;
21165 else if (vcall_offset)
21166 {
21167 /* Put the this parameter into %eax. */
21168 xops[0] = this_param;
21169 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21170 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21171 }
21172 else
21173 this_reg = NULL_RTX;
21174
21175 /* Adjust the this parameter by a fixed constant. */
21176 if (delta)
21177 {
21178 xops[0] = GEN_INT (delta);
21179 xops[1] = this_reg ? this_reg : this_param;
21180 if (TARGET_64BIT)
21181 {
21182 if (!x86_64_general_operand (xops[0], DImode))
21183 {
21184 tmp = gen_rtx_REG (DImode, R10_REG);
21185 xops[1] = tmp;
21186 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21187 xops[0] = tmp;
21188 xops[1] = this_param;
21189 }
21190 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21191 }
21192 else
21193 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21194 }
21195
21196 /* Adjust the this parameter by a value stored in the vtable. */
21197 if (vcall_offset)
21198 {
21199 if (TARGET_64BIT)
21200 tmp = gen_rtx_REG (DImode, R10_REG);
21201 else
21202 {
21203 int tmp_regno = 2 /* ECX */;
21204 if (lookup_attribute ("fastcall",
21205 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21206 tmp_regno = 0 /* EAX */;
21207 tmp = gen_rtx_REG (SImode, tmp_regno);
21208 }
21209
21210 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21211 xops[1] = tmp;
21212 if (TARGET_64BIT)
21213 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21214 else
21215 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21216
21217 /* Adjust the this parameter. */
21218 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21219 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21220 {
21221 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21222 xops[0] = GEN_INT (vcall_offset);
21223 xops[1] = tmp2;
21224 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21225 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21226 }
21227 xops[1] = this_reg;
21228 if (TARGET_64BIT)
21229 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21230 else
21231 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21232 }
21233
21234 /* If necessary, drop THIS back to its stack slot. */
21235 if (this_reg && this_reg != this_param)
21236 {
21237 xops[0] = this_reg;
21238 xops[1] = this_param;
21239 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21240 }
21241
21242 xops[0] = XEXP (DECL_RTL (function), 0);
21243 if (TARGET_64BIT)
21244 {
21245 if (!flag_pic || (*targetm.binds_local_p) (function))
21246 output_asm_insn ("jmp\t%P0", xops);
21247 /* All thunks should be in the same object as their target,
21248 and thus binds_local_p should be true. */
21249 else if (TARGET_64BIT_MS_ABI)
21250 gcc_unreachable ();
21251 else
21252 {
21253 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21254 tmp = gen_rtx_CONST (Pmode, tmp);
21255 tmp = gen_rtx_MEM (QImode, tmp);
21256 xops[0] = tmp;
21257 output_asm_insn ("jmp\t%A0", xops);
21258 }
21259 }
21260 else
21261 {
21262 if (!flag_pic || (*targetm.binds_local_p) (function))
21263 output_asm_insn ("jmp\t%P0", xops);
21264 else
21265 #if TARGET_MACHO
21266 if (TARGET_MACHO)
21267 {
21268 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21269 tmp = (gen_rtx_SYMBOL_REF
21270 (Pmode,
21271 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21272 tmp = gen_rtx_MEM (QImode, tmp);
21273 xops[0] = tmp;
21274 output_asm_insn ("jmp\t%0", xops);
21275 }
21276 else
21277 #endif /* TARGET_MACHO */
21278 {
21279 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21280 output_set_got (tmp, NULL_RTX);
21281
21282 xops[1] = tmp;
21283 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21284 output_asm_insn ("jmp\t{*}%1", xops);
21285 }
21286 }
21287 }
21288
21289 static void
21290 x86_file_start (void)
21291 {
21292 default_file_start ();
21293 #if TARGET_MACHO
21294 darwin_file_start ();
21295 #endif
21296 if (X86_FILE_START_VERSION_DIRECTIVE)
21297 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21298 if (X86_FILE_START_FLTUSED)
21299 fputs ("\t.global\t__fltused\n", asm_out_file);
21300 if (ix86_asm_dialect == ASM_INTEL)
21301 fputs ("\t.intel_syntax\n", asm_out_file);
21302 }
21303
21304 int
21305 x86_field_alignment (tree field, int computed)
21306 {
21307 enum machine_mode mode;
21308 tree type = TREE_TYPE (field);
21309
21310 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21311 return computed;
21312 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21313 ? get_inner_array_type (type) : type);
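/* In the traditional 32-bit ABI these types are only 4-byte aligned
   inside structures unless -malign-double is in effect.  */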
21314 if (mode == DFmode || mode == DCmode
21315 || GET_MODE_CLASS (mode) == MODE_INT
21316 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21317 return MIN (32, computed);
21318 return computed;
21319 }
21320
21321 /* Output assembler code to FILE to increment profiler label # LABELNO
21322 for profiling a function entry. */
21323 void
21324 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21325 {
21326 if (TARGET_64BIT)
21327 {
21328 #ifndef NO_PROFILE_COUNTERS
21329 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21330 #endif
21331
21332 if (!TARGET_64BIT_MS_ABI && flag_pic)
21333 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21334 else
21335 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21336 }
21337 else if (flag_pic)
21338 {
21339 #ifndef NO_PROFILE_COUNTERS
21340 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21341 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21342 #endif
21343 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21344 }
21345 else
21346 {
21347 #ifndef NO_PROFILE_COUNTERS
21348 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21349 PROFILE_COUNT_REGISTER);
21350 #endif
21351 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21352 }
21353 }
21354
21355 /* We don't have exact information about the insn sizes, but we may assume
21356 quite safely that we are informed about all 1 byte insns and memory
21357 address sizes. This is enough to eliminate unnecessary padding in
21358 99% of cases. */
21359
21360 static int
21361 min_insn_size (rtx insn)
21362 {
21363 int l = 0;
21364
21365 if (!INSN_P (insn) || !active_insn_p (insn))
21366 return 0;
21367
21368 /* Discard alignments we've emitted and jump instructions. */
21369 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21370 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21371 return 0;
21372 if (JUMP_P (insn)
21373 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21374 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21375 return 0;
21376
21377 /* Important case - calls are always 5 bytes.
21378 It is common to have many calls in a row. */
21379 if (CALL_P (insn)
21380 && symbolic_reference_mentioned_p (PATTERN (insn))
21381 && !SIBLING_CALL_P (insn))
21382 return 5;
21383 if (get_attr_length (insn) <= 1)
21384 return 1;
21385
21386 /* For normal instructions we may rely on the sizes of addresses
21387 and the presence of symbol to require 4 bytes of encoding.
21388 This is not the case for jumps where references are PC relative. */
21389 if (!JUMP_P (insn))
21390 {
21391 l = get_attr_length_address (insn);
21392 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21393 l = 4;
21394 }
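/* If we have an address-length estimate, add one byte for the opcode;
   otherwise assume a two byte instruction.  */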
21395 if (l)
21396 return 1+l;
21397 else
21398 return 2;
21399 }
21400
21401 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21402 window. */
21403
21404 static void
21405 ix86_avoid_jump_misspredicts (void)
21406 {
21407 rtx insn, start = get_insns ();
21408 int nbytes = 0, njumps = 0;
21409 int isjump = 0;
21410
21411 /* Look for all minimal intervals of instructions containing 4 jumps.
21412 The intervals are bounded by START and INSN. NBYTES is the total
21413 size of instructions in the interval including INSN and not including
21414 START. When NBYTES is smaller than 16 bytes, it is possible
21415 that the ends of START and INSN fall in the same 16 byte page.
21416
21417 The smallest offset in the page at which INSN can start is the case where
21418 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
21419 We add p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
21420 */
21421 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21422 {
21423
21424 nbytes += min_insn_size (insn);
21425 if (dump_file)
21426 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21427 INSN_UID (insn), min_insn_size (insn));
21428 if ((JUMP_P (insn)
21429 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21430 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21431 || CALL_P (insn))
21432 njumps++;
21433 else
21434 continue;
21435
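/* Shrink the window from the left until it contains at most three
   jumps, remembering whether the last instruction dropped from the
   window was itself a jump or call.  */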
21436 while (njumps > 3)
21437 {
21438 start = NEXT_INSN (start);
21439 if ((JUMP_P (start)
21440 && GET_CODE (PATTERN (start)) != ADDR_VEC
21441 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21442 || CALL_P (start))
21443 njumps--, isjump = 1;
21444 else
21445 isjump = 0;
21446 nbytes -= min_insn_size (start);
21447 }
21448 gcc_assert (njumps >= 0);
21449 if (dump_file)
21450 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21451 INSN_UID (start), INSN_UID (insn), nbytes);
21452
21453 if (njumps == 3 && isjump && nbytes < 16)
21454 {
21455 int padsize = 15 - nbytes + min_insn_size (insn);
21456
21457 if (dump_file)
21458 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21459 INSN_UID (insn), padsize);
21460 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21461 }
21462 }
21463 }
21464
21465 /* AMD Athlon works faster
21466 when RET is not the destination of a conditional jump or directly preceded
21467 by another jump instruction. We avoid the penalty by inserting a NOP just
21468 before the RET instructions in such cases. */
21469 static void
21470 ix86_pad_returns (void)
21471 {
21472 edge e;
21473 edge_iterator ei;
21474
21475 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21476 {
21477 basic_block bb = e->src;
21478 rtx ret = BB_END (bb);
21479 rtx prev;
21480 bool replace = false;
21481
21482 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21483 || !maybe_hot_bb_p (bb))
21484 continue;
21485 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21486 if (active_insn_p (prev) || LABEL_P (prev))
21487 break;
21488 if (prev && LABEL_P (prev))
21489 {
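/* The RET is reached through a label; replace it if any executed
   predecessor edge jumps to that label instead of falling through.  */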
21490 edge e;
21491 edge_iterator ei;
21492
21493 FOR_EACH_EDGE (e, ei, bb->preds)
21494 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21495 && !(e->flags & EDGE_FALLTHRU))
21496 replace = true;
21497 }
21498 if (!replace)
21499 {
21500 prev = prev_active_insn (ret);
21501 if (prev
21502 && ((JUMP_P (prev) && any_condjump_p (prev))
21503 || CALL_P (prev)))
21504 replace = true;
21505 /* Empty functions get a branch mispredict even when the jump destination
21506 is not visible to us. */
21507 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21508 replace = true;
21509 }
21510 if (replace)
21511 {
21512 emit_insn_before (gen_return_internal_long (), ret);
21513 delete_insn (ret);
21514 }
21515 }
21516 }
21517
21518 /* Implement machine specific optimizations. We implement padding of returns
21519 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21520 static void
21521 ix86_reorg (void)
21522 {
21523 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21524 ix86_pad_returns ();
21525 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21526 ix86_avoid_jump_misspredicts ();
21527 }
21528
21529 /* Return nonzero when a QImode register that must be represented via a REX
21530 prefix is used. */
21531 bool
21532 x86_extended_QIreg_mentioned_p (rtx insn)
21533 {
21534 int i;
21535 extract_insn_cached (insn);
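/* Registers 0-3 (%al, %dl, %cl, %bl) have legacy 8-bit encodings;
   a QImode reference to any other register needs a REX prefix.  */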
21536 for (i = 0; i < recog_data.n_operands; i++)
21537 if (REG_P (recog_data.operand[i])
21538 && REGNO (recog_data.operand[i]) >= 4)
21539 return true;
21540 return false;
21541 }
21542
21543 /* Return nonzero when P points to a register encoded via a REX prefix.
21544 Called via for_each_rtx. */
21545 static int
21546 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21547 {
21548 unsigned int regno;
21549 if (!REG_P (*p))
21550 return 0;
21551 regno = REGNO (*p);
21552 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21553 }
21554
21555 /* Return true when INSN mentions register that must be encoded using REX
21556 prefix. */
21557 bool
21558 x86_extended_reg_mentioned_p (rtx insn)
21559 {
21560 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21561 }
21562
21563 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21564 optabs would emit if we didn't have TFmode patterns. */
21565
21566 void
21567 x86_emit_floatuns (rtx operands[2])
21568 {
21569 rtx neglab, donelab, i0, i1, f0, in, out;
21570 enum machine_mode mode, inmode;
21571
21572 inmode = GET_MODE (operands[1]);
21573 gcc_assert (inmode == SImode || inmode == DImode);
21574
21575 out = operands[0];
21576 in = force_reg (inmode, operands[1]);
21577 mode = GET_MODE (out);
21578 neglab = gen_label_rtx ();
21579 donelab = gen_label_rtx ();
21580 f0 = gen_reg_rtx (mode);
21581
21582 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21583
21584 expand_float (out, in, 0);
21585
21586 emit_jump_insn (gen_jump (donelab));
21587 emit_barrier ();
21588
21589 emit_label (neglab);
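/* The input has its sign bit set.  Halve it while keeping the low bit
   sticky (so rounding is unaffected), convert the halved value, then
   double the floating-point result.  */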
21590
21591 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21592 1, OPTAB_DIRECT);
21593 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21594 1, OPTAB_DIRECT);
21595 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21596
21597 expand_float (f0, i0, 0);
21598
21599 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21600
21601 emit_label (donelab);
21602 }
21603 \f
21604 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21605 with all elements equal to VAR. Return true if successful. */
21606
21607 static bool
21608 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21609 rtx target, rtx val)
21610 {
21611 enum machine_mode smode, wsmode, wvmode;
21612 rtx x;
21613
21614 switch (mode)
21615 {
21616 case V2SImode:
21617 case V2SFmode:
21618 if (!mmx_ok)
21619 return false;
21620 /* FALLTHRU */
21621
21622 case V2DFmode:
21623 case V2DImode:
21624 case V4SFmode:
21625 case V4SImode:
21626 val = force_reg (GET_MODE_INNER (mode), val);
21627 x = gen_rtx_VEC_DUPLICATE (mode, val);
21628 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21629 return true;
21630
21631 case V4HImode:
21632 if (!mmx_ok)
21633 return false;
21634 if (TARGET_SSE || TARGET_3DNOW_A)
21635 {
21636 val = gen_lowpart (SImode, val);
21637 x = gen_rtx_TRUNCATE (HImode, val);
21638 x = gen_rtx_VEC_DUPLICATE (mode, x);
21639 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21640 return true;
21641 }
21642 else
21643 {
21644 smode = HImode;
21645 wsmode = SImode;
21646 wvmode = V2SImode;
21647 goto widen;
21648 }
21649
21650 case V8QImode:
21651 if (!mmx_ok)
21652 return false;
21653 smode = QImode;
21654 wsmode = HImode;
21655 wvmode = V4HImode;
21656 goto widen;
21657 case V8HImode:
21658 if (TARGET_SSE2)
21659 {
21660 rtx tmp1, tmp2;
21661 /* Extend HImode to SImode using a paradoxical SUBREG. */
21662 tmp1 = gen_reg_rtx (SImode);
21663 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21664 /* Insert the SImode value as low element of V4SImode vector. */
21665 tmp2 = gen_reg_rtx (V4SImode);
21666 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21667 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21668 CONST0_RTX (V4SImode),
21669 const1_rtx);
21670 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21671 /* Cast the V4SImode vector back to a V8HImode vector. */
21672 tmp1 = gen_reg_rtx (V8HImode);
21673 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21674 /* Duplicate the low short through the whole low SImode word. */
21675 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21676 /* Cast the V8HImode vector back to a V4SImode vector. */
21677 tmp2 = gen_reg_rtx (V4SImode);
21678 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21679 /* Replicate the low element of the V4SImode vector. */
21680 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21681 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21682 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21683 return true;
21684 }
21685 smode = HImode;
21686 wsmode = SImode;
21687 wvmode = V4SImode;
21688 goto widen;
21689 case V16QImode:
21690 if (TARGET_SSE2)
21691 {
21692 rtx tmp1, tmp2;
21693 /* Extend QImode to SImode using a paradoxical SUBREG. */
21694 tmp1 = gen_reg_rtx (SImode);
21695 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21696 /* Insert the SImode value as low element of V4SImode vector. */
21697 tmp2 = gen_reg_rtx (V4SImode);
21698 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21699 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21700 CONST0_RTX (V4SImode),
21701 const1_rtx);
21702 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21703 /* Cast the V4SImode vector back to a V16QImode vector. */
21704 tmp1 = gen_reg_rtx (V16QImode);
21705 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21706 /* Duplicate the low byte through the whole low SImode word. */
21707 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21708 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21709 /* Cast the V16QImode vector back to a V4SImode vector. */
21710 tmp2 = gen_reg_rtx (V4SImode);
21711 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21712 /* Replicate the low element of the V4SImode vector. */
21713 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21714 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21715 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21716 return true;
21717 }
21718 smode = QImode;
21719 wsmode = HImode;
21720 wvmode = V8HImode;
21721 goto widen;
21722 widen:
21723 /* Replicate the value once into the next wider mode and recurse. */
21724 val = convert_modes (wsmode, smode, val, true);
21725 x = expand_simple_binop (wsmode, ASHIFT, val,
21726 GEN_INT (GET_MODE_BITSIZE (smode)),
21727 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21728 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21729
21730 x = gen_reg_rtx (wvmode);
21731 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21732 gcc_unreachable ();
21733 emit_move_insn (target, gen_lowpart (mode, x));
21734 return true;
21735
21736 default:
21737 return false;
21738 }
21739 }
21740
21741 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21742 whose ONE_VAR element is VAR, and other elements are zero. Return true
21743 if successful. */
21744
21745 static bool
21746 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21747 rtx target, rtx var, int one_var)
21748 {
21749 enum machine_mode vsimode;
21750 rtx new_target;
21751 rtx x, tmp;
21752
21753 switch (mode)
21754 {
21755 case V2SFmode:
21756 case V2SImode:
21757 if (!mmx_ok)
21758 return false;
21759 /* FALLTHRU */
21760
21761 case V2DFmode:
21762 case V2DImode:
21763 if (one_var != 0)
21764 return false;
21765 var = force_reg (GET_MODE_INNER (mode), var);
21766 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21767 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21768 return true;
21769
21770 case V4SFmode:
21771 case V4SImode:
21772 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21773 new_target = gen_reg_rtx (mode);
21774 else
21775 new_target = target;
21776 var = force_reg (GET_MODE_INNER (mode), var);
21777 x = gen_rtx_VEC_DUPLICATE (mode, var);
21778 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21779 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21780 if (one_var != 0)
21781 {
21782 /* We need to shuffle the value to the correct position, so
21783 create a new pseudo to store the intermediate result. */
21784
21785 /* With SSE2, we can use the integer shuffle insns. */
21786 if (mode != V4SFmode && TARGET_SSE2)
21787 {
21788 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21789 GEN_INT (1),
21790 GEN_INT (one_var == 1 ? 0 : 1),
21791 GEN_INT (one_var == 2 ? 0 : 1),
21792 GEN_INT (one_var == 3 ? 0 : 1)));
21793 if (target != new_target)
21794 emit_move_insn (target, new_target);
21795 return true;
21796 }
21797
21798 /* Otherwise convert the intermediate result to V4SFmode and
21799 use the SSE1 shuffle instructions. */
21800 if (mode != V4SFmode)
21801 {
21802 tmp = gen_reg_rtx (V4SFmode);
21803 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21804 }
21805 else
21806 tmp = new_target;
21807
21808 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21809 GEN_INT (1),
21810 GEN_INT (one_var == 1 ? 0 : 1),
21811 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21812 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21813
21814 if (mode != V4SFmode)
21815 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21816 else if (tmp != target)
21817 emit_move_insn (target, tmp);
21818 }
21819 else if (target != new_target)
21820 emit_move_insn (target, new_target);
21821 return true;
21822
21823 case V8HImode:
21824 case V16QImode:
21825 vsimode = V4SImode;
21826 goto widen;
21827 case V4HImode:
21828 case V8QImode:
21829 if (!mmx_ok)
21830 return false;
21831 vsimode = V2SImode;
21832 goto widen;
21833 widen:
21834 if (one_var != 0)
21835 return false;
21836
21837 /* Zero extend the variable element to SImode and recurse. */
21838 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21839
21840 x = gen_reg_rtx (vsimode);
21841 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21842 var, one_var))
21843 gcc_unreachable ();
21844
21845 emit_move_insn (target, gen_lowpart (mode, x));
21846 return true;
21847
21848 default:
21849 return false;
21850 }
21851 }
21852
21853 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21854 consisting of the values in VALS. It is known that all elements
21855 except ONE_VAR are constants. Return true if successful. */
21856
21857 static bool
21858 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21859 rtx target, rtx vals, int one_var)
21860 {
21861 rtx var = XVECEXP (vals, 0, one_var);
21862 enum machine_mode wmode;
21863 rtx const_vec, x;
21864
21865 const_vec = copy_rtx (vals);
21866 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21867 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21868
21869 switch (mode)
21870 {
21871 case V2DFmode:
21872 case V2DImode:
21873 case V2SFmode:
21874 case V2SImode:
21875 /* For the two element vectors, it's just as easy to use
21876 the general case. */
21877 return false;
21878
21879 case V4SFmode:
21880 case V4SImode:
21881 case V8HImode:
21882 case V4HImode:
21883 break;
21884
21885 case V16QImode:
21886 wmode = V8HImode;
21887 goto widen;
21888 case V8QImode:
21889 wmode = V4HImode;
21890 goto widen;
21891 widen:
21892 /* There's no way to set one QImode entry easily. Combine
21893 the variable value with its adjacent constant value, and
21894 promote to an HImode set. */
21895 x = XVECEXP (vals, 0, one_var ^ 1);
21896 if (one_var & 1)
21897 {
21898 var = convert_modes (HImode, QImode, var, true);
21899 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21900 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21901 x = GEN_INT (INTVAL (x) & 0xff);
21902 }
21903 else
21904 {
21905 var = convert_modes (HImode, QImode, var, true);
21906 x = gen_int_mode (INTVAL (x) << 8, HImode);
21907 }
21908 if (x != const0_rtx)
21909 var = expand_simple_binop (HImode, IOR, var, x, var,
21910 1, OPTAB_LIB_WIDEN);
21911
21912 x = gen_reg_rtx (wmode);
21913 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21914 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21915
21916 emit_move_insn (target, gen_lowpart (mode, x));
21917 return true;
21918
21919 default:
21920 return false;
21921 }
21922
21923 emit_move_insn (target, const_vec);
21924 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21925 return true;
21926 }
21927
21928 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21929 all values variable, and none identical. */
21930
21931 static void
21932 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21933 rtx target, rtx vals)
21934 {
21935 enum machine_mode half_mode = GET_MODE_INNER (mode);
21936 rtx op0 = NULL, op1 = NULL;
21937 bool use_vec_concat = false;
21938
21939 switch (mode)
21940 {
21941 case V2SFmode:
21942 case V2SImode:
21943 if (!mmx_ok && !TARGET_SSE)
21944 break;
21945 /* FALLTHRU */
21946
21947 case V2DFmode:
21948 case V2DImode:
21949 /* For the two element vectors, we always implement VEC_CONCAT. */
21950 op0 = XVECEXP (vals, 0, 0);
21951 op1 = XVECEXP (vals, 0, 1);
21952 use_vec_concat = true;
21953 break;
21954
21955 case V4SFmode:
21956 half_mode = V2SFmode;
21957 goto half;
21958 case V4SImode:
21959 half_mode = V2SImode;
21960 goto half;
21961 half:
21962 {
21963 rtvec v;
21964
21965 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21966 Recurse to load the two halves. */
21967
21968 op0 = gen_reg_rtx (half_mode);
21969 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21970 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21971
21972 op1 = gen_reg_rtx (half_mode);
21973 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21974 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21975
21976 use_vec_concat = true;
21977 }
21978 break;
21979
21980 case V8HImode:
21981 case V16QImode:
21982 case V4HImode:
21983 case V8QImode:
21984 break;
21985
21986 default:
21987 gcc_unreachable ();
21988 }
21989
21990 if (use_vec_concat)
21991 {
21992 if (!register_operand (op0, half_mode))
21993 op0 = force_reg (half_mode, op0);
21994 if (!register_operand (op1, half_mode))
21995 op1 = force_reg (half_mode, op1);
21996
21997 emit_insn (gen_rtx_SET (VOIDmode, target,
21998 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21999 }
22000 else
22001 {
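/* Build each word-sized chunk of the vector in an integer register
   by shifting and IOR-ing the elements together, then assemble the
   words into the full vector.  */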
22002 int i, j, n_elts, n_words, n_elt_per_word;
22003 enum machine_mode inner_mode;
22004 rtx words[4], shift;
22005
22006 inner_mode = GET_MODE_INNER (mode);
22007 n_elts = GET_MODE_NUNITS (mode);
22008 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22009 n_elt_per_word = n_elts / n_words;
22010 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
22011
22012 for (i = 0; i < n_words; ++i)
22013 {
22014 rtx word = NULL_RTX;
22015
22016 for (j = 0; j < n_elt_per_word; ++j)
22017 {
22018 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
22019 elt = convert_modes (word_mode, inner_mode, elt, true);
22020
22021 if (j == 0)
22022 word = elt;
22023 else
22024 {
22025 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
22026 word, 1, OPTAB_LIB_WIDEN);
22027 word = expand_simple_binop (word_mode, IOR, word, elt,
22028 word, 1, OPTAB_LIB_WIDEN);
22029 }
22030 }
22031
22032 words[i] = word;
22033 }
22034
22035 if (n_words == 1)
22036 emit_move_insn (target, gen_lowpart (mode, words[0]));
22037 else if (n_words == 2)
22038 {
22039 rtx tmp = gen_reg_rtx (mode);
22040 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
22041 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
22042 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
22043 emit_move_insn (target, tmp);
22044 }
22045 else if (n_words == 4)
22046 {
22047 rtx tmp = gen_reg_rtx (V4SImode);
22048 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22049 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22050 emit_move_insn (target, gen_lowpart (mode, tmp));
22051 }
22052 else
22053 gcc_unreachable ();
22054 }
22055 }
22056
22057 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22058 instructions unless MMX_OK is true. */
22059
22060 void
22061 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22062 {
22063 enum machine_mode mode = GET_MODE (target);
22064 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22065 int n_elts = GET_MODE_NUNITS (mode);
22066 int n_var = 0, one_var = -1;
22067 bool all_same = true, all_const_zero = true;
22068 int i;
22069 rtx x;
22070
22071 for (i = 0; i < n_elts; ++i)
22072 {
22073 x = XVECEXP (vals, 0, i);
22074 if (!CONSTANT_P (x))
22075 n_var++, one_var = i;
22076 else if (x != CONST0_RTX (inner_mode))
22077 all_const_zero = false;
22078 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22079 all_same = false;
22080 }
22081
22082 /* Constants are best loaded from the constant pool. */
22083 if (n_var == 0)
22084 {
22085 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22086 return;
22087 }
22088
22089 /* If all values are identical, broadcast the value. */
22090 if (all_same
22091 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22092 XVECEXP (vals, 0, 0)))
22093 return;
22094
22095 /* Values where only one field is non-constant are best loaded from
22096 the pool and overwritten via move later. */
22097 if (n_var == 1)
22098 {
22099 if (all_const_zero
22100 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22101 XVECEXP (vals, 0, one_var),
22102 one_var))
22103 return;
22104
22105 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22106 return;
22107 }
22108
22109 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
22110 }
22111
22112 void
22113 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22114 {
22115 enum machine_mode mode = GET_MODE (target);
22116 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22117 bool use_vec_merge = false;
22118 rtx tmp;
22119
22120 switch (mode)
22121 {
22122 case V2SFmode:
22123 case V2SImode:
22124 if (mmx_ok)
22125 {
22126 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22127 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22128 if (elt == 0)
22129 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22130 else
22131 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22132 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22133 return;
22134 }
22135 break;
22136
22137 case V2DImode:
22138 use_vec_merge = TARGET_SSE4_1;
22139 if (use_vec_merge)
22140 break;
22141
22142 case V2DFmode:
22143 {
22144 rtx op0, op1;
22145
22146 /* For the two element vectors, we implement a VEC_CONCAT with
22147 the extraction of the other element. */
22148
22149 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22150 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22151
22152 if (elt == 0)
22153 op0 = val, op1 = tmp;
22154 else
22155 op0 = tmp, op1 = val;
22156
22157 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22158 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22159 }
22160 return;
22161
22162 case V4SFmode:
22163 use_vec_merge = TARGET_SSE4_1;
22164 if (use_vec_merge)
22165 break;
22166
22167 switch (elt)
22168 {
22169 case 0:
22170 use_vec_merge = true;
22171 break;
22172
22173 case 1:
22174 /* tmp = target = A B C D */
22175 tmp = copy_to_reg (target);
22176 /* target = A A B B */
22177 emit_insn (gen_sse_unpcklps (target, target, target));
22178 /* target = X A B B */
22179 ix86_expand_vector_set (false, target, val, 0);
22180 /* target = A X C D */
22181 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22182 GEN_INT (1), GEN_INT (0),
22183 GEN_INT (2+4), GEN_INT (3+4)));
22184 return;
22185
22186 case 2:
22187 /* tmp = target = A B C D */
22188 tmp = copy_to_reg (target);
22189 /* tmp = X B C D */
22190 ix86_expand_vector_set (false, tmp, val, 0);
22191 /* target = A B X D */
22192 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22193 GEN_INT (0), GEN_INT (1),
22194 GEN_INT (0+4), GEN_INT (3+4)));
22195 return;
22196
22197 case 3:
22198 /* tmp = target = A B C D */
22199 tmp = copy_to_reg (target);
22200 /* tmp = X B C D */
22201 ix86_expand_vector_set (false, tmp, val, 0);
22202 /* target = A B C X */
22203 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22204 GEN_INT (0), GEN_INT (1),
22205 GEN_INT (2+4), GEN_INT (0+4)));
22206 return;
22207
22208 default:
22209 gcc_unreachable ();
22210 }
22211 break;
22212
22213 case V4SImode:
22214 use_vec_merge = TARGET_SSE4_1;
22215 if (use_vec_merge)
22216 break;
22217
22218 /* Element 0 handled by vec_merge below. */
22219 if (elt == 0)
22220 {
22221 use_vec_merge = true;
22222 break;
22223 }
22224
22225 if (TARGET_SSE2)
22226 {
22227 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22228 store into element 0, then shuffle them back. */
22229
22230 rtx order[4];
22231
22232 order[0] = GEN_INT (elt);
22233 order[1] = const1_rtx;
22234 order[2] = const2_rtx;
22235 order[3] = GEN_INT (3);
22236 order[elt] = const0_rtx;
22237
22238 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22239 order[1], order[2], order[3]));
22240
22241 ix86_expand_vector_set (false, target, val, 0);
22242
22243 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22244 order[1], order[2], order[3]));
22245 }
22246 else
22247 {
22248 /* For SSE1, we have to reuse the V4SF code. */
22249 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22250 gen_lowpart (SFmode, val), elt);
22251 }
22252 return;
22253
22254 case V8HImode:
22255 use_vec_merge = TARGET_SSE2;
22256 break;
22257 case V4HImode:
22258 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22259 break;
22260
22261 case V16QImode:
22262 use_vec_merge = TARGET_SSE4_1;
22263 break;
22264
22265 case V8QImode:
22266 default:
22267 break;
22268 }
22269
22270 if (use_vec_merge)
22271 {
22272 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22273 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22274 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22275 }
22276 else
22277 {
22278 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22279
22280 emit_move_insn (mem, target);
22281
22282 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22283 emit_move_insn (tmp, val);
22284
22285 emit_move_insn (target, mem);
22286 }
22287 }
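
/* Illustrative sketch, not part of the backend: a plain C model of the two
   strategies used above by ix86_expand_vector_set, assuming a 4-element
   float vector for concreteness.  The vec_merge form replaces exactly the
   lane selected by the (1 << elt) mask with the duplicated scalar; the
   fallback spills the vector to a stack temporary, rewrites one element in
   memory and reloads.  Guarded by #if 0 so it is never compiled. */
#if 0
static void
vec_set_merge_model (float vec[4], float val, int elt)
{
  int i;
  /* VEC_MERGE (VEC_DUPLICATE (val), vec, 1 << elt) */
  for (i = 0; i < 4; i++)
    if ((1 << elt) & (1 << i))
      vec[i] = val;
}

static void
vec_set_memory_model (float vec[4], float val, int elt)
{
  float tmp[4];                             /* stack temporary */
  __builtin_memcpy (tmp, vec, sizeof tmp);
  tmp[elt] = val;                           /* overwrite one element in memory */
  __builtin_memcpy (vec, tmp, sizeof tmp);
}
#endif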
22288
22289 void
22290 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22291 {
22292 enum machine_mode mode = GET_MODE (vec);
22293 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22294 bool use_vec_extr = false;
22295 rtx tmp;
22296
22297 switch (mode)
22298 {
22299 case V2SImode:
22300 case V2SFmode:
22301 if (!mmx_ok)
22302 break;
22303 /* FALLTHRU */
22304
22305 case V2DFmode:
22306 case V2DImode:
22307 use_vec_extr = true;
22308 break;
22309
22310 case V4SFmode:
22311 use_vec_extr = TARGET_SSE4_1;
22312 if (use_vec_extr)
22313 break;
22314
22315 switch (elt)
22316 {
22317 case 0:
22318 tmp = vec;
22319 break;
22320
22321 case 1:
22322 case 3:
22323 tmp = gen_reg_rtx (mode);
22324 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22325 GEN_INT (elt), GEN_INT (elt),
22326 GEN_INT (elt+4), GEN_INT (elt+4)));
22327 break;
22328
22329 case 2:
22330 tmp = gen_reg_rtx (mode);
22331 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22332 break;
22333
22334 default:
22335 gcc_unreachable ();
22336 }
22337 vec = tmp;
22338 use_vec_extr = true;
22339 elt = 0;
22340 break;
22341
22342 case V4SImode:
22343 use_vec_extr = TARGET_SSE4_1;
22344 if (use_vec_extr)
22345 break;
22346
22347 if (TARGET_SSE2)
22348 {
22349 switch (elt)
22350 {
22351 case 0:
22352 tmp = vec;
22353 break;
22354
22355 case 1:
22356 case 3:
22357 tmp = gen_reg_rtx (mode);
22358 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22359 GEN_INT (elt), GEN_INT (elt),
22360 GEN_INT (elt), GEN_INT (elt)));
22361 break;
22362
22363 case 2:
22364 tmp = gen_reg_rtx (mode);
22365 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22366 break;
22367
22368 default:
22369 gcc_unreachable ();
22370 }
22371 vec = tmp;
22372 use_vec_extr = true;
22373 elt = 0;
22374 }
22375 else
22376 {
22377 /* For SSE1, we have to reuse the V4SF code. */
22378 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22379 gen_lowpart (V4SFmode, vec), elt);
22380 return;
22381 }
22382 break;
22383
22384 case V8HImode:
22385 use_vec_extr = TARGET_SSE2;
22386 break;
22387 case V4HImode:
22388 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22389 break;
22390
22391 case V16QImode:
22392 use_vec_extr = TARGET_SSE4_1;
22393 break;
22394
22395 case V8QImode:
22396 /* ??? Could extract the appropriate HImode element and shift. */
22397 default:
22398 break;
22399 }
22400
22401 if (use_vec_extr)
22402 {
22403 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22404 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22405
22406 /* Let the rtl optimizers know about the zero extension performed. */
22407 if (inner_mode == QImode || inner_mode == HImode)
22408 {
22409 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22410 target = gen_lowpart (SImode, target);
22411 }
22412
22413 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22414 }
22415 else
22416 {
22417 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22418
22419 emit_move_insn (mem, vec);
22420
22421 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22422 emit_move_insn (target, tmp);
22423 }
22424 }
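
/* Illustrative sketch, not part of the backend: a C model of the element
   extraction expanded above for a narrow (QImode) element, showing why the
   explicit ZERO_EXTEND is emitted -- the upper bits of the SImode result
   are known to be zero.  Guarded by #if 0 so it is never compiled. */
#if 0
static unsigned int
vec_extract_qi_model (const unsigned char vec[16], int elt)
{
  /* VEC_SELECT of one element, then ZERO_EXTEND to SImode.  */
  return (unsigned int) vec[elt];
}
#endif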
22425
22426 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22427 pattern to reduce; DEST is the destination; IN is the input vector. */
22428
22429 void
22430 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22431 {
22432 rtx tmp1, tmp2, tmp3;
22433
22434 tmp1 = gen_reg_rtx (V4SFmode);
22435 tmp2 = gen_reg_rtx (V4SFmode);
22436 tmp3 = gen_reg_rtx (V4SFmode);
22437
22438 emit_insn (gen_sse_movhlps (tmp1, in, in));
22439 emit_insn (fn (tmp2, tmp1, in));
22440
22441 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22442 GEN_INT (1), GEN_INT (1),
22443 GEN_INT (1+4), GEN_INT (1+4)));
22444 emit_insn (fn (dest, tmp2, tmp3));
22445 }
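
/* Illustrative sketch, not part of the backend: the reduction above pairs
   the high two lanes with the low two lanes (movhlps), applies FN once more
   between lane 1 and lane 0 (after the shufps broadcast), and leaves the
   scalar result in element 0 of DEST.  A plain C model, with fmaxf as an
   example: reduc_v4sf_model (fmaxf, v) computes the maximum of the four
   lanes.  Guarded by #if 0 so it is never compiled. */
#if 0
#include <math.h>

static float
reduc_v4sf_model (float (*fn) (float, float), const float in[4])
{
  float t0 = fn (in[2], in[0]);   /* lanes paired by movhlps           */
  float t1 = fn (in[3], in[1]);
  return fn (t0, t1);             /* final combine; lands in element 0 */
}
#endif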
22446 \f
22447 /* Target hook for scalar_mode_supported_p. */
22448 static bool
22449 ix86_scalar_mode_supported_p (enum machine_mode mode)
22450 {
22451 if (DECIMAL_FLOAT_MODE_P (mode))
22452 return true;
22453 else if (mode == TFmode)
22454 return TARGET_64BIT;
22455 else
22456 return default_scalar_mode_supported_p (mode);
22457 }
22458
22459 /* Implements target hook vector_mode_supported_p. */
22460 static bool
22461 ix86_vector_mode_supported_p (enum machine_mode mode)
22462 {
22463 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22464 return true;
22465 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22466 return true;
22467 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22468 return true;
22469 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22470 return true;
22471 return false;
22472 }
22473
22474 /* Target hook for c_mode_for_suffix. */
22475 static enum machine_mode
22476 ix86_c_mode_for_suffix (char suffix)
22477 {
22478 if (TARGET_64BIT && suffix == 'q')
22479 return TFmode;
22480 if (TARGET_MMX && suffix == 'w')
22481 return XFmode;
22482
22483 return VOIDmode;
22484 }
22485
22486 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22487
22488 We implicitly clobber the "flags" and "fpsr" registers on every asm to
22489 maintain source compatibility with the old cc0-based compiler. */
22490
22491 static tree
22492 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22493 tree inputs ATTRIBUTE_UNUSED,
22494 tree clobbers)
22495 {
22496 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22497 clobbers);
22498 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22499 clobbers);
22500 return clobbers;
22501 }
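
/* Illustrative sketch, not part of the backend: a hypothetical user-level
   asm that alters EFLAGS without naming any clobbers.  Because the hook
   above adds "flags" and "fpsr" to every asm's clobber list, such legacy
   code remains safe with this backend.  Guarded by #if 0 so it is never
   compiled. */
#if 0
static int
legacy_add (int a, int b)
{
  /* addl modifies EFLAGS, but no "cc" clobber is listed.  */
  __asm__ ("addl %1, %0" : "+r" (a) : "r" (b));
  return a;
}
#endif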
22502
22503 /* Implements the targetm.asm.encode_section_info target hook. This
22504 hook is not used by NetWare. */
22505
22506 static void ATTRIBUTE_UNUSED
22507 ix86_encode_section_info (tree decl, rtx rtl, int first)
22508 {
22509 default_encode_section_info (decl, rtl, first);
22510
22511 if (TREE_CODE (decl) == VAR_DECL
22512 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22513 && ix86_in_large_data_p (decl))
22514 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22515 }
22516
22517 /* Worker function for REVERSE_CONDITION. */
22518
22519 enum rtx_code
22520 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22521 {
22522 return (mode != CCFPmode && mode != CCFPUmode
22523 ? reverse_condition (code)
22524 : reverse_condition_maybe_unordered (code));
22525 }
22526
22527 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22528 to OPERANDS[0]. */
22529
22530 const char *
22531 output_387_reg_move (rtx insn, rtx *operands)
22532 {
22533 if (REG_P (operands[0]))
22534 {
22535 if (REG_P (operands[1])
22536 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22537 {
22538 if (REGNO (operands[0]) == FIRST_STACK_REG)
22539 return output_387_ffreep (operands, 0);
22540 return "fstp\t%y0";
22541 }
22542 if (STACK_TOP_P (operands[0]))
22543 return "fld%z1\t%y1";
22544 return "fst\t%y0";
22545 }
22546 else if (MEM_P (operands[0]))
22547 {
22548 gcc_assert (REG_P (operands[1]));
22549 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22550 return "fstp%z0\t%y0";
22551 else
22552 {
22553 /* There is no non-popping store to memory for XFmode.
22554 So if we need one, follow the store with a load. */
22555 if (GET_MODE (operands[0]) == XFmode)
22556 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22557 else
22558 return "fst%z0\t%y0";
22559 }
22560 }
22561 else
22562 gcc_unreachable ();
22563 }
22564
22565 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22566 FP status register is set. */
22567
22568 void
22569 ix86_emit_fp_unordered_jump (rtx label)
22570 {
22571 rtx reg = gen_reg_rtx (HImode);
22572 rtx temp;
22573
22574 emit_insn (gen_x86_fnstsw_1 (reg));
22575
22576 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22577 {
22578 emit_insn (gen_x86_sahf_1 (reg));
22579
22580 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22581 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22582 }
22583 else
22584 {
22585 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22586
22587 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22588 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22589 }
22590
22591 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22592 gen_rtx_LABEL_REF (VOIDmode, label),
22593 pc_rtx);
22594 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22595
22596 emit_jump_insn (temp);
22597 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22598 }
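
/* Illustrative sketch, not part of the backend: in the non-SAHF path above
   the x87 status word is fetched with fnstsw and the C2 condition flag is
   tested.  C2 is bit 10 of the status word, i.e. bit 2 (0x04) of the high
   byte that the testqi_ext pattern inspects.  Guarded by #if 0 so it is
   never compiled. */
#if 0
static int
fp_c2_is_set_model (unsigned short fsw)   /* fsw as stored by fnstsw */
{
  return (fsw & 0x0400) != 0;             /* C2 = bit 10 = 0x04 << 8 */
}
#endif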
22599
22600 /* Output code to perform a log1p XFmode calculation. */
22601
22602 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22603 {
22604 rtx label1 = gen_label_rtx ();
22605 rtx label2 = gen_label_rtx ();
22606
22607 rtx tmp = gen_reg_rtx (XFmode);
22608 rtx tmp2 = gen_reg_rtx (XFmode);
22609
22610 emit_insn (gen_absxf2 (tmp, op1));
22611 emit_insn (gen_cmpxf (tmp,
22612 CONST_DOUBLE_FROM_REAL_VALUE (
22613 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22614 XFmode)));
22615 emit_jump_insn (gen_bge (label1));
22616
22617 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22618 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22619 emit_jump (label2);
22620
22621 emit_label (label1);
22622 emit_move_insn (tmp, CONST1_RTX (XFmode));
22623 emit_insn (gen_addxf3 (tmp, op1, tmp));
22624 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22625 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22626
22627 emit_label (label2);
22628 }
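
/* Illustrative sketch, not part of the backend: a scalar model of the
   log1p sequence above, assuming standard <math.h> functions stand in for
   the x87 instructions.  fyl2xp1 (modelled here by log1pl) keeps full
   precision only while |x| stays below 1 - sqrt(2)/2, so larger arguments
   take the fyl2x path on an explicitly formed 1 + x.  Guarded by #if 0 so
   it is never compiled. */
#if 0
#include <math.h>

static long double
i387_log1p_model (long double x)
{
  if (fabsl (x) < 0.29289321881345247561L)   /* 1 - sqrt(2)/2 */
    return log1pl (x);                       /* fyl2xp1 path: ln2 * log2 (1 + x) */
  return logl (1.0L + x);                    /* fyl2x path on 1 + x */
}
#endif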
22629
22630 /* Output code to perform a Newton-Raphson approximation of a single-precision
22631 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22632
22633 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22634 {
22635 rtx x0, x1, e0, e1, two;
22636
22637 x0 = gen_reg_rtx (mode);
22638 e0 = gen_reg_rtx (mode);
22639 e1 = gen_reg_rtx (mode);
22640 x1 = gen_reg_rtx (mode);
22641
22642 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22643
22644 if (VECTOR_MODE_P (mode))
22645 two = ix86_build_const_vector (SFmode, true, two);
22646
22647 two = force_reg (mode, two);
22648
22649 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22650
22651 /* x0 = 1./b estimate */
22652 emit_insn (gen_rtx_SET (VOIDmode, x0,
22653 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
22654 UNSPEC_RCP)));
22655 /* e0 = x0 * b */
22656 emit_insn (gen_rtx_SET (VOIDmode, e0,
22657 gen_rtx_MULT (mode, x0, b)));
22658 /* e1 = 2. - e0 */
22659 emit_insn (gen_rtx_SET (VOIDmode, e1,
22660 gen_rtx_MINUS (mode, two, e0)));
22661 /* x1 = x0 * e1 */
22662 emit_insn (gen_rtx_SET (VOIDmode, x1,
22663 gen_rtx_MULT (mode, x0, e1)));
22664 /* res = a * x1 */
22665 emit_insn (gen_rtx_SET (VOIDmode, res,
22666 gen_rtx_MULT (mode, a, x1)));
22667 }
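
/* Illustrative sketch, not part of the backend: a scalar model of the
   reciprocal refinement emitted above.  approx_rcp is a hypothetical
   stand-in for the ~12-bit RCPSS hardware estimate; one Newton-Raphson
   step roughly doubles its precision.  Guarded by #if 0 so it is never
   compiled. */
#if 0
static float
approx_rcp (float b)          /* stand-in for the RCPSS estimate */
{
  return 1.0f / b;
}

static float
swdiv_model (float a, float b)
{
  float x0 = approx_rcp (b);  /* x0 ~= 1/b             */
  float e0 = x0 * b;          /* e0 = b * rcp(b)       */
  float e1 = 2.0f - e0;       /* e1 = 2.0 - b * rcp(b) */
  float x1 = x0 * e1;         /* refined reciprocal    */
  return a * x1;              /* a / b ~= a * x1       */
}
#endif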
22668
22669 /* Output code to perform a Newton-Raphson approximation of a
22670 single-precision floating point [reciprocal] square root. */
22671
22672 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22673 bool recip)
22674 {
22675 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22676
22677 x0 = gen_reg_rtx (mode);
22678 e0 = gen_reg_rtx (mode);
22679 e1 = gen_reg_rtx (mode);
22680 e2 = gen_reg_rtx (mode);
22681 e3 = gen_reg_rtx (mode);
22682
22683 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22684 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22685
22686 mask = gen_reg_rtx (mode);
22687
22688 if (VECTOR_MODE_P (mode))
22689 {
22690 three = ix86_build_const_vector (SFmode, true, three);
22691 half = ix86_build_const_vector (SFmode, true, half);
22692 }
22693
22694 three = force_reg (mode, three);
22695 half = force_reg (mode, half);
22696
22697 zero = force_reg (mode, CONST0_RTX(mode));
22698
22699 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22700 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22701
22702 /* Compare a to zero. */
22703 emit_insn (gen_rtx_SET (VOIDmode, mask,
22704 gen_rtx_NE (mode, a, zero)));
22705
22706 /* x0 = 1./sqrt(a) estimate */
22707 emit_insn (gen_rtx_SET (VOIDmode, x0,
22708 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22709 UNSPEC_RSQRT)));
22710 /* Filter out infinity. */
22711 if (VECTOR_MODE_P (mode))
22712 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22713 gen_rtx_AND (mode,
22714 gen_lowpart (V4SFmode, x0),
22715 gen_lowpart (V4SFmode, mask))));
22716 else
22717 emit_insn (gen_rtx_SET (VOIDmode, x0,
22718 gen_rtx_AND (mode, x0, mask)));
22719
22720 /* e0 = x0 * a */
22721 emit_insn (gen_rtx_SET (VOIDmode, e0,
22722 gen_rtx_MULT (mode, x0, a)));
22723 /* e1 = e0 * x0 */
22724 emit_insn (gen_rtx_SET (VOIDmode, e1,
22725 gen_rtx_MULT (mode, e0, x0)));
22726 /* e2 = 3. - e1 */
22727 emit_insn (gen_rtx_SET (VOIDmode, e2,
22728 gen_rtx_MINUS (mode, three, e1)));
22729 if (recip)
22730 /* e3 = .5 * x0 */
22731 emit_insn (gen_rtx_SET (VOIDmode, e3,
22732 gen_rtx_MULT (mode, half, x0)));
22733 else
22734 /* e3 = .5 * e0 */
22735 emit_insn (gen_rtx_SET (VOIDmode, e3,
22736 gen_rtx_MULT (mode, half, e0)));
22737 /* ret = e2 * e3 */
22738 emit_insn (gen_rtx_SET (VOIDmode, res,
22739 gen_rtx_MULT (mode, e2, e3)));
22740 }
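
/* Illustrative sketch, not part of the backend: a scalar model of the
   rsqrt refinement emitted above.  approx_rsqrt is a hypothetical stand-in
   for the RSQRTSS hardware estimate; the a != 0 test models the mask that
   zeroes the estimate so that sqrt (0.0) does not become 0 * inf = NaN.
   Guarded by #if 0 so it is never compiled. */
#if 0
static float
approx_rsqrt (float a)                 /* stand-in for the RSQRTSS estimate */
{
  return 1.0f / __builtin_sqrtf (a);
}

static float
swsqrt_model (float a, int recip)      /* recip != 0 computes 1/sqrt(a) */
{
  float x0 = (a != 0.0f) ? approx_rsqrt (a) : 0.0f;
  float e0 = x0 * a;
  float e1 = e0 * x0;                  /* a * x0 * x0          */
  float e2 = 3.0f - e1;
  float e3 = recip ? 0.5f * x0 : 0.5f * e0;
  return e2 * e3;                      /* sqrt(a) or 1/sqrt(a) */
}
#endif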
22741
22742 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22743
22744 static void ATTRIBUTE_UNUSED
22745 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22746 tree decl)
22747 {
22748 /* With Binutils 2.15, the "@unwind" marker must be specified on
22749 every occurrence of the ".eh_frame" section, not just the first
22750 one. */
22751 if (TARGET_64BIT
22752 && strcmp (name, ".eh_frame") == 0)
22753 {
22754 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22755 flags & SECTION_WRITE ? "aw" : "a");
22756 return;
22757 }
22758 default_elf_asm_named_section (name, flags, decl);
22759 }
22760
22761 /* Return the mangling of TYPE if it is an extended fundamental type. */
22762
22763 static const char *
22764 ix86_mangle_fundamental_type (tree type)
22765 {
22766 switch (TYPE_MODE (type))
22767 {
22768 case TFmode:
22769 /* __float128 is "g". */
22770 return "g";
22771 case XFmode:
22772 /* "long double" or __float80 is "e". */
22773 return "e";
22774 default:
22775 return NULL;
22776 }
22777 }
22778
22779 /* For 32-bit code we can save PIC register setup by calling the hidden
22780 __stack_chk_fail_local function instead of calling
22781 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
22782 register, so it is better to call __stack_chk_fail directly. */
22783
22784 static tree
22785 ix86_stack_protect_fail (void)
22786 {
22787 return TARGET_64BIT
22788 ? default_external_stack_protect_fail ()
22789 : default_hidden_stack_protect_fail ();
22790 }
22791
22792 /* Select a format to encode pointers in exception handling data. CODE
22793 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22794 true if the symbol may be affected by dynamic relocations.
22795
22796 ??? All x86 object file formats are capable of representing this.
22797 After all, the relocation needed is the same as for the call insn.
22798 Whether or not a particular assembler allows us to enter such, I
22799 guess we'll have to see. */
22800 int
22801 asm_preferred_eh_data_format (int code, int global)
22802 {
22803 if (flag_pic)
22804 {
22805 int type = DW_EH_PE_sdata8;
22806 if (!TARGET_64BIT
22807 || ix86_cmodel == CM_SMALL_PIC
22808 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22809 type = DW_EH_PE_sdata4;
22810 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22811 }
22812 if (ix86_cmodel == CM_SMALL
22813 || (ix86_cmodel == CM_MEDIUM && code))
22814 return DW_EH_PE_udata4;
22815 return DW_EH_PE_absptr;
22816 }
22817 \f
22818 /* Copy the sign bit of SIGN onto the non-negative value ABS_VALUE and
22819 store the result in RESULT. If MASK is non-null, it is the inverted
22820 sign-bit mask that was used to compute ABS_VALUE (see ix86_expand_sse_fabs). */
22821 static void
22822 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22823 {
22824 enum machine_mode mode = GET_MODE (sign);
22825 rtx sgn = gen_reg_rtx (mode);
22826 if (mask == NULL_RTX)
22827 {
22828 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22829 if (!VECTOR_MODE_P (mode))
22830 {
22831 /* We need to generate a scalar mode mask in this case. */
22832 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22833 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22834 mask = gen_reg_rtx (mode);
22835 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22836 }
22837 }
22838 else
22839 mask = gen_rtx_NOT (mode, mask);
22840 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22841 gen_rtx_AND (mode, mask, sign)));
22842 emit_insn (gen_rtx_SET (VOIDmode, result,
22843 gen_rtx_IOR (mode, abs_value, sgn)));
22844 }
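
/* Illustrative sketch, not part of the backend: the sequence above is pure
   bit manipulation -- extract the sign bit of SIGN with an AND, then OR it
   into the already non-negative magnitude.  A scalar DFmode model, assuming
   <stdint.h> and <string.h>.  Guarded by #if 0 so it is never compiled. */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_to_positive_model (double abs_value, double sign)
{
  const uint64_t signbit = UINT64_C (1) << 63;
  uint64_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & signbit;             /* OR the extracted sign bit into the magnitude */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif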
22845
22846 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22847 mask for masking out the sign-bit is stored in *SMASK, if that is
22848 non-null. */
22849 static rtx
22850 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22851 {
22852 enum machine_mode mode = GET_MODE (op0);
22853 rtx xa, mask;
22854
22855 xa = gen_reg_rtx (mode);
22856 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22857 if (!VECTOR_MODE_P (mode))
22858 {
22859 /* We need to generate a scalar mode mask in this case. */
22860 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22861 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22862 mask = gen_reg_rtx (mode);
22863 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22864 }
22865 emit_insn (gen_rtx_SET (VOIDmode, xa,
22866 gen_rtx_AND (mode, op0, mask)));
22867
22868 if (smask)
22869 *smask = mask;
22870
22871 return xa;
22872 }
22873
22874 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22875 swapping the operands if SWAP_OPERANDS is true. The expanded
22876 code is a forward jump to a newly created label in case the
22877 comparison is true. The generated label rtx is returned. */
22878 static rtx
22879 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22880 bool swap_operands)
22881 {
22882 rtx label, tmp;
22883
22884 if (swap_operands)
22885 {
22886 tmp = op0;
22887 op0 = op1;
22888 op1 = tmp;
22889 }
22890
22891 label = gen_label_rtx ();
22892 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22893 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22894 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22895 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22897 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22898 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22899 JUMP_LABEL (tmp) = label;
22900
22901 return label;
22902 }
22903
22904 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22905 using comparison code CODE. Operands are swapped for the comparison if
22906 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22907 static rtx
22908 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22909 bool swap_operands)
22910 {
22911 enum machine_mode mode = GET_MODE (op0);
22912 rtx mask = gen_reg_rtx (mode);
22913
22914 if (swap_operands)
22915 {
22916 rtx tmp = op0;
22917 op0 = op1;
22918 op1 = tmp;
22919 }
22920
22921 if (mode == DFmode)
22922 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22923 gen_rtx_fmt_ee (code, mode, op0, op1)));
22924 else
22925 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22926 gen_rtx_fmt_ee (code, mode, op0, op1)));
22927
22928 return mask;
22929 }
22930
22931 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22932 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22933 static rtx
22934 ix86_gen_TWO52 (enum machine_mode mode)
22935 {
22936 REAL_VALUE_TYPE TWO52r;
22937 rtx TWO52;
22938
22939 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22940 TWO52 = const_double_from_real_value (TWO52r, mode);
22941 TWO52 = force_reg (mode, TWO52);
22942
22943 return TWO52;
22944 }
22945
22946 /* Expand SSE sequence for computing lround from OP1 storing
22947 into OP0. */
22948 void
22949 ix86_expand_lround (rtx op0, rtx op1)
22950 {
22951 /* C code for the stuff we're doing below:
22952 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22953 return (long)tmp;
22954 */
22955 enum machine_mode mode = GET_MODE (op1);
22956 const struct real_format *fmt;
22957 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22958 rtx adj;
22959
22960 /* load nextafter (0.5, 0.0) */
22961 fmt = REAL_MODE_FORMAT (mode);
22962 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22963 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22964
22965 /* adj = copysign (0.5, op1) */
22966 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22967 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22968
22969 /* adj = op1 + adj */
22970 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22971
22972 /* op0 = (imode)adj */
22973 expand_fix (op0, adj, 0);
22974 }
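
/* Illustrative sketch, not part of the backend: a scalar model of the
   lround sequence above, assuming <math.h>.  Biasing by nextafter (0.5, 0.0)
   -- the largest double below 0.5 -- keeps values just under a halfway
   boundary from being bumped across it before the truncating conversion.
   Guarded by #if 0 so it is never compiled. */
#if 0
#include <math.h>

static long
lround_model (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);      /* conversion truncates toward zero */
}
#endif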
22975
22976 /* Expand SSE2 sequence for computing lfloor or lceil (according to
22977 DO_FLOOR) from OP1, storing into OP0. */
22978 void
22979 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22980 {
22981 /* C code for the stuff we're doing below (for do_floor):
22982 xi = (long)op1;
22983 xi -= (double)xi > op1 ? 1 : 0;
22984 return xi;
22985 */
22986 enum machine_mode fmode = GET_MODE (op1);
22987 enum machine_mode imode = GET_MODE (op0);
22988 rtx ireg, freg, label, tmp;
22989
22990 /* reg = (long)op1 */
22991 ireg = gen_reg_rtx (imode);
22992 expand_fix (ireg, op1, 0);
22993
22994 /* freg = (double)reg */
22995 freg = gen_reg_rtx (fmode);
22996 expand_float (freg, ireg, 0);
22997
22998 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22999 label = ix86_expand_sse_compare_and_jump (UNLE,
23000 freg, op1, !do_floor);
23001 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
23002 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
23003 emit_move_insn (ireg, tmp);
23004
23005 emit_label (label);
23006 LABEL_NUSES (label) = 1;
23007
23008 emit_move_insn (op0, ireg);
23009 }
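
/* Illustrative sketch, not part of the backend: a scalar model of the
   do_floor variant expanded above.  The truncating conversion rounds toward
   zero, so a compensation step subtracts one whenever the converted value
   overshoots the operand; the ceil variant mirrors this with a swapped
   comparison and an addition.  Guarded by #if 0 so it is never compiled. */
#if 0
static long
lfloor_model (double x)
{
  long xi = (long) x;           /* truncates toward zero   */
  if ((double) xi > x)          /* only possible for x < 0 */
    xi -= 1;
  return xi;
}
#endif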
23010
23011 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23012 result in OPERAND0. */
23013 void
23014 ix86_expand_rint (rtx operand0, rtx operand1)
23015 {
23016 /* C code for the stuff we're doing below:
23017 xa = fabs (operand1);
23018 if (!isless (xa, 2**52))
23019 return operand1;
23020 xa = xa + 2**52 - 2**52;
23021 return copysign (xa, operand1);
23022 */
23023 enum machine_mode mode = GET_MODE (operand0);
23024 rtx res, xa, label, TWO52, mask;
23025
23026 res = gen_reg_rtx (mode);
23027 emit_move_insn (res, operand1);
23028
23029 /* xa = abs (operand1) */
23030 xa = ix86_expand_sse_fabs (res, &mask);
23031
23032 /* if (!isless (xa, TWO52)) goto label; */
23033 TWO52 = ix86_gen_TWO52 (mode);
23034 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23035
23036 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23037 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23038
23039 ix86_sse_copysign_to_positive (res, xa, res, mask);
23040
23041 emit_label (label);
23042 LABEL_NUSES (label) = 1;
23043
23044 emit_move_insn (operand0, res);
23045 }
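
/* Illustrative sketch, not part of the backend: a scalar model of the
   rint sequence above, assuming <math.h> and the default round-to-nearest
   mode.  Adding and subtracting 2**52 forces |x| to be rounded to an
   integer (any |x| >= 2**52, or a NaN, is already integral and is returned
   unchanged); the real expansion emits the additions as machine insns, so
   they cannot be folded away the way a compiler might fold this C.
   Guarded by #if 0 so it is never compiled. */
#if 0
#include <math.h>

static double
rint_model (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))                         /* !isless: large or NaN */
    return x;
  xa = (xa + two52) - two52;                 /* rounds in the current mode */
  return copysign (xa, x);                   /* restore the sign, keep -0.0 */
}
#endif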
23046
23047 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23048 into OPERAND0. */
23049 void
23050 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23051 {
23052 /* C code for the stuff we expand below.
23053 double xa = fabs (x), x2;
23054 if (!isless (xa, TWO52))
23055 return x;
23056 xa = xa + TWO52 - TWO52;
23057 x2 = copysign (xa, x);
23058 Compensate. Floor:
23059 if (x2 > x)
23060 x2 -= 1;
23061 Compensate. Ceil:
23062 if (x2 < x)
23063 x2 -= -1;
23064 return x2;
23065 */
23066 enum machine_mode mode = GET_MODE (operand0);
23067 rtx xa, TWO52, tmp, label, one, res, mask;
23068
23069 TWO52 = ix86_gen_TWO52 (mode);
23070
23071 /* Temporary for holding the result, initialized to the input
23072 operand to ease control flow. */
23073 res = gen_reg_rtx (mode);
23074 emit_move_insn (res, operand1);
23075
23076 /* xa = abs (operand1) */
23077 xa = ix86_expand_sse_fabs (res, &mask);
23078
23079 /* if (!isless (xa, TWO52)) goto label; */
23080 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23081
23082 /* xa = xa + TWO52 - TWO52; */
23083 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23084 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23085
23086 /* xa = copysign (xa, operand1) */
23087 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23088
23089 /* generate 1.0 or -1.0 */
23090 one = force_reg (mode,
23091 const_double_from_real_value (do_floor
23092 ? dconst1 : dconstm1, mode));
23093
23094 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23095 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23096 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23097 gen_rtx_AND (mode, one, tmp)));
23098 /* We always need to subtract here to preserve signed zero. */
23099 tmp = expand_simple_binop (mode, MINUS,
23100 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23101 emit_move_insn (res, tmp);
23102
23103 emit_label (label);
23104 LABEL_NUSES (label) = 1;
23105
23106 emit_move_insn (operand0, res);
23107 }
23108
23109 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23110 into OPERAND0. */
23111 void
23112 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23113 {
23114 /* C code for the stuff we expand below.
23115 double xa = fabs (x), x2;
23116 if (!isless (xa, TWO52))
23117 return x;
23118 x2 = (double)(long)x;
23119 Compensate. Floor:
23120 if (x2 > x)
23121 x2 -= 1;
23122 Compensate. Ceil:
23123 if (x2 < x)
23124 x2 += 1;
23125 if (HONOR_SIGNED_ZEROS (mode))
23126 return copysign (x2, x);
23127 return x2;
23128 */
23129 enum machine_mode mode = GET_MODE (operand0);
23130 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23131
23132 TWO52 = ix86_gen_TWO52 (mode);
23133
23134 /* Temporary for holding the result, initialized to the input
23135 operand to ease control flow. */
23136 res = gen_reg_rtx (mode);
23137 emit_move_insn (res, operand1);
23138
23139 /* xa = abs (operand1) */
23140 xa = ix86_expand_sse_fabs (res, &mask);
23141
23142 /* if (!isless (xa, TWO52)) goto label; */
23143 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23144
23145 /* xa = (double)(long)x */
23146 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23147 expand_fix (xi, res, 0);
23148 expand_float (xa, xi, 0);
23149
23150 /* generate 1.0 */
23151 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23152
23153 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23154 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23155 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23156 gen_rtx_AND (mode, one, tmp)));
23157 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23158 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23159 emit_move_insn (res, tmp);
23160
23161 if (HONOR_SIGNED_ZEROS (mode))
23162 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23163
23164 emit_label (label);
23165 LABEL_NUSES (label) = 1;
23166
23167 emit_move_insn (operand0, res);
23168 }
23169
23170 /* Expand SSE sequence for computing round from OPERAND1 storing
23171 into OPERAND0. This sequence works without relying on DImode truncation
23172 via cvttsd2siq, which is only available on 64-bit targets. */
23173 void
23174 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23175 {
23176 /* C code for the stuff we expand below.
23177 double xa = fabs (x), xa2, x2;
23178 if (!isless (xa, TWO52))
23179 return x;
23180 Using the absolute value and copying back sign makes
23181 -0.0 -> -0.0 correct.
23182 xa2 = xa + TWO52 - TWO52;
23183 Compensate.
23184 dxa = xa2 - xa;
23185 if (dxa <= -0.5)
23186 xa2 += 1;
23187 else if (dxa > 0.5)
23188 xa2 -= 1;
23189 x2 = copysign (xa2, x);
23190 return x2;
23191 */
23192 enum machine_mode mode = GET_MODE (operand0);
23193 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23194
23195 TWO52 = ix86_gen_TWO52 (mode);
23196
23197 /* Temporary for holding the result, initialized to the input
23198 operand to ease control flow. */
23199 res = gen_reg_rtx (mode);
23200 emit_move_insn (res, operand1);
23201
23202 /* xa = abs (operand1) */
23203 xa = ix86_expand_sse_fabs (res, &mask);
23204
23205 /* if (!isless (xa, TWO52)) goto label; */
23206 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23207
23208 /* xa2 = xa + TWO52 - TWO52; */
23209 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23210 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23211
23212 /* dxa = xa2 - xa; */
23213 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23214
23215 /* generate 0.5, 1.0 and -0.5 */
23216 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23217 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23218 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23219 0, OPTAB_DIRECT);
23220
23221 /* Compensate. */
23222 tmp = gen_reg_rtx (mode);
23223 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23224 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23225 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23226 gen_rtx_AND (mode, one, tmp)));
23227 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23228 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23229 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23230 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23231 gen_rtx_AND (mode, one, tmp)));
23232 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23233
23234 /* res = copysign (xa2, operand1) */
23235 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23236
23237 emit_label (label);
23238 LABEL_NUSES (label) = 1;
23239
23240 emit_move_insn (operand0, res);
23241 }
23242
23243 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23244 into OPERAND0. */
23245 void
23246 ix86_expand_trunc (rtx operand0, rtx operand1)
23247 {
23248 /* C code for SSE variant we expand below.
23249 double xa = fabs (x), x2;
23250 if (!isless (xa, TWO52))
23251 return x;
23252 x2 = (double)(long)x;
23253 if (HONOR_SIGNED_ZEROS (mode))
23254 return copysign (x2, x);
23255 return x2;
23256 */
23257 enum machine_mode mode = GET_MODE (operand0);
23258 rtx xa, xi, TWO52, label, res, mask;
23259
23260 TWO52 = ix86_gen_TWO52 (mode);
23261
23262 /* Temporary for holding the result, initialized to the input
23263 operand to ease control flow. */
23264 res = gen_reg_rtx (mode);
23265 emit_move_insn (res, operand1);
23266
23267 /* xa = abs (operand1) */
23268 xa = ix86_expand_sse_fabs (res, &mask);
23269
23270 /* if (!isless (xa, TWO52)) goto label; */
23271 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23272
23273 /* x = (double)(long)x */
23274 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23275 expand_fix (xi, res, 0);
23276 expand_float (res, xi, 0);
23277
23278 if (HONOR_SIGNED_ZEROS (mode))
23279 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23280
23281 emit_label (label);
23282 LABEL_NUSES (label) = 1;
23283
23284 emit_move_insn (operand0, res);
23285 }
23286
23287 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23288 into OPERAND0. */
23289 void
23290 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23291 {
23292 enum machine_mode mode = GET_MODE (operand0);
23293 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23294
23295 /* C code for SSE variant we expand below.
23296 double xa = fabs (x), xa2, x2;
23297 if (!isless (xa, TWO52))
23298 return x;
23299 xa2 = xa + TWO52 - TWO52;
23300 Compensate:
23301 if (xa2 > xa)
23302 xa2 -= 1.0;
23303 x2 = copysign (xa2, x);
23304 return x2;
23305 */
23306
23307 TWO52 = ix86_gen_TWO52 (mode);
23308
23309 /* Temporary for holding the result, initialized to the input
23310 operand to ease control flow. */
23311 res = gen_reg_rtx (mode);
23312 emit_move_insn (res, operand1);
23313
23314 /* xa = abs (operand1) */
23315 xa = ix86_expand_sse_fabs (res, &smask);
23316
23317 /* if (!isless (xa, TWO52)) goto label; */
23318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23319
23320 /* res = xa + TWO52 - TWO52; */
23321 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23322 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23323 emit_move_insn (res, tmp);
23324
23325 /* generate 1.0 */
23326 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23327
23328 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23329 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23330 emit_insn (gen_rtx_SET (VOIDmode, mask,
23331 gen_rtx_AND (mode, mask, one)));
23332 tmp = expand_simple_binop (mode, MINUS,
23333 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23334 emit_move_insn (res, tmp);
23335
23336 /* res = copysign (res, operand1) */
23337 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23338
23339 emit_label (label);
23340 LABEL_NUSES (label) = 1;
23341
23342 emit_move_insn (operand0, res);
23343 }
23344
23345 /* Expand SSE sequence for computing round from OPERAND1 storing
23346 into OPERAND0. */
23347 void
23348 ix86_expand_round (rtx operand0, rtx operand1)
23349 {
23350 /* C code for the stuff we're doing below:
23351 double xa = fabs (x);
23352 if (!isless (xa, TWO52))
23353 return x;
23354 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23355 return copysign (xa, x);
23356 */
23357 enum machine_mode mode = GET_MODE (operand0);
23358 rtx res, TWO52, xa, label, xi, half, mask;
23359 const struct real_format *fmt;
23360 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23361
23362 /* Temporary for holding the result, initialized to the input
23363 operand to ease control flow. */
23364 res = gen_reg_rtx (mode);
23365 emit_move_insn (res, operand1);
23366
23367 TWO52 = ix86_gen_TWO52 (mode);
23368 xa = ix86_expand_sse_fabs (res, &mask);
23369 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23370
23371 /* load nextafter (0.5, 0.0) */
23372 fmt = REAL_MODE_FORMAT (mode);
23373 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23374 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23375
23376 /* xa = xa + 0.5 */
23377 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23378 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23379
23380 /* xa = (double)(int64_t)xa */
23381 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23382 expand_fix (xi, xa, 0);
23383 expand_float (xa, xi, 0);
23384
23385 /* res = copysign (xa, operand1) */
23386 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23387
23388 emit_label (label);
23389 LABEL_NUSES (label) = 1;
23390
23391 emit_move_insn (operand0, res);
23392 }
23393
23394 \f
23395 /* Table of valid machine attributes. */
23396 static const struct attribute_spec ix86_attribute_table[] =
23397 {
23398 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23399 /* Stdcall attribute says callee is responsible for popping arguments
23400 if they are not variable. */
23401 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23402 /* Fastcall attribute says callee is responsible for popping arguments
23403 if they are not variable. */
23404 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23405 /* Cdecl attribute says the callee is a normal C declaration */
23406 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23407 /* Regparm attribute specifies how many integer arguments are to be
23408 passed in registers. */
23409 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23410 /* Sseregparm attribute says we are using x86_64 calling conventions
23411 for FP arguments. */
23412 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23413 /* force_align_arg_pointer says this function realigns the stack at entry. */
23414 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23415 false, true, true, ix86_handle_cconv_attribute },
23416 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23417 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23418 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23419 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23420 #endif
23421 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23422 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23423 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23424 SUBTARGET_ATTRIBUTE_TABLE,
23425 #endif
23426 { NULL, 0, 0, false, false, false, NULL }
23427 };
23428
23429 /* Initialize the GCC target structure. */
23430 #undef TARGET_ATTRIBUTE_TABLE
23431 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23432 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23433 # undef TARGET_MERGE_DECL_ATTRIBUTES
23434 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23435 #endif
23436
23437 #undef TARGET_COMP_TYPE_ATTRIBUTES
23438 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23439
23440 #undef TARGET_INIT_BUILTINS
23441 #define TARGET_INIT_BUILTINS ix86_init_builtins
23442 #undef TARGET_EXPAND_BUILTIN
23443 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23444
23445 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23446 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23447 ix86_builtin_vectorized_function
23448
23449 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23450 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23451
23452 #undef TARGET_BUILTIN_RECIPROCAL
23453 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23454
23455 #undef TARGET_ASM_FUNCTION_EPILOGUE
23456 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23457
23458 #undef TARGET_ENCODE_SECTION_INFO
23459 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23460 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23461 #else
23462 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23463 #endif
23464
23465 #undef TARGET_ASM_OPEN_PAREN
23466 #define TARGET_ASM_OPEN_PAREN ""
23467 #undef TARGET_ASM_CLOSE_PAREN
23468 #define TARGET_ASM_CLOSE_PAREN ""
23469
23470 #undef TARGET_ASM_ALIGNED_HI_OP
23471 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23472 #undef TARGET_ASM_ALIGNED_SI_OP
23473 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23474 #ifdef ASM_QUAD
23475 #undef TARGET_ASM_ALIGNED_DI_OP
23476 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23477 #endif
23478
23479 #undef TARGET_ASM_UNALIGNED_HI_OP
23480 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23481 #undef TARGET_ASM_UNALIGNED_SI_OP
23482 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23483 #undef TARGET_ASM_UNALIGNED_DI_OP
23484 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23485
23486 #undef TARGET_SCHED_ADJUST_COST
23487 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23488 #undef TARGET_SCHED_ISSUE_RATE
23489 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23490 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23491 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23492 ia32_multipass_dfa_lookahead
23493
23494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23495 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23496
23497 #ifdef HAVE_AS_TLS
23498 #undef TARGET_HAVE_TLS
23499 #define TARGET_HAVE_TLS true
23500 #endif
23501 #undef TARGET_CANNOT_FORCE_CONST_MEM
23502 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23503 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23504 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23505
23506 #undef TARGET_DELEGITIMIZE_ADDRESS
23507 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23508
23509 #undef TARGET_MS_BITFIELD_LAYOUT_P
23510 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23511
23512 #if TARGET_MACHO
23513 #undef TARGET_BINDS_LOCAL_P
23514 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23515 #endif
23516 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23517 #undef TARGET_BINDS_LOCAL_P
23518 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23519 #endif
23520
23521 #undef TARGET_ASM_OUTPUT_MI_THUNK
23522 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23523 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23524 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23525
23526 #undef TARGET_ASM_FILE_START
23527 #define TARGET_ASM_FILE_START x86_file_start
23528
23529 #undef TARGET_DEFAULT_TARGET_FLAGS
23530 #define TARGET_DEFAULT_TARGET_FLAGS \
23531 (TARGET_DEFAULT \
23532 | TARGET_SUBTARGET_DEFAULT \
23533 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23534
23535 #undef TARGET_HANDLE_OPTION
23536 #define TARGET_HANDLE_OPTION ix86_handle_option
23537
23538 #undef TARGET_RTX_COSTS
23539 #define TARGET_RTX_COSTS ix86_rtx_costs
23540 #undef TARGET_ADDRESS_COST
23541 #define TARGET_ADDRESS_COST ix86_address_cost
23542
23543 #undef TARGET_FIXED_CONDITION_CODE_REGS
23544 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23545 #undef TARGET_CC_MODES_COMPATIBLE
23546 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23547
23548 #undef TARGET_MACHINE_DEPENDENT_REORG
23549 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23550
23551 #undef TARGET_BUILD_BUILTIN_VA_LIST
23552 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23553
23554 #undef TARGET_MD_ASM_CLOBBERS
23555 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23556
23557 #undef TARGET_PROMOTE_PROTOTYPES
23558 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23559 #undef TARGET_STRUCT_VALUE_RTX
23560 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23561 #undef TARGET_SETUP_INCOMING_VARARGS
23562 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23563 #undef TARGET_MUST_PASS_IN_STACK
23564 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23565 #undef TARGET_PASS_BY_REFERENCE
23566 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23567 #undef TARGET_INTERNAL_ARG_POINTER
23568 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23569 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23570 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23571 #undef TARGET_STRICT_ARGUMENT_NAMING
23572 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23573
23574 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23575 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23576
23577 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23578 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23579
23580 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23581 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23582
23583 #undef TARGET_C_MODE_FOR_SUFFIX
23584 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23585
23586 #ifdef HAVE_AS_TLS
23587 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23588 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23589 #endif
23590
23591 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23592 #undef TARGET_INSERT_ATTRIBUTES
23593 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23594 #endif
23595
23596 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23597 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23598
23599 #undef TARGET_STACK_PROTECT_FAIL
23600 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23601
23602 #undef TARGET_FUNCTION_VALUE
23603 #define TARGET_FUNCTION_VALUE ix86_function_value
23604
23605 struct gcc_target targetm = TARGET_INITIALIZER;
23606 \f
23607 #include "gt-i386.h"