1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
58 #endif
59
  60    /* Return the index of the given mode in the mult and division cost tables.  */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
66 : 4)
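/* A usage sketch (assuming the mult_init/divide field names that struct
   processor_costs uses in i386.h): the rtx cost code indexes the per-mode
   arrays below with this macro, roughly

       ix86_cost->mult_init[MODE_INDEX (mode)]
       ix86_cost->divide[MODE_INDEX (mode)]

   so a DImode multiply is charged the cost at index 3 and any wider mode
   falls into the "other" slot at index 4.  */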
67
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
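/* Under those assumptions the two scales line up: COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), so a two-byte add in the size table below costs the same
   as one add does in the speed-oriented tables that follow.  */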
71
72 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
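/* Reading guide for the stringop descriptors below (a sketch assuming the
   stringop_algs layout from i386.h): each descriptor is

       {algorithm for unknown block size,
        {{max_size, algorithm}, ...}}

   where each {max_size, algorithm} pair selects the algorithm for blocks of
   known size up to max_size, and max_size == -1 terminates the list and
   covers everything larger.  Every cost table carries a pair of descriptors
   for memcpy and a pair for memset (32-bit and 64-bit variants);
   DUMMY_STRINGOP_ALGS fills the slot for a word size the tuning never
   targets, e.g. the 64-bit slot for i386 or the 32-bit slot for generic64.  */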
73
74 static const
75 struct processor_costs size_cost = { /* costs for tuning for size */
76 COSTS_N_BYTES (2), /* cost of an add instruction */
77 COSTS_N_BYTES (3), /* cost of a lea instruction */
78 COSTS_N_BYTES (2), /* variable shift costs */
79 COSTS_N_BYTES (3), /* constant shift costs */
80 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
81 COSTS_N_BYTES (3), /* HI */
82 COSTS_N_BYTES (3), /* SI */
83 COSTS_N_BYTES (3), /* DI */
84 COSTS_N_BYTES (5)}, /* other */
85 0, /* cost of multiply per each bit set */
86 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
87 COSTS_N_BYTES (3), /* HI */
88 COSTS_N_BYTES (3), /* SI */
89 COSTS_N_BYTES (3), /* DI */
90 COSTS_N_BYTES (5)}, /* other */
91 COSTS_N_BYTES (3), /* cost of movsx */
92 COSTS_N_BYTES (3), /* cost of movzx */
93 0, /* "large" insn */
94 2, /* MOVE_RATIO */
95 2, /* cost for loading QImode using movzbl */
96 {2, 2, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 2, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {2, 2, 2}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {2, 2, 2}, /* cost of storing fp registers
104 in SFmode, DFmode and XFmode */
105 3, /* cost of moving MMX register */
106 {3, 3}, /* cost of loading MMX registers
107 in SImode and DImode */
108 {3, 3}, /* cost of storing MMX registers
109 in SImode and DImode */
110 3, /* cost of moving SSE register */
111 {3, 3, 3}, /* cost of loading SSE registers
112 in SImode, DImode and TImode */
113 {3, 3, 3}, /* cost of storing SSE registers
114 in SImode, DImode and TImode */
115 3, /* MMX or SSE register to integer */
116 0, /* size of prefetch block */
117 0, /* number of parallel prefetches */
118 2, /* Branch cost */
119 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
120 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
121 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
122 COSTS_N_BYTES (2), /* cost of FABS instruction. */
123 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
124 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
127 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
129 };
130
131 /* Processor costs (relative to an add) */
132 static const
133 struct processor_costs i386_cost = { /* 386 specific costs */
134 COSTS_N_INSNS (1), /* cost of an add instruction */
135 COSTS_N_INSNS (1), /* cost of a lea instruction */
136 COSTS_N_INSNS (3), /* variable shift costs */
137 COSTS_N_INSNS (2), /* constant shift costs */
138 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
139 COSTS_N_INSNS (6), /* HI */
140 COSTS_N_INSNS (6), /* SI */
141 COSTS_N_INSNS (6), /* DI */
142 COSTS_N_INSNS (6)}, /* other */
143 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
144 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
145 COSTS_N_INSNS (23), /* HI */
146 COSTS_N_INSNS (23), /* SI */
147 COSTS_N_INSNS (23), /* DI */
148 COSTS_N_INSNS (23)}, /* other */
149 COSTS_N_INSNS (3), /* cost of movsx */
150 COSTS_N_INSNS (2), /* cost of movzx */
151 15, /* "large" insn */
152 3, /* MOVE_RATIO */
153 4, /* cost for loading QImode using movzbl */
154 {2, 4, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 4, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {8, 8, 8}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {8, 8, 8}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 2, /* cost of moving MMX register */
164 {4, 8}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {4, 8}, /* cost of storing MMX registers
167 in SImode and DImode */
168 2, /* cost of moving SSE register */
169 {4, 8, 16}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {4, 8, 16}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of prefetch block */
175 0, /* number of parallel prefetches */
176 1, /* Branch cost */
177 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
178 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
179 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
180 COSTS_N_INSNS (22), /* cost of FABS instruction. */
181 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
182 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
183 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
184 DUMMY_STRINGOP_ALGS},
185 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
186 DUMMY_STRINGOP_ALGS},
187 };
188
189 static const
190 struct processor_costs i486_cost = { /* 486 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (12), /* HI */
197 COSTS_N_INSNS (12), /* SI */
198 COSTS_N_INSNS (12), /* DI */
199 COSTS_N_INSNS (12)}, /* other */
200 1, /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (40), /* HI */
203 COSTS_N_INSNS (40), /* SI */
204 COSTS_N_INSNS (40), /* DI */
205 COSTS_N_INSNS (40)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
209 3, /* MOVE_RATIO */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 1, /* Branch cost */
234 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
235 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
236 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
237 COSTS_N_INSNS (3), /* cost of FABS instruction. */
238 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
239 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
240 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
241 DUMMY_STRINGOP_ALGS},
242 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
243 DUMMY_STRINGOP_ALGS}
244 };
245
246 static const
247 struct processor_costs pentium_cost = {
248 COSTS_N_INSNS (1), /* cost of an add instruction */
249 COSTS_N_INSNS (1), /* cost of a lea instruction */
250 COSTS_N_INSNS (4), /* variable shift costs */
251 COSTS_N_INSNS (1), /* constant shift costs */
252 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
253 COSTS_N_INSNS (11), /* HI */
254 COSTS_N_INSNS (11), /* SI */
255 COSTS_N_INSNS (11), /* DI */
256 COSTS_N_INSNS (11)}, /* other */
257 0, /* cost of multiply per each bit set */
258 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
259 COSTS_N_INSNS (25), /* HI */
260 COSTS_N_INSNS (25), /* SI */
261 COSTS_N_INSNS (25), /* DI */
262 COSTS_N_INSNS (25)}, /* other */
263 COSTS_N_INSNS (3), /* cost of movsx */
264 COSTS_N_INSNS (2), /* cost of movzx */
265 8, /* "large" insn */
266 6, /* MOVE_RATIO */
267 6, /* cost for loading QImode using movzbl */
268 {2, 4, 2}, /* cost of loading integer registers
269 in QImode, HImode and SImode.
270 Relative to reg-reg move (2). */
271 {2, 4, 2}, /* cost of storing integer registers */
272 2, /* cost of reg,reg fld/fst */
273 {2, 2, 6}, /* cost of loading fp registers
274 in SFmode, DFmode and XFmode */
275 {4, 4, 6}, /* cost of storing fp registers
276 in SFmode, DFmode and XFmode */
277 8, /* cost of moving MMX register */
278 {8, 8}, /* cost of loading MMX registers
279 in SImode and DImode */
280 {8, 8}, /* cost of storing MMX registers
281 in SImode and DImode */
282 2, /* cost of moving SSE register */
283 {4, 8, 16}, /* cost of loading SSE registers
284 in SImode, DImode and TImode */
285 {4, 8, 16}, /* cost of storing SSE registers
286 in SImode, DImode and TImode */
287 3, /* MMX or SSE register to integer */
288 0, /* size of prefetch block */
289 0, /* number of parallel prefetches */
290 2, /* Branch cost */
291 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
292 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
293 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
294 COSTS_N_INSNS (1), /* cost of FABS instruction. */
295 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
296 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
297 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
298 DUMMY_STRINGOP_ALGS},
299 {{libcall, {{-1, rep_prefix_4_byte}}},
300 DUMMY_STRINGOP_ALGS}
301 };
302
303 static const
304 struct processor_costs pentiumpro_cost = {
305 COSTS_N_INSNS (1), /* cost of an add instruction */
306 COSTS_N_INSNS (1), /* cost of a lea instruction */
307 COSTS_N_INSNS (1), /* variable shift costs */
308 COSTS_N_INSNS (1), /* constant shift costs */
309 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
310 COSTS_N_INSNS (4), /* HI */
311 COSTS_N_INSNS (4), /* SI */
312 COSTS_N_INSNS (4), /* DI */
313 COSTS_N_INSNS (4)}, /* other */
314 0, /* cost of multiply per each bit set */
315 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
316 COSTS_N_INSNS (17), /* HI */
317 COSTS_N_INSNS (17), /* SI */
318 COSTS_N_INSNS (17), /* DI */
319 COSTS_N_INSNS (17)}, /* other */
320 COSTS_N_INSNS (1), /* cost of movsx */
321 COSTS_N_INSNS (1), /* cost of movzx */
322 8, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 4, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 2, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
333 in SFmode, DFmode and XFmode */
334 2, /* cost of moving MMX register */
335 {2, 2}, /* cost of loading MMX registers
336 in SImode and DImode */
337 {2, 2}, /* cost of storing MMX registers
338 in SImode and DImode */
339 2, /* cost of moving SSE register */
340 {2, 2, 8}, /* cost of loading SSE registers
341 in SImode, DImode and TImode */
342 {2, 2, 8}, /* cost of storing SSE registers
343 in SImode, DImode and TImode */
344 3, /* MMX or SSE register to integer */
345 32, /* size of prefetch block */
346 6, /* number of parallel prefetches */
347 2, /* Branch cost */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 354    /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 355       (we ensure the alignment).  For small blocks an inline loop is still a
 356       noticeable win; for bigger blocks either rep movsl or rep movsb is the way
 357       to go.  Rep movsb apparently has a more expensive startup time in the CPU,
 358       but after 4K the difference is down in the noise.  */
359 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
360 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
361 DUMMY_STRINGOP_ALGS},
362 {{rep_prefix_4_byte, {{1024, unrolled_loop},
363 {8192, rep_prefix_4_byte}, {-1, libcall}}},
364 DUMMY_STRINGOP_ALGS}
365 };
366
367 static const
368 struct processor_costs geode_cost = {
369 COSTS_N_INSNS (1), /* cost of an add instruction */
370 COSTS_N_INSNS (1), /* cost of a lea instruction */
371 COSTS_N_INSNS (2), /* variable shift costs */
372 COSTS_N_INSNS (1), /* constant shift costs */
373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
374 COSTS_N_INSNS (4), /* HI */
375 COSTS_N_INSNS (7), /* SI */
376 COSTS_N_INSNS (7), /* DI */
377 COSTS_N_INSNS (7)}, /* other */
378 0, /* cost of multiply per each bit set */
379 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
380 COSTS_N_INSNS (23), /* HI */
381 COSTS_N_INSNS (39), /* SI */
382 COSTS_N_INSNS (39), /* DI */
383 COSTS_N_INSNS (39)}, /* other */
384 COSTS_N_INSNS (1), /* cost of movsx */
385 COSTS_N_INSNS (1), /* cost of movzx */
386 8, /* "large" insn */
387 4, /* MOVE_RATIO */
388 1, /* cost for loading QImode using movzbl */
389 {1, 1, 1}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {1, 1, 1}, /* cost of storing integer registers */
393 1, /* cost of reg,reg fld/fst */
394 {1, 1, 1}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {4, 6, 6}, /* cost of storing fp registers
397 in SFmode, DFmode and XFmode */
398
399 1, /* cost of moving MMX register */
400 {1, 1}, /* cost of loading MMX registers
401 in SImode and DImode */
402 {1, 1}, /* cost of storing MMX registers
403 in SImode and DImode */
404 1, /* cost of moving SSE register */
405 {1, 1, 1}, /* cost of loading SSE registers
406 in SImode, DImode and TImode */
407 {1, 1, 1}, /* cost of storing SSE registers
408 in SImode, DImode and TImode */
409 1, /* MMX or SSE register to integer */
410 32, /* size of prefetch block */
411 1, /* number of parallel prefetches */
412 1, /* Branch cost */
413 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
414 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
415 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
416 COSTS_N_INSNS (1), /* cost of FABS instruction. */
417 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
418 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
419 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
420 DUMMY_STRINGOP_ALGS},
421 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
422 DUMMY_STRINGOP_ALGS}
423 };
424
425 static const
426 struct processor_costs k6_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (2), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (3), /* HI */
433 COSTS_N_INSNS (3), /* SI */
434 COSTS_N_INSNS (3), /* DI */
435 COSTS_N_INSNS (3)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (18), /* HI */
439 COSTS_N_INSNS (18), /* SI */
440 COSTS_N_INSNS (18), /* DI */
441 COSTS_N_INSNS (18)}, /* other */
442 COSTS_N_INSNS (2), /* cost of movsx */
443 COSTS_N_INSNS (2), /* cost of movzx */
444 8, /* "large" insn */
445 4, /* MOVE_RATIO */
446 3, /* cost for loading QImode using movzbl */
447 {4, 5, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 3, 2}, /* cost of storing integer registers */
451 4, /* cost of reg,reg fld/fst */
452 {6, 6, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 4}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 6, /* MMX or SSE register to integer */
467 32, /* size of prefetch block */
468 1, /* number of parallel prefetches */
469 1, /* Branch cost */
470 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
476 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
477 DUMMY_STRINGOP_ALGS},
478 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
479 DUMMY_STRINGOP_ALGS}
480 };
481
482 static const
483 struct processor_costs athlon_cost = {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (2), /* cost of a lea instruction */
486 COSTS_N_INSNS (1), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (5), /* HI */
490 COSTS_N_INSNS (5), /* SI */
491 COSTS_N_INSNS (5), /* DI */
492 COSTS_N_INSNS (5)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (26), /* HI */
496 COSTS_N_INSNS (42), /* SI */
497 COSTS_N_INSNS (74), /* DI */
498 COSTS_N_INSNS (74)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
502 9, /* MOVE_RATIO */
503 4, /* cost for loading QImode using movzbl */
504 {3, 4, 3}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {3, 4, 3}, /* cost of storing integer registers */
508 4, /* cost of reg,reg fld/fst */
509 {4, 4, 12}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {6, 6, 8}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
513 2, /* cost of moving MMX register */
514 {4, 4}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {4, 4}, /* cost of storing MMX registers
517 in SImode and DImode */
518 2, /* cost of moving SSE register */
519 {4, 4, 6}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {4, 4, 5}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 5, /* MMX or SSE register to integer */
524 64, /* size of prefetch block */
525 6, /* number of parallel prefetches */
526 5, /* Branch cost */
527 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
528 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
529 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
530 COSTS_N_INSNS (2), /* cost of FABS instruction. */
531 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
532 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 533    /* For some reason, Athlon deals better with the REP prefix (relative to
 534       loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
 535       and 128 bytes for memset.  */
536 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
537 DUMMY_STRINGOP_ALGS},
538 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
539 DUMMY_STRINGOP_ALGS}
540 };
541
542 static const
543 struct processor_costs k8_cost = {
544 COSTS_N_INSNS (1), /* cost of an add instruction */
545 COSTS_N_INSNS (2), /* cost of a lea instruction */
546 COSTS_N_INSNS (1), /* variable shift costs */
547 COSTS_N_INSNS (1), /* constant shift costs */
548 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
549 COSTS_N_INSNS (4), /* HI */
550 COSTS_N_INSNS (3), /* SI */
551 COSTS_N_INSNS (4), /* DI */
552 COSTS_N_INSNS (5)}, /* other */
553 0, /* cost of multiply per each bit set */
554 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
555 COSTS_N_INSNS (26), /* HI */
556 COSTS_N_INSNS (42), /* SI */
557 COSTS_N_INSNS (74), /* DI */
558 COSTS_N_INSNS (74)}, /* other */
559 COSTS_N_INSNS (1), /* cost of movsx */
560 COSTS_N_INSNS (1), /* cost of movzx */
561 8, /* "large" insn */
562 9, /* MOVE_RATIO */
563 4, /* cost for loading QImode using movzbl */
564 {3, 4, 3}, /* cost of loading integer registers
565 in QImode, HImode and SImode.
566 Relative to reg-reg move (2). */
567 {3, 4, 3}, /* cost of storing integer registers */
568 4, /* cost of reg,reg fld/fst */
569 {4, 4, 12}, /* cost of loading fp registers
570 in SFmode, DFmode and XFmode */
571 {6, 6, 8}, /* cost of storing fp registers
572 in SFmode, DFmode and XFmode */
573 2, /* cost of moving MMX register */
574 {3, 3}, /* cost of loading MMX registers
575 in SImode and DImode */
576 {4, 4}, /* cost of storing MMX registers
577 in SImode and DImode */
578 2, /* cost of moving SSE register */
579 {4, 3, 6}, /* cost of loading SSE registers
580 in SImode, DImode and TImode */
581 {4, 4, 5}, /* cost of storing SSE registers
582 in SImode, DImode and TImode */
583 5, /* MMX or SSE register to integer */
584 64, /* size of prefetch block */
 585    /* New AMD processors never drop prefetches; if they cannot be performed
 586       immediately, they are queued.  We set the number of simultaneous prefetches
 587       to a large constant to reflect this (it is probably not a good idea to leave
 588       the number of prefetches entirely unlimited, as their execution also takes
 589       some time).  */
590 100, /* number of parallel prefetches */
591 5, /* Branch cost */
592 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
593 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
594 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
595 COSTS_N_INSNS (2), /* cost of FABS instruction. */
596 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
597 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 598    /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 599       small blocks it is better to use a loop.  For large blocks, a libcall can do
 600       nontemporal accesses and beat inline code considerably.  */
601 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
602 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
603 {{libcall, {{8, loop}, {24, unrolled_loop},
604 {2048, rep_prefix_4_byte}, {-1, libcall}}},
605 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
606 };
607
608 struct processor_costs amdfam10_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (2), /* cost of a lea instruction */
611 COSTS_N_INSNS (1), /* variable shift costs */
612 COSTS_N_INSNS (1), /* constant shift costs */
613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (4), /* HI */
615 COSTS_N_INSNS (3), /* SI */
616 COSTS_N_INSNS (4), /* DI */
617 COSTS_N_INSNS (5)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (35), /* HI */
621 COSTS_N_INSNS (51), /* SI */
622 COSTS_N_INSNS (83), /* DI */
623 COSTS_N_INSNS (83)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 8, /* "large" insn */
627 9, /* MOVE_RATIO */
628 4, /* cost for loading QImode using movzbl */
629 {3, 4, 3}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {3, 4, 3}, /* cost of storing integer registers */
633 4, /* cost of reg,reg fld/fst */
634 {4, 4, 12}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {6, 6, 8}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {3, 3}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {4, 4}, /* cost of storing MMX registers
642 in SImode and DImode */
643 2, /* cost of moving SSE register */
644 {4, 4, 3}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {4, 4, 5}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 3, /* MMX or SSE register to integer */
649 /* On K8
650 MOVD reg64, xmmreg Double FSTORE 4
651 MOVD reg32, xmmreg Double FSTORE 4
652 On AMDFAM10
653 MOVD reg64, xmmreg Double FADD 3
654 1/1 1/1
655 MOVD reg32, xmmreg Double FADD 3
656 1/1 1/1 */
657 64, /* size of prefetch block */
 658    /* New AMD processors never drop prefetches; if they cannot be performed
 659       immediately, they are queued.  We set the number of simultaneous prefetches
 660       to a large constant to reflect this (it is probably not a good idea to leave
 661       the number of prefetches entirely unlimited, as their execution also takes
 662       some time).  */
663 100, /* number of parallel prefetches */
664 5, /* Branch cost */
665 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
666 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
667 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
668 COSTS_N_INSNS (2), /* cost of FABS instruction. */
669 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
670 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671
 672    /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 673       very small blocks it is better to use a loop.  For large blocks, a libcall
 674       can do nontemporal accesses and beat inline code considerably.  */
675 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
676 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
677 {{libcall, {{8, loop}, {24, unrolled_loop},
678 {2048, rep_prefix_4_byte}, {-1, libcall}}},
679 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
680 };
681
682 static const
683 struct processor_costs pentium4_cost = {
684 COSTS_N_INSNS (1), /* cost of an add instruction */
685 COSTS_N_INSNS (3), /* cost of a lea instruction */
686 COSTS_N_INSNS (4), /* variable shift costs */
687 COSTS_N_INSNS (4), /* constant shift costs */
688 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
689 COSTS_N_INSNS (15), /* HI */
690 COSTS_N_INSNS (15), /* SI */
691 COSTS_N_INSNS (15), /* DI */
692 COSTS_N_INSNS (15)}, /* other */
693 0, /* cost of multiply per each bit set */
694 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
695 COSTS_N_INSNS (56), /* HI */
696 COSTS_N_INSNS (56), /* SI */
697 COSTS_N_INSNS (56), /* DI */
698 COSTS_N_INSNS (56)}, /* other */
699 COSTS_N_INSNS (1), /* cost of movsx */
700 COSTS_N_INSNS (1), /* cost of movzx */
701 16, /* "large" insn */
702 6, /* MOVE_RATIO */
703 2, /* cost for loading QImode using movzbl */
704 {4, 5, 4}, /* cost of loading integer registers
705 in QImode, HImode and SImode.
706 Relative to reg-reg move (2). */
707 {2, 3, 2}, /* cost of storing integer registers */
708 2, /* cost of reg,reg fld/fst */
709 {2, 2, 6}, /* cost of loading fp registers
710 in SFmode, DFmode and XFmode */
711 {4, 4, 6}, /* cost of storing fp registers
712 in SFmode, DFmode and XFmode */
713 2, /* cost of moving MMX register */
714 {2, 2}, /* cost of loading MMX registers
715 in SImode and DImode */
716 {2, 2}, /* cost of storing MMX registers
717 in SImode and DImode */
718 12, /* cost of moving SSE register */
719 {12, 12, 12}, /* cost of loading SSE registers
720 in SImode, DImode and TImode */
721 {2, 2, 8}, /* cost of storing SSE registers
722 in SImode, DImode and TImode */
723 10, /* MMX or SSE register to integer */
724 64, /* size of prefetch block */
725 6, /* number of parallel prefetches */
726 2, /* Branch cost */
727 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
728 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
729 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
730 COSTS_N_INSNS (2), /* cost of FABS instruction. */
731 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
732 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
733 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
734 DUMMY_STRINGOP_ALGS},
735 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
736 {-1, libcall}}},
737 DUMMY_STRINGOP_ALGS},
738 };
739
740 static const
741 struct processor_costs nocona_cost = {
742 COSTS_N_INSNS (1), /* cost of an add instruction */
743 COSTS_N_INSNS (1), /* cost of a lea instruction */
744 COSTS_N_INSNS (1), /* variable shift costs */
745 COSTS_N_INSNS (1), /* constant shift costs */
746 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
747 COSTS_N_INSNS (10), /* HI */
748 COSTS_N_INSNS (10), /* SI */
749 COSTS_N_INSNS (10), /* DI */
750 COSTS_N_INSNS (10)}, /* other */
751 0, /* cost of multiply per each bit set */
752 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
753 COSTS_N_INSNS (66), /* HI */
754 COSTS_N_INSNS (66), /* SI */
755 COSTS_N_INSNS (66), /* DI */
756 COSTS_N_INSNS (66)}, /* other */
757 COSTS_N_INSNS (1), /* cost of movsx */
758 COSTS_N_INSNS (1), /* cost of movzx */
759 16, /* "large" insn */
760 17, /* MOVE_RATIO */
761 4, /* cost for loading QImode using movzbl */
762 {4, 4, 4}, /* cost of loading integer registers
763 in QImode, HImode and SImode.
764 Relative to reg-reg move (2). */
765 {4, 4, 4}, /* cost of storing integer registers */
766 3, /* cost of reg,reg fld/fst */
767 {12, 12, 12}, /* cost of loading fp registers
768 in SFmode, DFmode and XFmode */
769 {4, 4, 4}, /* cost of storing fp registers
770 in SFmode, DFmode and XFmode */
771 6, /* cost of moving MMX register */
772 {12, 12}, /* cost of loading MMX registers
773 in SImode and DImode */
774 {12, 12}, /* cost of storing MMX registers
775 in SImode and DImode */
776 6, /* cost of moving SSE register */
777 {12, 12, 12}, /* cost of loading SSE registers
778 in SImode, DImode and TImode */
779 {12, 12, 12}, /* cost of storing SSE registers
780 in SImode, DImode and TImode */
781 8, /* MMX or SSE register to integer */
782 128, /* size of prefetch block */
783 8, /* number of parallel prefetches */
784 1, /* Branch cost */
785 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (3), /* cost of FABS instruction. */
789 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
791 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
792 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
793 {100000, unrolled_loop}, {-1, libcall}}}},
794 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
795 {-1, libcall}}},
796 {libcall, {{24, loop}, {64, unrolled_loop},
797 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
798 };
799
800 static const
801 struct processor_costs core2_cost = {
802 COSTS_N_INSNS (1), /* cost of an add instruction */
803 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
804 COSTS_N_INSNS (1), /* variable shift costs */
805 COSTS_N_INSNS (1), /* constant shift costs */
806 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
807 COSTS_N_INSNS (3), /* HI */
808 COSTS_N_INSNS (3), /* SI */
809 COSTS_N_INSNS (3), /* DI */
810 COSTS_N_INSNS (3)}, /* other */
811 0, /* cost of multiply per each bit set */
812 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
813 COSTS_N_INSNS (22), /* HI */
814 COSTS_N_INSNS (22), /* SI */
815 COSTS_N_INSNS (22), /* DI */
816 COSTS_N_INSNS (22)}, /* other */
817 COSTS_N_INSNS (1), /* cost of movsx */
818 COSTS_N_INSNS (1), /* cost of movzx */
819 8, /* "large" insn */
820 16, /* MOVE_RATIO */
821 2, /* cost for loading QImode using movzbl */
822 {6, 6, 6}, /* cost of loading integer registers
823 in QImode, HImode and SImode.
824 Relative to reg-reg move (2). */
825 {4, 4, 4}, /* cost of storing integer registers */
826 2, /* cost of reg,reg fld/fst */
827 {6, 6, 6}, /* cost of loading fp registers
828 in SFmode, DFmode and XFmode */
 829    {4, 4, 4},                            /* cost of storing fp registers
                                                 in SFmode, DFmode and XFmode */
830 2, /* cost of moving MMX register */
831 {6, 6}, /* cost of loading MMX registers
832 in SImode and DImode */
833 {4, 4}, /* cost of storing MMX registers
834 in SImode and DImode */
835 2, /* cost of moving SSE register */
836 {6, 6, 6}, /* cost of loading SSE registers
837 in SImode, DImode and TImode */
838 {4, 4, 4}, /* cost of storing SSE registers
839 in SImode, DImode and TImode */
840 2, /* MMX or SSE register to integer */
841 128, /* size of prefetch block */
842 8, /* number of parallel prefetches */
843 3, /* Branch cost */
844 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
845 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
846 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
847 COSTS_N_INSNS (1), /* cost of FABS instruction. */
848 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
849 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
850 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
851 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
852 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
853 {{libcall, {{8, loop}, {15, unrolled_loop},
854 {2048, rep_prefix_4_byte}, {-1, libcall}}},
855 {libcall, {{24, loop}, {32, unrolled_loop},
856 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
857 };
858
859 /* Generic64 should produce code tuned for Nocona and K8. */
860 static const
861 struct processor_costs generic64_cost = {
862 COSTS_N_INSNS (1), /* cost of an add instruction */
 863    /* On all chips taken into consideration, lea is 2 cycles or more.  With this
 864       cost, however, our current implementation of synth_mult results in the use
 865       of unnecessary temporary registers, causing regressions on several SPECfp
 866       benchmarks.  */
867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
868 COSTS_N_INSNS (1), /* variable shift costs */
869 COSTS_N_INSNS (1), /* constant shift costs */
870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
871 COSTS_N_INSNS (4), /* HI */
872 COSTS_N_INSNS (3), /* SI */
873 COSTS_N_INSNS (4), /* DI */
874 COSTS_N_INSNS (2)}, /* other */
875 0, /* cost of multiply per each bit set */
876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
877 COSTS_N_INSNS (26), /* HI */
878 COSTS_N_INSNS (42), /* SI */
879 COSTS_N_INSNS (74), /* DI */
880 COSTS_N_INSNS (74)}, /* other */
881 COSTS_N_INSNS (1), /* cost of movsx */
882 COSTS_N_INSNS (1), /* cost of movzx */
883 8, /* "large" insn */
884 17, /* MOVE_RATIO */
885 4, /* cost for loading QImode using movzbl */
886 {4, 4, 4}, /* cost of loading integer registers
887 in QImode, HImode and SImode.
888 Relative to reg-reg move (2). */
889 {4, 4, 4}, /* cost of storing integer registers */
890 4, /* cost of reg,reg fld/fst */
891 {12, 12, 12}, /* cost of loading fp registers
892 in SFmode, DFmode and XFmode */
893 {6, 6, 8}, /* cost of storing fp registers
894 in SFmode, DFmode and XFmode */
895 2, /* cost of moving MMX register */
896 {8, 8}, /* cost of loading MMX registers
897 in SImode and DImode */
898 {8, 8}, /* cost of storing MMX registers
899 in SImode and DImode */
900 2, /* cost of moving SSE register */
901 {8, 8, 8}, /* cost of loading SSE registers
902 in SImode, DImode and TImode */
903 {8, 8, 8}, /* cost of storing SSE registers
904 in SImode, DImode and TImode */
905 5, /* MMX or SSE register to integer */
906 64, /* size of prefetch block */
907 6, /* number of parallel prefetches */
 908    /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
 909       value is increased to the perhaps more appropriate value of 5.  */
910 3, /* Branch cost */
911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
917 {DUMMY_STRINGOP_ALGS,
918 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
919 {DUMMY_STRINGOP_ALGS,
920 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
921 };
922
923 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 static const
925 struct processor_costs generic32_cost = {
926 COSTS_N_INSNS (1), /* cost of an add instruction */
927 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
928 COSTS_N_INSNS (1), /* variable shift costs */
929 COSTS_N_INSNS (1), /* constant shift costs */
930 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
931 COSTS_N_INSNS (4), /* HI */
932 COSTS_N_INSNS (3), /* SI */
933 COSTS_N_INSNS (4), /* DI */
934 COSTS_N_INSNS (2)}, /* other */
935 0, /* cost of multiply per each bit set */
936 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
937 COSTS_N_INSNS (26), /* HI */
938 COSTS_N_INSNS (42), /* SI */
939 COSTS_N_INSNS (74), /* DI */
940 COSTS_N_INSNS (74)}, /* other */
941 COSTS_N_INSNS (1), /* cost of movsx */
942 COSTS_N_INSNS (1), /* cost of movzx */
943 8, /* "large" insn */
944 17, /* MOVE_RATIO */
945 4, /* cost for loading QImode using movzbl */
946 {4, 4, 4}, /* cost of loading integer registers
947 in QImode, HImode and SImode.
948 Relative to reg-reg move (2). */
949 {4, 4, 4}, /* cost of storing integer registers */
950 4, /* cost of reg,reg fld/fst */
951 {12, 12, 12}, /* cost of loading fp registers
952 in SFmode, DFmode and XFmode */
953 {6, 6, 8}, /* cost of storing fp registers
954 in SFmode, DFmode and XFmode */
955 2, /* cost of moving MMX register */
956 {8, 8}, /* cost of loading MMX registers
957 in SImode and DImode */
958 {8, 8}, /* cost of storing MMX registers
959 in SImode and DImode */
960 2, /* cost of moving SSE register */
961 {8, 8, 8}, /* cost of loading SSE registers
962 in SImode, DImode and TImode */
963 {8, 8, 8}, /* cost of storing SSE registers
964 in SImode, DImode and TImode */
965 5, /* MMX or SSE register to integer */
966 64, /* size of prefetch block */
967 6, /* number of parallel prefetches */
968 3, /* Branch cost */
969 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
970 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
971 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
972 COSTS_N_INSNS (8), /* cost of FABS instruction. */
973 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
974 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
975 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
976 DUMMY_STRINGOP_ALGS},
977 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
978 DUMMY_STRINGOP_ALGS},
979 };
980
981 const struct processor_costs *ix86_cost = &pentium_cost;
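/* The assignment above is only an initial default; as a sketch of the flow,
   override_options later repoints ix86_cost at the table matching the CPU
   selected by -mtune (via processor_target_table), and the rtx cost and move
   cost hooks then read their numbers through it, e.g. ix86_cost->add.  */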
982
983 /* Processor feature/optimization bitmasks. */
984 #define m_386 (1<<PROCESSOR_I386)
985 #define m_486 (1<<PROCESSOR_I486)
986 #define m_PENT (1<<PROCESSOR_PENTIUM)
987 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
988 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
989 #define m_NOCONA (1<<PROCESSOR_NOCONA)
990 #define m_CORE2 (1<<PROCESSOR_CORE2)
991
992 #define m_GEODE (1<<PROCESSOR_GEODE)
993 #define m_K6 (1<<PROCESSOR_K6)
994 #define m_K6_GEODE (m_K6 | m_GEODE)
995 #define m_K8 (1<<PROCESSOR_K8)
996 #define m_ATHLON (1<<PROCESSOR_ATHLON)
997 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
998 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
999 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000
1001 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1002 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003
 1004   /* Generic instruction choice should be a common subset of supported CPUs
 1005      (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1006 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007
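/* Illustration only (the real accessors are the TARGET_* macros in i386.h,
   whose exact spelling is not shown here): an entry in the tuning table below
   enables its feature for the CPU selected by -mtune when that CPU's bit is
   set, conceptually

       ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)

   so a mask like m_CORE2 | m_GENERIC64 reads as "on for Core 2 and generic
   64-bit tuning, off elsewhere".  */
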
1008 /* Feature tests against the various tunings. */
1009 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
 1010   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
 1011      negatively, so enabling it for Generic64 seems like a good code-size
 1012      tradeoff.  We can't enable it for 32-bit generic because it does not
 1013      work well with PPro based chips.  */
1014 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015
1016 /* X86_TUNE_PUSH_MEMORY */
1017 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1018 | m_NOCONA | m_CORE2 | m_GENERIC,
1019
1020 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1021 m_486 | m_PENT,
1022
1023 /* X86_TUNE_USE_BIT_TEST */
1024 m_386,
1025
1026 /* X86_TUNE_UNROLL_STRLEN */
1027 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028
1029 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1030 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1031
 1032   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
 1033      on simulation results.  But after the P4 was made, no performance benefit
 1034      was observed with branch hints; they also increase the code size.
 1035      As a result, icc never generates branch hints.  */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
 1050   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
 1051      register stalls on the Generic32 compilation setting as well.  However,
 1052      in the current implementation partial register stalls are not eliminated
 1053      very well: they can be introduced via subregs synthesized by combine
 1054      and can happen in caller/callee saving sequences.  Because this option
 1055      pays back little on PPro based chips and conflicts with the partial
 1056      register dependencies used by Athlon/P4 based chips, it is better to
 1057      leave it off for generic32 for now.  */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
 1134   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
 1135      conflict here between PPro/Pentium4 based chips that treat 128-bit
 1136      SSE registers as single units and K8 based chips that divide SSE
 1137      registers into two 64-bit halves.  This knob promotes all store
 1138      destinations to be 128-bit to allow register renaming on 128-bit SSE
 1139      units, but usually results in one extra micro-op on 64-bit SSE units.
 1140      Experimental results show that disabling this option on the P4 brings an
 1141      over 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
 1142      regression that can be partly masked by careful scheduling of moves.  */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
 1148   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
 1149      are resolved on SSE register parts instead of whole registers, so we may
 1150      maintain just the lower part of scalar values in the proper format, leaving
 1151      the upper part undefined.  */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
 1203   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
 1204      and SImode multiply, but the 386 and 486 do HImode multiply faster.  */
1205 ~(m_386 | m_486),
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 machines. */
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1216 than a MOV. */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1221 m_PENT,
1222
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1226 m_K6,
1227 };
1228
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1232 ~(m_386 | m_486 | m_PENT | m_K6),
1233
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1235 ~m_386,
1236
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1238 ~(m_386 | m_486),
1239
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1241 ~m_386,
1242
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1244 ~m_386,
1245 };
1246
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1249
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1253
1254 static enum stringop_alg stringop_alg = no_stringop;
1255
 1256   /* In case the average insn count for a single function invocation is
 1257      lower than this constant, emit fast (but longer) prologue and
 1258      epilogue code.  */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1260
1261 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1270 {
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1275 /* FP registers */
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1278 /* arg pointer */
1279 NON_Q_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1282 /* SSE registers */
1283 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1284 SSE_REGS, SSE_REGS,
1285 /* MMX registers */
1286 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1287 MMX_REGS, MMX_REGS,
1288 /* REX registers */
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1291 /* SSE REX registers */
1292 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1293 SSE_REGS, SSE_REGS,
1294 };
1295
1296 /* The "default" register map used in 32bit mode. */
1297
1298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1299 {
1300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1307 };
1308
1309 static int const x86_64_int_parameter_registers[6] =
1310 {
1311 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1312 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1313 };
1314
1315 static int const x86_64_ms_abi_int_parameter_registers[4] =
1316 {
1317 2 /*RCX*/, 1 /*RDX*/,
1318 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1319 };
1320
1321 static int const x86_64_int_return_registers[4] =
1322 {
1323 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1324 };
1325
1326 /* The "default" register map used in 64bit mode. */
1327 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1328 {
1329 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1330 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1331 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1332 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1333 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1334 8,9,10,11,12,13,14,15, /* extended integer registers */
1335 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1336 };
1337
1338 /* Define the register numbers to be used in Dwarf debugging information.
1339 The SVR4 reference port C compiler uses the following register numbers
1340 in its Dwarf output code:
1341 0 for %eax (gcc regno = 0)
1342 1 for %ecx (gcc regno = 2)
1343 2 for %edx (gcc regno = 1)
1344 3 for %ebx (gcc regno = 3)
1345 4 for %esp (gcc regno = 7)
1346 5 for %ebp (gcc regno = 6)
1347 6 for %esi (gcc regno = 4)
1348 7 for %edi (gcc regno = 5)
1349 The following three DWARF register numbers are never generated by
1350 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1351 believes these numbers have these meanings.
1352 8 for %eip (no gcc equivalent)
1353 9 for %eflags (gcc regno = 17)
1354 10 for %trapno (no gcc equivalent)
1355 It is not at all clear how we should number the FP stack registers
1356 for the x86 architecture. If the version of SDB on x86/svr4 were
1357 a bit less brain dead with respect to floating-point then we would
1358 have a precedent to follow with respect to DWARF register numbers
1359 for x86 FP registers, but the SDB on x86/svr4 is so completely
1360 broken with respect to FP registers that it is hardly worth thinking
1361 of it as something to strive for compatibility with.
1362 The version of x86/svr4 SDB I have at the moment does (partially)
1363 seem to believe that DWARF register number 11 is associated with
1364 the x86 register %st(0), but that's about all. Higher DWARF
1365 register numbers don't seem to be associated with anything in
1366 particular, and even for DWARF regno 11, SDB only seems to under-
1367 stand that it should say that a variable lives in %st(0) (when
1368 asked via an `=' command) if we said it was in DWARF regno 11,
1369 but SDB still prints garbage when asked for the value of the
1370 variable in question (via a `/' command).
1371 (Also note that the labels SDB prints for various FP stack regs
1372 when doing an `x' command are all wrong.)
1373 Note that these problems generally don't affect the native SVR4
1374 C compiler because it doesn't allow the use of -O with -g and
1375 because when it is *not* optimizing, it allocates a memory
1376 location for each floating-point variable, and the memory
1377 location is what gets described in the DWARF AT_location
1378 attribute for the variable in question.
1379 Regardless of the severe mental illness of the x86/svr4 SDB, we
1380 do something sensible here and we use the following DWARF
1381 register numbers. Note that these are all stack-top-relative
1382 numbers.
1383 11 for %st(0) (gcc regno = 8)
1384 12 for %st(1) (gcc regno = 9)
1385 13 for %st(2) (gcc regno = 10)
1386 14 for %st(3) (gcc regno = 11)
1387 15 for %st(4) (gcc regno = 12)
1388 16 for %st(5) (gcc regno = 13)
1389 17 for %st(6) (gcc regno = 14)
1390 18 for %st(7) (gcc regno = 15)
1391 */
1392 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1393 {
1394 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1395 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1396 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1397 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1398 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1400 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1401 };
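/* Reading the table above (editor's illustration): gcc regno 1 is %edx, so
   svr4_dbx_register_map[1] == 2; gcc regno 2 is %ecx, so
   svr4_dbx_register_map[2] == 1; the flags register (gcc regno 17) maps to
   DWARF regno 9; and the x87 stack registers, gcc regnos 8 through 15, map
   to the stack-top-relative DWARF numbers 11 through 18.  */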
1402
1403 /* Test and compare insns in i386.md store the information needed to
1404 generate branch and scc insns here. */
1405
1406 rtx ix86_compare_op0 = NULL_RTX;
1407 rtx ix86_compare_op1 = NULL_RTX;
1408 rtx ix86_compare_emitted = NULL_RTX;
1409
1410 /* Size of the register save area. */
1411 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
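/* Worked example (editor's note, assuming the usual 64-bit values
   REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8):
   the save area is 6*8 + 8*16 = 48 + 128 = 176 bytes, i.e. six integer
   register slots followed by eight 16-byte SSE register slots.  */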
1412
1413 /* Define the structure for the machine field in struct function. */
1414
1415 struct stack_local_entry GTY(())
1416 {
1417 unsigned short mode;
1418 unsigned short n;
1419 rtx rtl;
1420 struct stack_local_entry *next;
1421 };
1422
1423 /* Structure describing stack frame layout.
1424 Stack grows downward:
1425
1426 [arguments]
1427 <- ARG_POINTER
1428 saved pc
1429
1430 saved frame pointer if frame_pointer_needed
1431 <- HARD_FRAME_POINTER
1432 [saved regs]
1433
1434 [padding1] \
1435 )
1436 [va_arg registers] (
1437 > to_allocate <- FRAME_POINTER
1438 [frame] (
1439 )
1440 [padding2] /
1441 */
1442 struct ix86_frame
1443 {
1444 int nregs;
1445 int padding1;
1446 int va_arg_size;
1447 HOST_WIDE_INT frame;
1448 int padding2;
1449 int outgoing_arguments_size;
1450 int red_zone_size;
1451
1452 HOST_WIDE_INT to_allocate;
1453 /* The offsets relative to ARG_POINTER. */
1454 HOST_WIDE_INT frame_pointer_offset;
1455 HOST_WIDE_INT hard_frame_pointer_offset;
1456 HOST_WIDE_INT stack_pointer_offset;
1457
1458 /* When save_regs_using_mov is set, emit prologue using
1459 move instead of push instructions. */
1460 bool save_regs_using_mov;
1461 };
1462
1463 /* Code model option. */
1464 enum cmodel ix86_cmodel;
1465 /* Asm dialect. */
1466 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1467 /* TLS dialects. */
1468 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1469
1470 /* Which unit we are generating floating point math for. */
1471 enum fpmath_unit ix86_fpmath;
1472
1473 /* Which cpu are we scheduling for. */
1474 enum processor_type ix86_tune;
1475
1476 /* Which instruction set architecture to use. */
1477 enum processor_type ix86_arch;
1478
1479 /* True if the SSE prefetch instruction is not a NOP. */
1480 int x86_prefetch_sse;
1481
1482 /* ix86_regparm_string as a number */
1483 static int ix86_regparm;
1484
1485 /* -mstackrealign option */
1486 extern int ix86_force_align_arg_pointer;
1487 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1488
1489 /* Preferred alignment for stack boundary in bits. */
1490 unsigned int ix86_preferred_stack_boundary;
1491
1492 /* Values 1-5: see jump.c */
1493 int ix86_branch_cost;
1494
1495 /* Variables which are this size or smaller are put in the data/bss
1496 or ldata/lbss sections. */
1497
1498 int ix86_section_threshold = 65536;
1499
1500 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1501 char internal_label_prefix[16];
1502 int internal_label_prefix_len;
1503
1504 /* Fence to use after loop using movnt. */
1505 tree x86_mfence;
1506
1507 /* Register class used for passing a given 64-bit part of the argument.
1508 These represent classes as documented by the psABI, with the exception of
1509 the SSESF and SSEDF classes, which are basically the SSE class, except that
1510 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1511
1512 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1513 whenever possible (upper half does contain padding). */
1514 enum x86_64_reg_class
1515 {
1516 X86_64_NO_CLASS,
1517 X86_64_INTEGER_CLASS,
1518 X86_64_INTEGERSI_CLASS,
1519 X86_64_SSE_CLASS,
1520 X86_64_SSESF_CLASS,
1521 X86_64_SSEDF_CLASS,
1522 X86_64_SSEUP_CLASS,
1523 X86_64_X87_CLASS,
1524 X86_64_X87UP_CLASS,
1525 X86_64_COMPLEX_X87_CLASS,
1526 X86_64_MEMORY_CLASS
1527 };
1528 static const char * const x86_64_reg_class_name[] =
1529 {
1530 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1531 "sseup", "x87", "x87up", "cplx87", "no"
1532 };
1533
1534 #define MAX_CLASSES 4
1535
1536 /* Table of constants used by fldpi, fldln2, etc.... */
1537 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1538 static bool ext_80387_constants_init = 0;
1539
1540 \f
1541 static struct machine_function * ix86_init_machine_status (void);
1542 static rtx ix86_function_value (tree, tree, bool);
1543 static int ix86_function_regparm (tree, tree);
1544 static void ix86_compute_frame_layout (struct ix86_frame *);
1545 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1546 rtx, rtx, int);
1547
1548 \f
1549 /* The svr4 ABI for the i386 says that records and unions are returned
1550 in memory. */
1551 #ifndef DEFAULT_PCC_STRUCT_RETURN
1552 #define DEFAULT_PCC_STRUCT_RETURN 1
1553 #endif
1554
1555 /* Bit flags that specify the ISA we are compiling for. */
1556 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1557
1558 /* A mask of ix86_isa_flags that includes bit X if X
1559 was set or cleared on the command line. */
1560 static int ix86_isa_flags_explicit;
1561
1562 /* Define a set of ISAs which aren't available for a given ISA. MMX
1563 and SSE ISAs are handled separately. */
1564
1565 #define OPTION_MASK_ISA_MMX_UNSET \
1566 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1567 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1568
1569 #define OPTION_MASK_ISA_SSE_UNSET \
1570 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1571 #define OPTION_MASK_ISA_SSE2_UNSET \
1572 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1573 #define OPTION_MASK_ISA_SSE3_UNSET \
1574 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1575 #define OPTION_MASK_ISA_SSSE3_UNSET \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1577 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1578 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1579 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1580
1581 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1582 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1583 #define OPTION_MASK_ISA_SSE4 \
1584 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1585 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1586
1587 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
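/* Illustration (editor's sketch): expanding the macros above,
   OPTION_MASK_ISA_SSE2_UNSET covers SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A,
   so the OPT_msse2 handler below only has to execute

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   for -mno-sse2 to turn off every ISA that depends on SSE2, without
   listing the dependencies at each call site.  */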
1588
1589 /* Implement TARGET_HANDLE_OPTION. */
1590
1591 static bool
1592 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1593 {
1594 switch (code)
1595 {
1596 case OPT_mmmx:
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1598 if (!value)
1599 {
1600 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1602 }
1603 return true;
1604
1605 case OPT_m3dnow:
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1607 if (!value)
1608 {
1609 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1611 }
1612 return true;
1613
1614 case OPT_m3dnowa:
1615 return false;
1616
1617 case OPT_msse:
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1619 if (!value)
1620 {
1621 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1623 }
1624 return true;
1625
1626 case OPT_msse2:
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1628 if (!value)
1629 {
1630 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1632 }
1633 return true;
1634
1635 case OPT_msse3:
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1637 if (!value)
1638 {
1639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1641 }
1642 return true;
1643
1644 case OPT_mssse3:
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1646 if (!value)
1647 {
1648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1650 }
1651 return true;
1652
1653 case OPT_msse4_1:
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1655 if (!value)
1656 {
1657 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1658 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1659 }
1660 return true;
1661
1662 case OPT_msse4_2:
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1664 if (!value)
1665 {
1666 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1668 }
1669 return true;
1670
1671 case OPT_msse4:
1672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1674 return true;
1675
1676 case OPT_mno_sse4:
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1679 return true;
1680
1681 case OPT_msse4a:
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1683 if (!value)
1684 {
1685 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1686 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1687 }
1688 return true;
1689
1690 default:
1691 return true;
1692 }
1693 }
1694
1695 /* Sometimes certain combinations of command options do not make
1696 sense on a particular target machine. You can define a macro
1697 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1698 defined, is executed once just after all the command options have
1699 been parsed.
1700
1701 Don't use this macro to turn on various extra optimizations for
1702 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1703
1704 void
1705 override_options (void)
1706 {
1707 int i;
1708 int ix86_tune_defaulted = 0;
1709 int ix86_arch_specified = 0;
1710 unsigned int ix86_arch_mask, ix86_tune_mask;
1711
1712 /* Comes from final.c -- no real reason to change it. */
1713 #define MAX_CODE_ALIGN 16
1714
1715 static struct ptt
1716 {
1717 const struct processor_costs *cost; /* Processor costs */
1718 const int align_loop; /* Default alignments. */
1719 const int align_loop_max_skip;
1720 const int align_jump;
1721 const int align_jump_max_skip;
1722 const int align_func;
1723 }
1724 const processor_target_table[PROCESSOR_max] =
1725 {
1726 {&i386_cost, 4, 3, 4, 3, 4},
1727 {&i486_cost, 16, 15, 16, 15, 16},
1728 {&pentium_cost, 16, 7, 16, 7, 16},
1729 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1730 {&geode_cost, 0, 0, 0, 0, 0},
1731 {&k6_cost, 32, 7, 32, 7, 32},
1732 {&athlon_cost, 16, 7, 16, 7, 16},
1733 {&pentium4_cost, 0, 0, 0, 0, 0},
1734 {&k8_cost, 16, 7, 16, 7, 16},
1735 {&nocona_cost, 0, 0, 0, 0, 0},
1736 {&core2_cost, 16, 10, 16, 10, 16},
1737 {&generic32_cost, 16, 7, 16, 7, 16},
1738 {&generic64_cost, 16, 10, 16, 10, 16},
1739 {&amdfam10_cost, 32, 24, 32, 7, 32}
1740 };
1741
1742 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1743 enum pta_flags
1744 {
1745 PTA_SSE = 1 << 0,
1746 PTA_SSE2 = 1 << 1,
1747 PTA_SSE3 = 1 << 2,
1748 PTA_MMX = 1 << 3,
1749 PTA_PREFETCH_SSE = 1 << 4,
1750 PTA_3DNOW = 1 << 5,
1751 PTA_3DNOW_A = 1 << 6,
1752 PTA_64BIT = 1 << 7,
1753 PTA_SSSE3 = 1 << 8,
1754 PTA_CX16 = 1 << 9,
1755 PTA_POPCNT = 1 << 10,
1756 PTA_ABM = 1 << 11,
1757 PTA_SSE4A = 1 << 12,
1758 PTA_NO_SAHF = 1 << 13,
1759 PTA_SSE4_1 = 1 << 14,
1760 PTA_SSE4_2 = 1 << 15
1761 };
1762
1763 static struct pta
1764 {
1765 const char *const name; /* processor name or nickname. */
1766 const enum processor_type processor;
1767 const unsigned /*enum pta_flags*/ flags;
1768 }
1769 const processor_alias_table[] =
1770 {
1771 {"i386", PROCESSOR_I386, 0},
1772 {"i486", PROCESSOR_I486, 0},
1773 {"i586", PROCESSOR_PENTIUM, 0},
1774 {"pentium", PROCESSOR_PENTIUM, 0},
1775 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1776 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1777 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1779 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1780 {"i686", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1782 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1783 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1784 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1785 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1786 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1787 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1788 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1789 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1790 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1791 | PTA_CX16 | PTA_NO_SAHF)},
1792 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1793 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1794 | PTA_SSSE3
1795 | PTA_CX16)},
1796 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1797 | PTA_PREFETCH_SSE)},
1798 {"k6", PROCESSOR_K6, PTA_MMX},
1799 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1800 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1801 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1802 | PTA_PREFETCH_SSE)},
1803 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1804 | PTA_PREFETCH_SSE)},
1805 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1806 | PTA_SSE)},
1807 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1808 | PTA_SSE)},
1809 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1810 | PTA_SSE)},
1811 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1812 | PTA_MMX | PTA_SSE | PTA_SSE2
1813 | PTA_NO_SAHF)},
1814 {"k8", PROCESSOR_K8, (PTA_64BIT
1815 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1816 | PTA_SSE | PTA_SSE2
1817 | PTA_NO_SAHF)},
1818 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1819 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1820 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1821 | PTA_NO_SAHF)},
1822 {"opteron", PROCESSOR_K8, (PTA_64BIT
1823 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1824 | PTA_SSE | PTA_SSE2
1825 | PTA_NO_SAHF)},
1826 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1827 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1828 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1829 | PTA_NO_SAHF)},
1830 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1831 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1832 | PTA_SSE | PTA_SSE2
1833 | PTA_NO_SAHF)},
1834 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1835 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1836 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1837 | PTA_NO_SAHF)},
1838 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1839 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1840 | PTA_SSE | PTA_SSE2
1841 | PTA_NO_SAHF)},
1842 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1843 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1845 | PTA_SSE4A
1846 | PTA_CX16 | PTA_ABM)},
1847 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1848 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1849 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1850 | PTA_SSE4A
1851 | PTA_CX16 | PTA_ABM)},
1852 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1853 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1854 };
1855
1856 int const pta_size = ARRAY_SIZE (processor_alias_table);
1857
1858 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1859 SUBTARGET_OVERRIDE_OPTIONS;
1860 #endif
1861
1862 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1863 SUBSUBTARGET_OVERRIDE_OPTIONS;
1864 #endif
1865
1866 /* -fPIC is the default for x86_64. */
1867 if (TARGET_MACHO && TARGET_64BIT)
1868 flag_pic = 2;
1869
1870 /* Set the default values for switches whose default depends on TARGET_64BIT
1871 in case they weren't overwritten by command line options. */
1872 if (TARGET_64BIT)
1873 {
1874 /* Mach-O doesn't support omitting the frame pointer for now. */
1875 if (flag_omit_frame_pointer == 2)
1876 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1877 if (flag_asynchronous_unwind_tables == 2)
1878 flag_asynchronous_unwind_tables = 1;
1879 if (flag_pcc_struct_return == 2)
1880 flag_pcc_struct_return = 0;
1881 }
1882 else
1883 {
1884 if (flag_omit_frame_pointer == 2)
1885 flag_omit_frame_pointer = 0;
1886 if (flag_asynchronous_unwind_tables == 2)
1887 flag_asynchronous_unwind_tables = 0;
1888 if (flag_pcc_struct_return == 2)
1889 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1890 }
1891
1892 /* Need to check -mtune=generic first. */
1893 if (ix86_tune_string)
1894 {
1895 if (!strcmp (ix86_tune_string, "generic")
1896 || !strcmp (ix86_tune_string, "i686")
1897 /* As special support for cross compilers we read -mtune=native
1898 as -mtune=generic. With native compilers we won't see
1899 -mtune=native, as it was changed by the driver. */
1900 || !strcmp (ix86_tune_string, "native"))
1901 {
1902 if (TARGET_64BIT)
1903 ix86_tune_string = "generic64";
1904 else
1905 ix86_tune_string = "generic32";
1906 }
1907 else if (!strncmp (ix86_tune_string, "generic", 7))
1908 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1909 }
1910 else
1911 {
1912 if (ix86_arch_string)
1913 ix86_tune_string = ix86_arch_string;
1914 if (!ix86_tune_string)
1915 {
1916 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1917 ix86_tune_defaulted = 1;
1918 }
1919
1920 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1921 need to use a sensible tune option. */
1922 if (!strcmp (ix86_tune_string, "generic")
1923 || !strcmp (ix86_tune_string, "x86-64")
1924 || !strcmp (ix86_tune_string, "i686"))
1925 {
1926 if (TARGET_64BIT)
1927 ix86_tune_string = "generic64";
1928 else
1929 ix86_tune_string = "generic32";
1930 }
1931 }
1932 if (ix86_stringop_string)
1933 {
1934 if (!strcmp (ix86_stringop_string, "rep_byte"))
1935 stringop_alg = rep_prefix_1_byte;
1936 else if (!strcmp (ix86_stringop_string, "libcall"))
1937 stringop_alg = libcall;
1938 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1939 stringop_alg = rep_prefix_4_byte;
1940 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1941 stringop_alg = rep_prefix_8_byte;
1942 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1943 stringop_alg = loop_1_byte;
1944 else if (!strcmp (ix86_stringop_string, "loop"))
1945 stringop_alg = loop;
1946 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1947 stringop_alg = unrolled_loop;
1948 else
1949 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1950 }
1951 if (!strcmp (ix86_tune_string, "x86-64"))
1952 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1953 "-mtune=generic instead as appropriate.");
1954
1955 if (!ix86_arch_string)
1956 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1957 else
1958 ix86_arch_specified = 1;
1959
1960 if (!strcmp (ix86_arch_string, "generic"))
1961 error ("generic CPU can be used only for -mtune= switch");
1962 if (!strncmp (ix86_arch_string, "generic", 7))
1963 error ("bad value (%s) for -march= switch", ix86_arch_string);
1964
1965 if (ix86_cmodel_string != 0)
1966 {
1967 if (!strcmp (ix86_cmodel_string, "small"))
1968 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1969 else if (!strcmp (ix86_cmodel_string, "medium"))
1970 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1971 else if (!strcmp (ix86_cmodel_string, "large"))
1972 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1973 else if (flag_pic)
1974 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1975 else if (!strcmp (ix86_cmodel_string, "32"))
1976 ix86_cmodel = CM_32;
1977 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1978 ix86_cmodel = CM_KERNEL;
1979 else
1980 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1981 }
1982 else
1983 {
1984 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1985 use of rip-relative addressing. This eliminates fixups that
1986 would otherwise be needed if this object is to be placed in a
1987 DLL, and is essentially just as efficient as direct addressing. */
1988 if (TARGET_64BIT_MS_ABI)
1989 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1990 else if (TARGET_64BIT)
1991 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1992 else
1993 ix86_cmodel = CM_32;
1994 }
1995 if (ix86_asm_string != 0)
1996 {
1997 if (! TARGET_MACHO
1998 && !strcmp (ix86_asm_string, "intel"))
1999 ix86_asm_dialect = ASM_INTEL;
2000 else if (!strcmp (ix86_asm_string, "att"))
2001 ix86_asm_dialect = ASM_ATT;
2002 else
2003 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2004 }
2005 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2006 error ("code model %qs not supported in the %s bit mode",
2007 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2008 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2009 sorry ("%i-bit mode not compiled in",
2010 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2011
2012 for (i = 0; i < pta_size; i++)
2013 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2014 {
2015 ix86_arch = processor_alias_table[i].processor;
2016 /* Default cpu tuning to the architecture. */
2017 ix86_tune = ix86_arch;
2018
2019 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2020 error ("CPU you selected does not support x86-64 "
2021 "instruction set");
2022
2023 if (processor_alias_table[i].flags & PTA_MMX
2024 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2025 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2026 if (processor_alias_table[i].flags & PTA_3DNOW
2027 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2028 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2029 if (processor_alias_table[i].flags & PTA_3DNOW_A
2030 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2031 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2032 if (processor_alias_table[i].flags & PTA_SSE
2033 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2034 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2035 if (processor_alias_table[i].flags & PTA_SSE2
2036 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2037 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2038 if (processor_alias_table[i].flags & PTA_SSE3
2039 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2040 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2041 if (processor_alias_table[i].flags & PTA_SSSE3
2042 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2043 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2044 if (processor_alias_table[i].flags & PTA_SSE4_1
2045 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2047 if (processor_alias_table[i].flags & PTA_SSE4_2
2048 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2050 if (processor_alias_table[i].flags & PTA_SSE4A
2051 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2052 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2053
2054 if (processor_alias_table[i].flags & PTA_ABM)
2055 x86_abm = true;
2056 if (processor_alias_table[i].flags & PTA_CX16)
2057 x86_cmpxchg16b = true;
2058 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2059 x86_popcnt = true;
2060 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2061 x86_prefetch_sse = true;
2062 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2063 x86_sahf = true;
2064
2065 break;
2066 }
2067
2068 if (i == pta_size)
2069 error ("bad value (%s) for -march= switch", ix86_arch_string);
2070
2071 ix86_arch_mask = 1u << ix86_arch;
2072 for (i = 0; i < X86_ARCH_LAST; ++i)
2073 ix86_arch_features[i] &= ix86_arch_mask;
2074
2075 for (i = 0; i < pta_size; i++)
2076 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2077 {
2078 ix86_tune = processor_alias_table[i].processor;
2079 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2080 {
2081 if (ix86_tune_defaulted)
2082 {
2083 ix86_tune_string = "x86-64";
2084 for (i = 0; i < pta_size; i++)
2085 if (! strcmp (ix86_tune_string,
2086 processor_alias_table[i].name))
2087 break;
2088 ix86_tune = processor_alias_table[i].processor;
2089 }
2090 else
2091 error ("CPU you selected does not support x86-64 "
2092 "instruction set");
2093 }
2094 /* Intel CPUs have always interpreted SSE prefetch instructions as
2095 NOPs; so, we can enable SSE prefetch instructions even when
2096 -mtune (rather than -march) points us to a processor that has them.
2097 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2098 higher processors. */
2099 if (TARGET_CMOVE
2100 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2101 x86_prefetch_sse = true;
2102 break;
2103 }
2104 if (i == pta_size)
2105 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2106
2107 ix86_tune_mask = 1u << ix86_tune;
2108 for (i = 0; i < X86_TUNE_LAST; ++i)
2109 ix86_tune_features[i] &= ix86_tune_mask;
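/* Editor's note: each ix86_arch_features[] / ix86_tune_features[] entry
   starts out as a bitmask of processors the feature applies to; ANDing it
   with the single-bit mask (1u << ix86_arch or 1u << ix86_tune) leaves the
   entry nonzero exactly when the selected processor is in that set, so the
   entries can be tested as plain booleans afterwards.  */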
2110
2111 if (optimize_size)
2112 ix86_cost = &size_cost;
2113 else
2114 ix86_cost = processor_target_table[ix86_tune].cost;
2115
2116 /* Arrange to set up i386_stack_locals for all functions. */
2117 init_machine_status = ix86_init_machine_status;
2118
2119 /* Validate -mregparm= value. */
2120 if (ix86_regparm_string)
2121 {
2122 if (TARGET_64BIT)
2123 warning (0, "-mregparm is ignored in 64-bit mode");
2124 i = atoi (ix86_regparm_string);
2125 if (i < 0 || i > REGPARM_MAX)
2126 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2127 else
2128 ix86_regparm = i;
2129 }
2130 if (TARGET_64BIT)
2131 ix86_regparm = REGPARM_MAX;
2132
2133 /* If the user has provided any of the -malign-* options,
2134 warn and use that value only if -falign-* is not set.
2135 Remove this code in GCC 3.2 or later. */
2136 if (ix86_align_loops_string)
2137 {
2138 warning (0, "-malign-loops is obsolete, use -falign-loops");
2139 if (align_loops == 0)
2140 {
2141 i = atoi (ix86_align_loops_string);
2142 if (i < 0 || i > MAX_CODE_ALIGN)
2143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2144 else
2145 align_loops = 1 << i;
2146 }
2147 }
2148
2149 if (ix86_align_jumps_string)
2150 {
2151 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2152 if (align_jumps == 0)
2153 {
2154 i = atoi (ix86_align_jumps_string);
2155 if (i < 0 || i > MAX_CODE_ALIGN)
2156 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2157 else
2158 align_jumps = 1 << i;
2159 }
2160 }
2161
2162 if (ix86_align_funcs_string)
2163 {
2164 warning (0, "-malign-functions is obsolete, use -falign-functions");
2165 if (align_functions == 0)
2166 {
2167 i = atoi (ix86_align_funcs_string);
2168 if (i < 0 || i > MAX_CODE_ALIGN)
2169 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2170 else
2171 align_functions = 1 << i;
2172 }
2173 }
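/* Worked example (editor's note): -malign-loops=4 passes the range check
   and sets align_loops = 1 << 4 = 16, i.e. a 16-byte alignment; the option
   value is an exponent, not a byte count.  */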
2174
2175 /* Default align_* from the processor table. */
2176 if (align_loops == 0)
2177 {
2178 align_loops = processor_target_table[ix86_tune].align_loop;
2179 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2180 }
2181 if (align_jumps == 0)
2182 {
2183 align_jumps = processor_target_table[ix86_tune].align_jump;
2184 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2185 }
2186 if (align_functions == 0)
2187 {
2188 align_functions = processor_target_table[ix86_tune].align_func;
2189 }
2190
2191 /* Validate -mbranch-cost= value, or provide default. */
2192 ix86_branch_cost = ix86_cost->branch_cost;
2193 if (ix86_branch_cost_string)
2194 {
2195 i = atoi (ix86_branch_cost_string);
2196 if (i < 0 || i > 5)
2197 error ("-mbranch-cost=%d is not between 0 and 5", i);
2198 else
2199 ix86_branch_cost = i;
2200 }
2201 if (ix86_section_threshold_string)
2202 {
2203 i = atoi (ix86_section_threshold_string);
2204 if (i < 0)
2205 error ("-mlarge-data-threshold=%d is negative", i);
2206 else
2207 ix86_section_threshold = i;
2208 }
2209
2210 if (ix86_tls_dialect_string)
2211 {
2212 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2213 ix86_tls_dialect = TLS_DIALECT_GNU;
2214 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2215 ix86_tls_dialect = TLS_DIALECT_GNU2;
2216 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2217 ix86_tls_dialect = TLS_DIALECT_SUN;
2218 else
2219 error ("bad value (%s) for -mtls-dialect= switch",
2220 ix86_tls_dialect_string);
2221 }
2222
2223 if (ix87_precision_string)
2224 {
2225 i = atoi (ix87_precision_string);
2226 if (i != 32 && i != 64 && i != 80)
2227 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2228 }
2229
2230 if (TARGET_64BIT)
2231 {
2232 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2233
2234 /* Enable by default the SSE and MMX builtins. Do allow the user to
2235 explicitly disable any of these. In particular, disabling SSE and
2236 MMX for kernel code is extremely useful. */
2237 if (!ix86_arch_specified)
2238 ix86_isa_flags
2239 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2240 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2241
2242 if (TARGET_RTD)
2243 warning (0, "-mrtd is ignored in 64bit mode");
2244 }
2245 else
2246 {
2247 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2248
2249 if (!ix86_arch_specified)
2250 ix86_isa_flags
2251 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2252
2253 /* The i386 ABI does not specify a red zone. It still makes sense to use
2254 it when the programmer takes care to keep the stack from being destroyed. */
2255 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2256 target_flags |= MASK_NO_RED_ZONE;
2257 }
2258
2259 /* Keep nonleaf frame pointers. */
2260 if (flag_omit_frame_pointer)
2261 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2262 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2263 flag_omit_frame_pointer = 1;
2264
2265 /* If we're doing fast math, we don't care about comparison order
2266 wrt NaNs. This lets us use a shorter comparison sequence. */
2267 if (flag_finite_math_only)
2268 target_flags &= ~MASK_IEEE_FP;
2269
2270 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2271 since the insns won't need emulation. */
2272 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2273 target_flags &= ~MASK_NO_FANCY_MATH_387;
2274
2275 /* Likewise, if the target doesn't have a 387, or we've specified
2276 software floating point, don't use 387 inline intrinsics. */
2277 if (!TARGET_80387)
2278 target_flags |= MASK_NO_FANCY_MATH_387;
2279
2280 /* Turn on SSE4.1 builtins for -msse4.2. */
2281 if (TARGET_SSE4_2)
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2283
2284 /* Turn on SSSE3 builtins for -msse4.1. */
2285 if (TARGET_SSE4_1)
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2287
2288 /* Turn on SSE3 builtins for -mssse3. */
2289 if (TARGET_SSSE3)
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2291
2292 /* Turn on SSE3 builtins for -msse4a. */
2293 if (TARGET_SSE4A)
2294 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2295
2296 /* Turn on SSE2 builtins for -msse3. */
2297 if (TARGET_SSE3)
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2299
2300 /* Turn on SSE builtins for -msse2. */
2301 if (TARGET_SSE2)
2302 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2303
2304 /* Turn on MMX builtins for -msse. */
2305 if (TARGET_SSE)
2306 {
2307 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2308 x86_prefetch_sse = true;
2309 }
2310
2311 /* Turn on MMX builtins for 3Dnow. */
2312 if (TARGET_3DNOW)
2313 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2314
2315 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2316 if (TARGET_SSE4_2 || TARGET_ABM)
2317 x86_popcnt = true;
2318
2319 /* Validate -mpreferred-stack-boundary= value, or provide default.
2320 The default of 128 bits is for Pentium III's SSE __m128. We can't
2321 change it because of optimize_size. Otherwise, we can't mix object
2322 files compiled with -Os and -On. */
2323 ix86_preferred_stack_boundary = 128;
2324 if (ix86_preferred_stack_boundary_string)
2325 {
2326 i = atoi (ix86_preferred_stack_boundary_string);
2327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2329 TARGET_64BIT ? 4 : 2);
2330 else
2331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2332 }
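/* Worked example (editor's note): the 128-bit default corresponds to 16
   bytes; -mpreferred-stack-boundary=4 likewise gives
   (1 << 4) * BITS_PER_UNIT = 128 bits, and the smallest value accepted in
   32-bit mode, 2, gives a 4-byte (32-bit) boundary.  */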
2333
2334 /* Accept -msseregparm only if at least SSE support is enabled. */
2335 if (TARGET_SSEREGPARM
2336 && ! TARGET_SSE)
2337 error ("-msseregparm used without SSE enabled");
2338
2339 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2340 if (ix86_fpmath_string != 0)
2341 {
2342 if (! strcmp (ix86_fpmath_string, "387"))
2343 ix86_fpmath = FPMATH_387;
2344 else if (! strcmp (ix86_fpmath_string, "sse"))
2345 {
2346 if (!TARGET_SSE)
2347 {
2348 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2349 ix86_fpmath = FPMATH_387;
2350 }
2351 else
2352 ix86_fpmath = FPMATH_SSE;
2353 }
2354 else if (! strcmp (ix86_fpmath_string, "387,sse")
2355 || ! strcmp (ix86_fpmath_string, "sse,387"))
2356 {
2357 if (!TARGET_SSE)
2358 {
2359 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2360 ix86_fpmath = FPMATH_387;
2361 }
2362 else if (!TARGET_80387)
2363 {
2364 warning (0, "387 instruction set disabled, using SSE arithmetics");
2365 ix86_fpmath = FPMATH_SSE;
2366 }
2367 else
2368 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2369 }
2370 else
2371 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2372 }
2373
2374 /* If the i387 is disabled, then do not return values in it. */
2375 if (!TARGET_80387)
2376 target_flags &= ~MASK_FLOAT_RETURNS;
2377
2378 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2379 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2380 && !optimize_size)
2381 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2382
2383 /* ??? Unwind info is not correct around the CFG unless either a frame
2384 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2385 unwind info generation to be aware of the CFG and propagating states
2386 around edges. */
2387 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2388 || flag_exceptions || flag_non_call_exceptions)
2389 && flag_omit_frame_pointer
2390 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2391 {
2392 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2393 warning (0, "unwind tables currently require either a frame pointer "
2394 "or -maccumulate-outgoing-args for correctness");
2395 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2396 }
2397
2398 /* For sane SSE instruction set generation we need the fcomi instruction.
2399 It is safe to enable all CMOVE instructions. */
2400 if (TARGET_SSE)
2401 TARGET_CMOVE = 1;
2402
2403 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2404 {
2405 char *p;
2406 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2407 p = strchr (internal_label_prefix, 'X');
2408 internal_label_prefix_len = p - internal_label_prefix;
2409 *p = '\0';
2410 }
2411
2412 /* When scheduling description is not available, disable scheduler pass
2413 so it won't slow down the compilation and make x87 code slower. */
2414 if (!TARGET_SCHEDULE)
2415 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2416
2417 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2418 set_param_value ("simultaneous-prefetches",
2419 ix86_cost->simultaneous_prefetches);
2420 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2421 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2422 }
2423 \f
2424 /* Return true if this goes in large data/bss. */
2425
2426 static bool
2427 ix86_in_large_data_p (tree exp)
2428 {
2429 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2430 return false;
2431
2432 /* Functions are never large data. */
2433 if (TREE_CODE (exp) == FUNCTION_DECL)
2434 return false;
2435
2436 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2437 {
2438 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2439 if (strcmp (section, ".ldata") == 0
2440 || strcmp (section, ".lbss") == 0)
2441 return true;
2442 return false;
2443 }
2444 else
2445 {
2446 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2447
2448 /* If this is an incomplete type with size 0, then we can't put it
2449 in data because it might be too big when completed. */
2450 if (!size || size > ix86_section_threshold)
2451 return true;
2452 }
2453
2454 return false;
2455 }
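/* Example (editor's sketch): with the default ix86_section_threshold of
   65536 and -mcmodel=medium, a 100000-byte array counts as large data and
   is directed to .ldata/.lbss by the hooks below, while a 4-byte scalar
   stays in the ordinary .data/.bss sections.  */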
2456
2457 /* Switch to the appropriate section for output of DECL.
2458 DECL is either a `VAR_DECL' node or a constant of some sort.
2459 RELOC indicates whether forming the initial value of DECL requires
2460 link-time relocations. */
2461
2462 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2463 ATTRIBUTE_UNUSED;
2464
2465 static section *
2466 x86_64_elf_select_section (tree decl, int reloc,
2467 unsigned HOST_WIDE_INT align)
2468 {
2469 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && ix86_in_large_data_p (decl))
2471 {
2472 const char *sname = NULL;
2473 unsigned int flags = SECTION_WRITE;
2474 switch (categorize_decl_for_section (decl, reloc))
2475 {
2476 case SECCAT_DATA:
2477 sname = ".ldata";
2478 break;
2479 case SECCAT_DATA_REL:
2480 sname = ".ldata.rel";
2481 break;
2482 case SECCAT_DATA_REL_LOCAL:
2483 sname = ".ldata.rel.local";
2484 break;
2485 case SECCAT_DATA_REL_RO:
2486 sname = ".ldata.rel.ro";
2487 break;
2488 case SECCAT_DATA_REL_RO_LOCAL:
2489 sname = ".ldata.rel.ro.local";
2490 break;
2491 case SECCAT_BSS:
2492 sname = ".lbss";
2493 flags |= SECTION_BSS;
2494 break;
2495 case SECCAT_RODATA:
2496 case SECCAT_RODATA_MERGE_STR:
2497 case SECCAT_RODATA_MERGE_STR_INIT:
2498 case SECCAT_RODATA_MERGE_CONST:
2499 sname = ".lrodata";
2500 flags = 0;
2501 break;
2502 case SECCAT_SRODATA:
2503 case SECCAT_SDATA:
2504 case SECCAT_SBSS:
2505 gcc_unreachable ();
2506 case SECCAT_TEXT:
2507 case SECCAT_TDATA:
2508 case SECCAT_TBSS:
2509 /* We don't split these for medium model. Place them into
2510 default sections and hope for the best. */
2511 break;
2512 }
2513 if (sname)
2514 {
2515 /* We might get called with string constants, but get_named_section
2516 doesn't like them as they are not DECLs. Also, we need to set
2517 flags in that case. */
2518 if (!DECL_P (decl))
2519 return get_section (sname, flags, NULL);
2520 return get_named_section (decl, sname, reloc);
2521 }
2522 }
2523 return default_elf_select_section (decl, reloc, align);
2524 }
2525
2526 /* Build up a unique section name, expressed as a
2527 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2528 RELOC indicates whether the initial value of EXP requires
2529 link-time relocations. */
2530
2531 static void ATTRIBUTE_UNUSED
2532 x86_64_elf_unique_section (tree decl, int reloc)
2533 {
2534 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2535 && ix86_in_large_data_p (decl))
2536 {
2537 const char *prefix = NULL;
2538 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2539 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2540
2541 switch (categorize_decl_for_section (decl, reloc))
2542 {
2543 case SECCAT_DATA:
2544 case SECCAT_DATA_REL:
2545 case SECCAT_DATA_REL_LOCAL:
2546 case SECCAT_DATA_REL_RO:
2547 case SECCAT_DATA_REL_RO_LOCAL:
2548 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2549 break;
2550 case SECCAT_BSS:
2551 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2552 break;
2553 case SECCAT_RODATA:
2554 case SECCAT_RODATA_MERGE_STR:
2555 case SECCAT_RODATA_MERGE_STR_INIT:
2556 case SECCAT_RODATA_MERGE_CONST:
2557 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2558 break;
2559 case SECCAT_SRODATA:
2560 case SECCAT_SDATA:
2561 case SECCAT_SBSS:
2562 gcc_unreachable ();
2563 case SECCAT_TEXT:
2564 case SECCAT_TDATA:
2565 case SECCAT_TBSS:
2566 /* We don't split these for medium model. Place them into
2567 default sections and hope for the best. */
2568 break;
2569 }
2570 if (prefix)
2571 {
2572 const char *name;
2573 size_t nlen, plen;
2574 char *string;
2575 plen = strlen (prefix);
2576
2577 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2578 name = targetm.strip_name_encoding (name);
2579 nlen = strlen (name);
2580
2581 string = (char *) alloca (nlen + plen + 1);
2582 memcpy (string, prefix, plen);
2583 memcpy (string + plen, name, nlen + 1);
2584
2585 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2586 return;
2587 }
2588 }
2589 default_unique_section (decl, reloc);
2590 }
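/* Example (editor's sketch): for a writable large-data variable "foo" this
   yields a section named ".ldata.foo", or ".gnu.linkonce.ld.foo" when
   one-only semantics must be expressed without COMDAT groups, mirroring
   what default_unique_section does with the ordinary ".data." prefix.  */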
2591
2592 #ifdef COMMON_ASM_OP
2593 /* This says how to output assembler code to declare an
2594 uninitialized external linkage data object.
2595
2596 For medium model x86-64 we need to use the .largecomm directive for
2597 large objects. */
2598 void
2599 x86_elf_aligned_common (FILE *file,
2600 const char *name, unsigned HOST_WIDE_INT size,
2601 int align)
2602 {
2603 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2604 && size > (unsigned int)ix86_section_threshold)
2605 fprintf (file, ".largecomm\t");
2606 else
2607 fprintf (file, "%s", COMMON_ASM_OP);
2608 assemble_name (file, name);
2609 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2610 size, align / BITS_PER_UNIT);
2611 }
2612 #endif
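/* Example (editor's sketch): for a 100000-byte common symbol "buf" with a
   256-bit (32-byte) alignment under -mcmodel=medium this emits

     .largecomm	buf,100000,32

   while smaller objects fall back to the usual COMMON_ASM_OP form.  */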
2613
2614 /* Utility function for targets to use in implementing
2615 ASM_OUTPUT_ALIGNED_BSS. */
2616
2617 void
2618 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2619 const char *name, unsigned HOST_WIDE_INT size,
2620 int align)
2621 {
2622 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2623 && size > (unsigned int)ix86_section_threshold)
2624 switch_to_section (get_named_section (decl, ".lbss", 0));
2625 else
2626 switch_to_section (bss_section);
2627 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2628 #ifdef ASM_DECLARE_OBJECT_NAME
2629 last_assemble_variable_decl = decl;
2630 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2631 #else
2632 /* The standard thing is to just output a label for the object. */
2633 ASM_OUTPUT_LABEL (file, name);
2634 #endif /* ASM_DECLARE_OBJECT_NAME */
2635 ASM_OUTPUT_SKIP (file, size ? size : 1);
2636 }
2637 \f
2638 void
2639 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2640 {
2641 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2642 make the problem with not enough registers even worse. */
2643 #ifdef INSN_SCHEDULING
2644 if (level > 1)
2645 flag_schedule_insns = 0;
2646 #endif
2647
2648 if (TARGET_MACHO)
2649 /* The Darwin libraries never set errno, so we might as well
2650 avoid calling them when that's the only reason we would. */
2651 flag_errno_math = 0;
2652
2653 /* The default values of these switches depend on TARGET_64BIT,
2654 which is not known at this moment. Mark these values with 2 and
2655 let the user override them. In case there is no command line option
2656 specifying them, we will set the defaults in override_options. */
2657 if (optimize >= 1)
2658 flag_omit_frame_pointer = 2;
2659 flag_pcc_struct_return = 2;
2660 flag_asynchronous_unwind_tables = 2;
2661 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2662 SUBTARGET_OPTIMIZATION_OPTIONS;
2663 #endif
2664 }
2665 \f
2666 /* Decide whether we can make a sibling call to a function. DECL is the
2667 declaration of the function being targeted by the call and EXP is the
2668 CALL_EXPR representing the call. */
2669
2670 static bool
2671 ix86_function_ok_for_sibcall (tree decl, tree exp)
2672 {
2673 tree func;
2674 rtx a, b;
2675
2676 /* If we are generating position-independent code, we cannot sibcall
2677 optimize any indirect call, or a direct call to a global function,
2678 as the PLT requires %ebx be live. */
2679 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2680 return false;
2681
2682 if (decl)
2683 func = decl;
2684 else
2685 {
2686 func = TREE_TYPE (CALL_EXPR_FN (exp));
2687 if (POINTER_TYPE_P (func))
2688 func = TREE_TYPE (func);
2689 }
2690
2691 /* Check that the return value locations are the same. For example,
2692 if we are returning floats on the 80387 register stack, we cannot
2693 make a sibcall from a function that doesn't return a float to a
2694 function that does or, conversely, from a function that does return
2695 a float to a function that doesn't; the necessary stack adjustment
2696 would not be executed. This is also the place we notice
2697 differences in the return value ABI. Note that it is ok for one
2698 of the functions to have void return type as long as the return
2699 value of the other is passed in a register. */
2700 a = ix86_function_value (TREE_TYPE (exp), func, false);
2701 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2702 cfun->decl, false);
2703 if (STACK_REG_P (a) || STACK_REG_P (b))
2704 {
2705 if (!rtx_equal_p (a, b))
2706 return false;
2707 }
2708 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2709 ;
2710 else if (!rtx_equal_p (a, b))
2711 return false;
2712
2713 /* If this call is indirect, we'll need to be able to use a call-clobbered
2714 register for the address of the target function. Make sure that all
2715 such registers are not used for passing parameters. */
2716 if (!decl && !TARGET_64BIT)
2717 {
2718 tree type;
2719
2720 /* We're looking at the CALL_EXPR, we need the type of the function. */
2721 type = CALL_EXPR_FN (exp); /* pointer expression */
2722 type = TREE_TYPE (type); /* pointer type */
2723 type = TREE_TYPE (type); /* function type */
2724
2725 if (ix86_function_regparm (type, NULL) >= 3)
2726 {
2727 /* ??? Need to count the actual number of registers to be used,
2728 not the possible number of registers. Fix later. */
2729 return false;
2730 }
2731 }
2732
2733 /* Dllimport'd functions are also called indirectly. */
2734 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2735 && decl && DECL_DLLIMPORT_P (decl)
2736 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2737 return false;
2738
2739 /* If we force-aligned the stack, then sibcalling would unalign the
2740 stack, which may break the called function. */
2741 if (cfun->machine->force_align_arg_pointer)
2742 return false;
2743
2744 /* Otherwise okay. That also includes certain types of indirect calls. */
2745 return true;
2746 }
2747
2748 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2749 calling convention attributes;
2750 arguments as in struct attribute_spec.handler. */
2751
2752 static tree
2753 ix86_handle_cconv_attribute (tree *node, tree name,
2754 tree args,
2755 int flags ATTRIBUTE_UNUSED,
2756 bool *no_add_attrs)
2757 {
2758 if (TREE_CODE (*node) != FUNCTION_TYPE
2759 && TREE_CODE (*node) != METHOD_TYPE
2760 && TREE_CODE (*node) != FIELD_DECL
2761 && TREE_CODE (*node) != TYPE_DECL)
2762 {
2763 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2764 IDENTIFIER_POINTER (name));
2765 *no_add_attrs = true;
2766 return NULL_TREE;
2767 }
2768
2769 /* Can combine regparm with all attributes but fastcall. */
2770 if (is_attribute_p ("regparm", name))
2771 {
2772 tree cst;
2773
2774 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2775 {
2776 error ("fastcall and regparm attributes are not compatible");
2777 }
2778
2779 cst = TREE_VALUE (args);
2780 if (TREE_CODE (cst) != INTEGER_CST)
2781 {
2782 warning (OPT_Wattributes,
2783 "%qs attribute requires an integer constant argument",
2784 IDENTIFIER_POINTER (name));
2785 *no_add_attrs = true;
2786 }
2787 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2788 {
2789 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2790 IDENTIFIER_POINTER (name), REGPARM_MAX);
2791 *no_add_attrs = true;
2792 }
2793
2794 if (!TARGET_64BIT
2795 && lookup_attribute (ix86_force_align_arg_pointer_string,
2796 TYPE_ATTRIBUTES (*node))
2797 && compare_tree_int (cst, REGPARM_MAX-1))
2798 {
2799 error ("%s functions limited to %d register parameters",
2800 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2801 }
2802
2803 return NULL_TREE;
2804 }
2805
2806 if (TARGET_64BIT)
2807 {
2808 /* Do not warn when emulating the MS ABI. */
2809 if (!TARGET_64BIT_MS_ABI)
2810 warning (OPT_Wattributes, "%qs attribute ignored",
2811 IDENTIFIER_POINTER (name));
2812 *no_add_attrs = true;
2813 return NULL_TREE;
2814 }
2815
2816 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2817 if (is_attribute_p ("fastcall", name))
2818 {
2819 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2820 {
2821 error ("fastcall and cdecl attributes are not compatible");
2822 }
2823 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2824 {
2825 error ("fastcall and stdcall attributes are not compatible");
2826 }
2827 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2828 {
2829 error ("fastcall and regparm attributes are not compatible");
2830 }
2831 }
2832
2833 /* Can combine stdcall with fastcall (redundant), regparm and
2834 sseregparm. */
2835 else if (is_attribute_p ("stdcall", name))
2836 {
2837 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2838 {
2839 error ("stdcall and cdecl attributes are not compatible");
2840 }
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2842 {
2843 error ("stdcall and fastcall attributes are not compatible");
2844 }
2845 }
2846
2847 /* Can combine cdecl with regparm and sseregparm. */
2848 else if (is_attribute_p ("cdecl", name))
2849 {
2850 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2851 {
2852 error ("stdcall and cdecl attributes are not compatible");
2853 }
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2855 {
2856 error ("fastcall and cdecl attributes are not compatible");
2857 }
2858 }
2859
2860 /* Can combine sseregparm with all attributes. */
2861
2862 return NULL_TREE;
2863 }
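/* Usage illustration (editor's sketch, not part of the original file):
     void __attribute__((fastcall)) f (int a, int b);    accepted
     void __attribute__((fastcall, stdcall)) g (void);   rejected with
       "fastcall and stdcall attributes are not compatible"
   regparm additionally takes an integer constant argument, e.g.
   __attribute__((regparm(3))), which is range checked against REGPARM_MAX
   above.  */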
2864
2865 /* Return 0 if the attributes for two types are incompatible, 1 if they
2866 are compatible, and 2 if they are nearly compatible (which causes a
2867 warning to be generated). */
2868
2869 static int
2870 ix86_comp_type_attributes (tree type1, tree type2)
2871 {
2872 /* Check for mismatch of non-default calling convention. */
2873 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2874
2875 if (TREE_CODE (type1) != FUNCTION_TYPE)
2876 return 1;
2877
2878 /* Check for mismatched fastcall/regparm types. */
2879 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2880 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2881 || (ix86_function_regparm (type1, NULL)
2882 != ix86_function_regparm (type2, NULL)))
2883 return 0;
2884
2885 /* Check for mismatched sseregparm types. */
2886 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2887 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2888 return 0;
2889
2890 /* Check for mismatched return types (cdecl vs stdcall). */
2891 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2892 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2893 return 0;
2894
2895 return 1;
2896 }
2897 \f
2898 /* Return the regparm value for a function with the indicated TYPE and DECL.
2899 DECL may be NULL when calling function indirectly
2900 or considering a libcall. */
2901
2902 static int
2903 ix86_function_regparm (tree type, tree decl)
2904 {
2905 tree attr;
2906 int regparm = ix86_regparm;
2907
2908 if (TARGET_64BIT)
2909 return regparm;
2910
2911 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2912 if (attr)
2913 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2914
2915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2916 return 2;
2917
2918 /* Use register calling convention for local functions when possible. */
2919 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2920 && flag_unit_at_a_time && !profile_flag)
2921 {
2922 struct cgraph_local_info *i = cgraph_local_info (decl);
2923 if (i && i->local)
2924 {
2925 int local_regparm, globals = 0, regno;
2926 struct function *f;
2927
2928 /* Make sure no regparm register is taken by a
2929 global register variable. */
2930 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2931 if (global_regs[local_regparm])
2932 break;
2933
2934 /* We can't use regparm(3) for nested functions as these use
2935 the static chain pointer in the third argument. */
2936 if (local_regparm == 3
2937 && (decl_function_context (decl)
2938 || ix86_force_align_arg_pointer)
2939 && !DECL_NO_STATIC_CHAIN (decl))
2940 local_regparm = 2;
2941
2942 /* If the function realigns its stack pointer, the prologue will
2943 clobber %ecx. If we've already generated code for the callee,
2944 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2945 scanning the attributes for the self-realigning property. */
2946 f = DECL_STRUCT_FUNCTION (decl);
2947 if (local_regparm == 3
2948 && (f ? !!f->machine->force_align_arg_pointer
2949 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2951 local_regparm = 2;
2952
2953 /* Each global register variable increases register pressure, so the
2954 more global register variables there are, the less use we can make of
2955 the regparm optimization, unless the user requests it explicitly. */
2956 for (regno = 0; regno < 6; regno++)
2957 if (global_regs[regno])
2958 globals++;
2959 local_regparm
2960 = globals < local_regparm ? local_regparm - globals : 0;
2961
2962 if (local_regparm > regparm)
2963 regparm = local_regparm;
2964 }
2965 }
2966
2967 return regparm;
2968 }
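/* Example (editor's note): in 32-bit code
     int __attribute__((regparm(3))) f (int a, int b, int c);
   passes a, b and c in %eax, %edx and %ecx; a fastcall function gets 2,
   i.e. its first two integer arguments in %ecx and %edx; and a local
   (static, non-exported) function may be promoted to a higher regparm
   value by the cgraph-based logic above when -funit-at-a-time is in
   effect.  */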
2969
2970 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2971 DFmode (2) arguments in SSE registers for a function with the
2972 indicated TYPE and DECL. DECL may be NULL when calling function
2973 indirectly or considering a libcall. Otherwise return 0. */
2974
2975 static int
2976 ix86_function_sseregparm (tree type, tree decl)
2977 {
2978 gcc_assert (!TARGET_64BIT);
2979
2980 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2981 by the sseregparm attribute. */
2982 if (TARGET_SSEREGPARM
2983 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2984 {
2985 if (!TARGET_SSE)
2986 {
2987 if (decl)
2988 error ("Calling %qD with attribute sseregparm without "
2989 "SSE/SSE2 enabled", decl);
2990 else
2991 error ("Calling %qT with attribute sseregparm without "
2992 "SSE/SSE2 enabled", type);
2993 return 0;
2994 }
2995
2996 return 2;
2997 }
2998
2999 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3000 (and DFmode for SSE2) arguments in SSE registers. */
3001 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3002 {
3003 struct cgraph_local_info *i = cgraph_local_info (decl);
3004 if (i && i->local)
3005 return TARGET_SSE2 ? 2 : 1;
3006 }
3007
3008 return 0;
3009 }
3010
3011 /* Return true if EAX is live at the start of the function. Used by
3012 ix86_expand_prologue to determine if we need special help before
3013 calling allocate_stack_worker. */
3014
3015 static bool
3016 ix86_eax_live_at_start_p (void)
3017 {
3018 /* Cheat. Don't bother working forward from ix86_function_regparm
3019 to the function type to whether an actual argument is located in
3020 eax. Instead just look at cfg info, which is still close enough
3021 to correct at this point. This gives false positives for broken
3022 functions that might use uninitialized data that happens to be
3023 allocated in eax, but who cares? */
3024 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3025 }
3026
3027 /* Return true if TYPE has a variable argument list. */
3028
3029 static bool
3030 type_has_variadic_args_p (tree type)
3031 {
3032 tree n, t = TYPE_ARG_TYPES (type);
3033
3034 if (t == NULL)
3035 return false;
3036
3037 while ((n = TREE_CHAIN (t)) != NULL)
3038 t = n;
3039
3040 return TREE_VALUE (t) != void_type_node;
3041 }
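/* Example (editor's note): for "int f (int)" the TYPE_ARG_TYPES chain ends
   in void_type_node, so this returns false; for "int f (int, ...)" there is
   no void terminator and this returns true; an unprototyped "int f ()" has
   a NULL chain and is treated as non-variadic here.  */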
3042
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3049
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3057
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
3059
3060 int
3061 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3062 {
3063 int rtd;
3064
3065 /* None of the 64-bit ABIs pop arguments. */
3066 if (TARGET_64BIT)
3067 return 0;
3068
3069 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3070
3071 /* Cdecl functions override -mrtd, and never pop the stack. */
3072 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3073 {
3074 /* Stdcall and fastcall functions will pop the stack if not
3075 variable args. */
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3077 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3078 rtd = 1;
3079
3080 if (rtd && ! type_has_variadic_args_p (funtype))
3081 return size;
3082 }
3083
3084 /* Lose any fake structure return argument if it is passed on the stack. */
3085 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3086 && !KEEP_AGGREGATE_RETURN_POINTER)
3087 {
3088 int nregs = ix86_function_regparm (funtype, fundecl);
3089 if (nregs == 0)
3090 return GET_MODE_SIZE (Pmode);
3091 }
3092
3093 return 0;
3094 }
3095 \f
3096 /* Argument support functions. */
3097
3098 /* Return true when REGNO may be used to pass function parameters. */
3099 bool
3100 ix86_function_arg_regno_p (int regno)
3101 {
3102 int i;
3103 const int *parm_regs;
3104
3105 if (!TARGET_64BIT)
3106 {
3107 if (TARGET_MACHO)
3108 return (regno < REGPARM_MAX
3109 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3110 else
3111 return (regno < REGPARM_MAX
3112 || (TARGET_MMX && MMX_REGNO_P (regno)
3113 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3114 || (TARGET_SSE && SSE_REGNO_P (regno)
3115 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3116 }
3117
3118 if (TARGET_MACHO)
3119 {
3120 if (SSE_REGNO_P (regno) && TARGET_SSE)
3121 return true;
3122 }
3123 else
3124 {
3125 if (TARGET_SSE && SSE_REGNO_P (regno)
3126 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3127 return true;
3128 }
3129
3130 /* RAX is used as hidden argument to va_arg functions. */
3131 if (!TARGET_64BIT_MS_ABI && regno == 0)
3132 return true;
3133
3134 if (TARGET_64BIT_MS_ABI)
3135 parm_regs = x86_64_ms_abi_int_parameter_registers;
3136 else
3137 parm_regs = x86_64_int_parameter_registers;
3138 for (i = 0; i < REGPARM_MAX; i++)
3139 if (regno == parm_regs[i])
3140 return true;
3141 return false;
3142 }
3143
3144 /* Return true if we do not know how to pass TYPE solely in registers. */
3145
3146 static bool
3147 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3148 {
3149 if (must_pass_in_stack_var_size_or_pad (mode, type))
3150 return true;
3151
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT && mode == TImode
3156 && type && TREE_CODE (type) != VECTOR_TYPE);
3157 }
3158
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0. */
3162
3163 void
3164 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3165 tree fntype, /* tree ptr for function decl */
3166 rtx libname, /* SYMBOL_REF of library name or 0 */
3167 tree fndecl)
3168 {
3169 memset (cum, 0, sizeof (*cum));
3170
3171 /* Set up the number of registers to use for passing arguments. */
3172 cum->nregs = ix86_regparm;
3173 if (TARGET_SSE)
3174 cum->sse_nregs = SSE_REGPARM_MAX;
3175 if (TARGET_MMX)
3176 cum->mmx_nregs = MMX_REGPARM_MAX;
3177 cum->warn_sse = true;
3178 cum->warn_mmx = true;
3179 cum->maybe_vaarg = (fntype
3180 ? (!TYPE_ARG_TYPES (fntype)
3181 || type_has_variadic_args_p (fntype))
3182 : !libname);
3183
3184 if (!TARGET_64BIT)
3185 {
3186 /* If there are variable arguments, then we won't pass anything
3187 in registers in 32-bit mode. */
3188 if (cum->maybe_vaarg)
3189 {
3190 cum->nregs = 0;
3191 cum->sse_nregs = 0;
3192 cum->mmx_nregs = 0;
3193 cum->warn_sse = 0;
3194 cum->warn_mmx = 0;
3195 return;
3196 }
3197
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3200 if (fntype)
3201 {
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3203 {
3204 cum->nregs = 2;
3205 cum->fastcall = 1;
3206 }
3207 else
3208 cum->nregs = ix86_function_regparm (fntype, fndecl);
3209 }
3210
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3214 }
3215 }
3216
3217 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3218 But in the case of vector types, it is some vector mode.
3219
3220 When we have only some of our vector isa extensions enabled, then there
3221 are some modes for which vector_mode_supported_p is false. For these
3222 modes, the generic vector support in gcc will choose some non-vector mode
3223 in order to implement the type. By computing the natural mode, we'll
3224 select the proper ABI location for the operand and not depend on whatever
3225 the middle-end decides to do with these vector types. */
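/* E.g. (informal): for a GCC vector type such as
   int __attribute__((vector_size (16))), this returns V4SImode even when SSE
   is disabled and the middle end would otherwise have implemented the type
   in a non-vector mode. */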
3226
3227 static enum machine_mode
3228 type_natural_mode (tree type)
3229 {
3230 enum machine_mode mode = TYPE_MODE (type);
3231
3232 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3233 {
3234 HOST_WIDE_INT size = int_size_in_bytes (type);
3235 if ((size == 8 || size == 16)
3236 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3237 && TYPE_VECTOR_SUBPARTS (type) > 1)
3238 {
3239 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3240
3241 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3242 mode = MIN_MODE_VECTOR_FLOAT;
3243 else
3244 mode = MIN_MODE_VECTOR_INT;
3245
3246 /* Get the mode which has this inner mode and number of units. */
3247 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3248 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3249 && GET_MODE_INNER (mode) == innermode)
3250 return mode;
3251
3252 gcc_unreachable ();
3253 }
3254 }
3255
3256 return mode;
3257 }
3258
3259 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3260 this may not agree with the mode that the type system has chosen for the
3261 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3262 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3263
3264 static rtx
3265 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3266 unsigned int regno)
3267 {
3268 rtx tmp;
3269
3270 if (orig_mode != BLKmode)
3271 tmp = gen_rtx_REG (orig_mode, regno);
3272 else
3273 {
3274 tmp = gen_rtx_REG (mode, regno);
3275 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3276 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3277 }
3278
3279 return tmp;
3280 }
3281
3282 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3283 goal of this code is to classify each 8-byte chunk of an incoming argument by
3284 its register class and assign registers accordingly. */
3285
3286 /* Return the union class of CLASS1 and CLASS2.
3287 See the x86-64 PS ABI for details. */
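/* E.g. merging X86_64_INTEGER_CLASS with X86_64_SSE_CLASS yields
   X86_64_INTEGER_CLASS (rule #4 below), and merging anything with
   X86_64_MEMORY_CLASS yields X86_64_MEMORY_CLASS (rule #3). */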
3288
3289 static enum x86_64_reg_class
3290 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3291 {
3292 /* Rule #1: If both classes are equal, this is the resulting class. */
3293 if (class1 == class2)
3294 return class1;
3295
3296 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3297 the other class. */
3298 if (class1 == X86_64_NO_CLASS)
3299 return class2;
3300 if (class2 == X86_64_NO_CLASS)
3301 return class1;
3302
3303 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3304 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3305 return X86_64_MEMORY_CLASS;
3306
3307 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3308 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3309 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3310 return X86_64_INTEGERSI_CLASS;
3311 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3312 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3313 return X86_64_INTEGER_CLASS;
3314
3315 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3316 MEMORY is used. */
3317 if (class1 == X86_64_X87_CLASS
3318 || class1 == X86_64_X87UP_CLASS
3319 || class1 == X86_64_COMPLEX_X87_CLASS
3320 || class2 == X86_64_X87_CLASS
3321 || class2 == X86_64_X87UP_CLASS
3322 || class2 == X86_64_COMPLEX_X87_CLASS)
3323 return X86_64_MEMORY_CLASS;
3324
3325 /* Rule #6: Otherwise class SSE is used. */
3326 return X86_64_SSE_CLASS;
3327 }
3328
3329 /* Classify the argument of type TYPE and mode MODE.
3330 CLASSES will be filled by the register class used to pass each word
3331 of the operand. The number of words is returned. In case the parameter
3332 should be passed in memory, 0 is returned. As a special case for zero
3333 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3334
3335 BIT_OFFSET is used internally for handling records and specifies the
3336 offset in bits modulo 256 to avoid overflow cases.
3337
3338 See the x86-64 PS ABI for details.
3339 */
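/* Worked example (informal, matching the rules below): for
   struct s { double d; int i; }, the first eightbyte is classified
   X86_64_SSEDF_CLASS and the second X86_64_INTEGERSI_CLASS, so two classes
   are stored and 2 is returned; the struct therefore ends up split between
   one SSE and one integer register. */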
3340
3341 static int
3342 classify_argument (enum machine_mode mode, tree type,
3343 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3344 {
3345 HOST_WIDE_INT bytes =
3346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3347 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3348
3349 /* Variable sized entities are always passed/returned in memory. */
3350 if (bytes < 0)
3351 return 0;
3352
3353 if (mode != VOIDmode
3354 && targetm.calls.must_pass_in_stack (mode, type))
3355 return 0;
3356
3357 if (type && AGGREGATE_TYPE_P (type))
3358 {
3359 int i;
3360 tree field;
3361 enum x86_64_reg_class subclasses[MAX_CLASSES];
3362
3363 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3364 if (bytes > 16)
3365 return 0;
3366
3367 for (i = 0; i < words; i++)
3368 classes[i] = X86_64_NO_CLASS;
3369
3370 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3371 signal the memory class, so handle it as a special case. */
3372 if (!words)
3373 {
3374 classes[0] = X86_64_NO_CLASS;
3375 return 1;
3376 }
3377
3378 /* Classify each field of record and merge classes. */
3379 switch (TREE_CODE (type))
3380 {
3381 case RECORD_TYPE:
3382 /* And now merge the fields of structure. */
3383 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3384 {
3385 if (TREE_CODE (field) == FIELD_DECL)
3386 {
3387 int num;
3388
3389 if (TREE_TYPE (field) == error_mark_node)
3390 continue;
3391
3392 /* Bitfields are always classified as integer. Handle them
3393 early, since later code would consider them to be
3394 misaligned integers. */
3395 if (DECL_BIT_FIELD (field))
3396 {
3397 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3398 i < ((int_bit_position (field) + (bit_offset % 64))
3399 + tree_low_cst (DECL_SIZE (field), 0)
3400 + 63) / 8 / 8; i++)
3401 classes[i] =
3402 merge_classes (X86_64_INTEGER_CLASS,
3403 classes[i]);
3404 }
3405 else
3406 {
3407 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3408 TREE_TYPE (field), subclasses,
3409 (int_bit_position (field)
3410 + bit_offset) % 256);
3411 if (!num)
3412 return 0;
3413 for (i = 0; i < num; i++)
3414 {
3415 int pos =
3416 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3417 classes[i + pos] =
3418 merge_classes (subclasses[i], classes[i + pos]);
3419 }
3420 }
3421 }
3422 }
3423 break;
3424
3425 case ARRAY_TYPE:
3426 /* Arrays are handled as small records. */
3427 {
3428 int num;
3429 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3430 TREE_TYPE (type), subclasses, bit_offset);
3431 if (!num)
3432 return 0;
3433
3434 /* The partial classes are now full classes. */
3435 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3436 subclasses[0] = X86_64_SSE_CLASS;
3437 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3438 subclasses[0] = X86_64_INTEGER_CLASS;
3439
3440 for (i = 0; i < words; i++)
3441 classes[i] = subclasses[i % num];
3442
3443 break;
3444 }
3445 case UNION_TYPE:
3446 case QUAL_UNION_TYPE:
3447 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3449 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3450 {
3451 if (TREE_CODE (field) == FIELD_DECL)
3452 {
3453 int num;
3454
3455 if (TREE_TYPE (field) == error_mark_node)
3456 continue;
3457
3458 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3459 TREE_TYPE (field), subclasses,
3460 bit_offset);
3461 if (!num)
3462 return 0;
3463 for (i = 0; i < num; i++)
3464 classes[i] = merge_classes (subclasses[i], classes[i]);
3465 }
3466 }
3467 break;
3468
3469 default:
3470 gcc_unreachable ();
3471 }
3472
3473 /* Final merger cleanup. */
3474 for (i = 0; i < words; i++)
3475 {
3476 /* If one class is MEMORY, everything should be passed in
3477 memory. */
3478 if (classes[i] == X86_64_MEMORY_CLASS)
3479 return 0;
3480
3481 /* The X86_64_SSEUP_CLASS should be always preceded by
3482 X86_64_SSE_CLASS. */
3483 if (classes[i] == X86_64_SSEUP_CLASS
3484 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3485 classes[i] = X86_64_SSE_CLASS;
3486
3487 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3488 if (classes[i] == X86_64_X87UP_CLASS
3489 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3490 classes[i] = X86_64_SSE_CLASS;
3491 }
3492 return words;
3493 }
3494
3495 /* Compute the alignment needed. We align all types to their natural boundaries,
3496 with the exception of XFmode, which is aligned to 128 bits. */
3497 if (mode != VOIDmode && mode != BLKmode)
3498 {
3499 int mode_alignment = GET_MODE_BITSIZE (mode);
3500
3501 if (mode == XFmode)
3502 mode_alignment = 128;
3503 else if (mode == XCmode)
3504 mode_alignment = 256;
3505 if (COMPLEX_MODE_P (mode))
3506 mode_alignment /= 2;
3507 /* Misaligned fields are always returned in memory. */
3508 if (bit_offset % mode_alignment)
3509 return 0;
3510 }
3511
3512 /* For V1xx modes, just use the base mode. */
3513 if (VECTOR_MODE_P (mode)
3514 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3515 mode = GET_MODE_INNER (mode);
3516
3517 /* Classification of atomic types. */
3518 switch (mode)
3519 {
3520 case SDmode:
3521 case DDmode:
3522 classes[0] = X86_64_SSE_CLASS;
3523 return 1;
3524 case TDmode:
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3527 return 2;
3528 case DImode:
3529 case SImode:
3530 case HImode:
3531 case QImode:
3532 case CSImode:
3533 case CHImode:
3534 case CQImode:
3535 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3536 classes[0] = X86_64_INTEGERSI_CLASS;
3537 else
3538 classes[0] = X86_64_INTEGER_CLASS;
3539 return 1;
3540 case CDImode:
3541 case TImode:
3542 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3543 return 2;
3544 case CTImode:
3545 return 0;
3546 case SFmode:
3547 if (!(bit_offset % 64))
3548 classes[0] = X86_64_SSESF_CLASS;
3549 else
3550 classes[0] = X86_64_SSE_CLASS;
3551 return 1;
3552 case DFmode:
3553 classes[0] = X86_64_SSEDF_CLASS;
3554 return 1;
3555 case XFmode:
3556 classes[0] = X86_64_X87_CLASS;
3557 classes[1] = X86_64_X87UP_CLASS;
3558 return 2;
3559 case TFmode:
3560 classes[0] = X86_64_SSE_CLASS;
3561 classes[1] = X86_64_SSEUP_CLASS;
3562 return 2;
3563 case SCmode:
3564 classes[0] = X86_64_SSE_CLASS;
3565 return 1;
3566 case DCmode:
3567 classes[0] = X86_64_SSEDF_CLASS;
3568 classes[1] = X86_64_SSEDF_CLASS;
3569 return 2;
3570 case XCmode:
3571 classes[0] = X86_64_COMPLEX_X87_CLASS;
3572 return 1;
3573 case TCmode:
3574 /* This mode is larger than 16 bytes. */
3575 return 0;
3576 case V4SFmode:
3577 case V4SImode:
3578 case V16QImode:
3579 case V8HImode:
3580 case V2DFmode:
3581 case V2DImode:
3582 classes[0] = X86_64_SSE_CLASS;
3583 classes[1] = X86_64_SSEUP_CLASS;
3584 return 2;
3585 case V2SFmode:
3586 case V2SImode:
3587 case V4HImode:
3588 case V8QImode:
3589 classes[0] = X86_64_SSE_CLASS;
3590 return 1;
3591 case BLKmode:
3592 case VOIDmode:
3593 return 0;
3594 default:
3595 gcc_assert (VECTOR_MODE_P (mode));
3596
3597 if (bytes > 16)
3598 return 0;
3599
3600 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3601
3602 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3603 classes[0] = X86_64_INTEGERSI_CLASS;
3604 else
3605 classes[0] = X86_64_INTEGER_CLASS;
3606 classes[1] = X86_64_INTEGER_CLASS;
3607 return 1 + (bytes > 8);
3608 }
3609 }
3610
3611 /* Examine the argument and set the number of registers required in each
3612 class. Return 0 iff the parameter should be passed in memory. */
3613 static int
3614 examine_argument (enum machine_mode mode, tree type, int in_return,
3615 int *int_nregs, int *sse_nregs)
3616 {
3617 enum x86_64_reg_class regclass[MAX_CLASSES];
3618 int n = classify_argument (mode, type, regclass, 0);
3619
3620 *int_nregs = 0;
3621 *sse_nregs = 0;
3622 if (!n)
3623 return 0;
3624 for (n--; n >= 0; n--)
3625 switch (regclass[n])
3626 {
3627 case X86_64_INTEGER_CLASS:
3628 case X86_64_INTEGERSI_CLASS:
3629 (*int_nregs)++;
3630 break;
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3634 (*sse_nregs)++;
3635 break;
3636 case X86_64_NO_CLASS:
3637 case X86_64_SSEUP_CLASS:
3638 break;
3639 case X86_64_X87_CLASS:
3640 case X86_64_X87UP_CLASS:
3641 if (!in_return)
3642 return 0;
3643 break;
3644 case X86_64_COMPLEX_X87_CLASS:
3645 return in_return ? 2 : 0;
3646 case X86_64_MEMORY_CLASS:
3647 gcc_unreachable ();
3648 }
3649 return 1;
3650 }
3651
3652 /* Construct container for the argument used by GCC interface. See
3653 FUNCTION_ARG for the detailed description. */
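/* Informally: for an argument classified as two eightbytes, say
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, the result is a PARALLEL
   holding a DFmode SSE-register piece at byte offset 0 and an SImode
   integer-register piece at byte offset 8; single-class arguments usually
   collapse to a plain REG via the simple cases below. */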
3654
3655 static rtx
3656 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3657 tree type, int in_return, int nintregs, int nsseregs,
3658 const int *intreg, int sse_regno)
3659 {
3660 /* The following variables hold the static issued_error state. */
3661 static bool issued_sse_arg_error;
3662 static bool issued_sse_ret_error;
3663 static bool issued_x87_ret_error;
3664
3665 enum machine_mode tmpmode;
3666 int bytes =
3667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3668 enum x86_64_reg_class regclass[MAX_CLASSES];
3669 int n;
3670 int i;
3671 int nexps = 0;
3672 int needed_sseregs, needed_intregs;
3673 rtx exp[MAX_CLASSES];
3674 rtx ret;
3675
3676 n = classify_argument (mode, type, regclass, 0);
3677 if (!n)
3678 return NULL;
3679 if (!examine_argument (mode, type, in_return, &needed_intregs,
3680 &needed_sseregs))
3681 return NULL;
3682 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3683 return NULL;
3684
3685 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3686 some less clueful developer tries to use floating-point anyway. */
3687 if (needed_sseregs && !TARGET_SSE)
3688 {
3689 if (in_return)
3690 {
3691 if (!issued_sse_ret_error)
3692 {
3693 error ("SSE register return with SSE disabled");
3694 issued_sse_ret_error = true;
3695 }
3696 }
3697 else if (!issued_sse_arg_error)
3698 {
3699 error ("SSE register argument with SSE disabled");
3700 issued_sse_arg_error = true;
3701 }
3702 return NULL;
3703 }
3704
3705 /* Likewise, error if the ABI requires us to return values in the
3706 x87 registers and the user specified -mno-80387. */
3707 if (!TARGET_80387 && in_return)
3708 for (i = 0; i < n; i++)
3709 if (regclass[i] == X86_64_X87_CLASS
3710 || regclass[i] == X86_64_X87UP_CLASS
3711 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3712 {
3713 if (!issued_x87_ret_error)
3714 {
3715 error ("x87 register return with x87 disabled");
3716 issued_x87_ret_error = true;
3717 }
3718 return NULL;
3719 }
3720
3721 /* First construct the simple cases. Avoid SCmode, since we want to use a
3722 single register to pass this type. */
3723 if (n == 1 && mode != SCmode)
3724 switch (regclass[0])
3725 {
3726 case X86_64_INTEGER_CLASS:
3727 case X86_64_INTEGERSI_CLASS:
3728 return gen_rtx_REG (mode, intreg[0]);
3729 case X86_64_SSE_CLASS:
3730 case X86_64_SSESF_CLASS:
3731 case X86_64_SSEDF_CLASS:
3732 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3733 case X86_64_X87_CLASS:
3734 case X86_64_COMPLEX_X87_CLASS:
3735 return gen_rtx_REG (mode, FIRST_STACK_REG);
3736 case X86_64_NO_CLASS:
3737 /* Zero sized array, struct or class. */
3738 return NULL;
3739 default:
3740 gcc_unreachable ();
3741 }
3742 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3743 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3744 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3745
3746 if (n == 2
3747 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3748 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3749 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3750 && regclass[1] == X86_64_INTEGER_CLASS
3751 && (mode == CDImode || mode == TImode || mode == TFmode)
3752 && intreg[0] + 1 == intreg[1])
3753 return gen_rtx_REG (mode, intreg[0]);
3754
3755 /* Otherwise figure out the entries of the PARALLEL. */
3756 for (i = 0; i < n; i++)
3757 {
3758 switch (regclass[i])
3759 {
3760 case X86_64_NO_CLASS:
3761 break;
3762 case X86_64_INTEGER_CLASS:
3763 case X86_64_INTEGERSI_CLASS:
3764 /* Merge TImodes on aligned occasions here too. */
3765 if (i * 8 + 8 > bytes)
3766 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3767 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3768 tmpmode = SImode;
3769 else
3770 tmpmode = DImode;
3771 /* We've requested a size for which there is no integer mode. Use DImode. */
3772 if (tmpmode == BLKmode)
3773 tmpmode = DImode;
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (tmpmode, *intreg),
3776 GEN_INT (i*8));
3777 intreg++;
3778 break;
3779 case X86_64_SSESF_CLASS:
3780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (SFmode,
3782 SSE_REGNO (sse_regno)),
3783 GEN_INT (i*8));
3784 sse_regno++;
3785 break;
3786 case X86_64_SSEDF_CLASS:
3787 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3788 gen_rtx_REG (DFmode,
3789 SSE_REGNO (sse_regno)),
3790 GEN_INT (i*8));
3791 sse_regno++;
3792 break;
3793 case X86_64_SSE_CLASS:
3794 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3795 tmpmode = TImode;
3796 else
3797 tmpmode = DImode;
3798 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3799 gen_rtx_REG (tmpmode,
3800 SSE_REGNO (sse_regno)),
3801 GEN_INT (i*8));
3802 if (tmpmode == TImode)
3803 i++;
3804 sse_regno++;
3805 break;
3806 default:
3807 gcc_unreachable ();
3808 }
3809 }
3810
3811 /* Empty aligned struct, union or class. */
3812 if (nexps == 0)
3813 return NULL;
3814
3815 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3816 for (i = 0; i < nexps; i++)
3817 XVECEXP (ret, 0, i) = exp [i];
3818 return ret;
3819 }
3820
3821 /* Update the data in CUM to advance over an argument of mode MODE
3822 and data type TYPE. (TYPE is null for libcalls where that information
3823 may not be available.) */
3824
3825 static void
3826 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3827 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3828 {
3829 switch (mode)
3830 {
3831 default:
3832 break;
3833
3834 case BLKmode:
3835 if (bytes < 0)
3836 break;
3837 /* FALLTHRU */
3838
3839 case DImode:
3840 case SImode:
3841 case HImode:
3842 case QImode:
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3846
3847 if (cum->nregs <= 0)
3848 {
3849 cum->nregs = 0;
3850 cum->regno = 0;
3851 }
3852 break;
3853
3854 case DFmode:
3855 if (cum->float_in_sse < 2)
3856 break;
3857 case SFmode:
3858 if (cum->float_in_sse < 1)
3859 break;
3860 /* FALLTHRU */
3861
3862 case TImode:
3863 case V16QImode:
3864 case V8HImode:
3865 case V4SImode:
3866 case V2DImode:
3867 case V4SFmode:
3868 case V2DFmode:
3869 if (!type || !AGGREGATE_TYPE_P (type))
3870 {
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3875 {
3876 cum->sse_nregs = 0;
3877 cum->sse_regno = 0;
3878 }
3879 }
3880 break;
3881
3882 case V8QImode:
3883 case V4HImode:
3884 case V2SImode:
3885 case V2SFmode:
3886 if (!type || !AGGREGATE_TYPE_P (type))
3887 {
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3892 {
3893 cum->mmx_nregs = 0;
3894 cum->mmx_regno = 0;
3895 }
3896 }
3897 break;
3898 }
3899 }
3900
3901 static void
3902 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3903 tree type, HOST_WIDE_INT words)
3904 {
3905 int int_nregs, sse_nregs;
3906
3907 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3908 cum->words += words;
3909 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3910 {
3911 cum->nregs -= int_nregs;
3912 cum->sse_nregs -= sse_nregs;
3913 cum->regno += int_nregs;
3914 cum->sse_regno += sse_nregs;
3915 }
3916 else
3917 cum->words += words;
3918 }
3919
3920 static void
3921 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3922 HOST_WIDE_INT words)
3923 {
3924 /* Otherwise, this should be passed indirect. */
3925 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3926
3927 cum->words += words;
3928 if (cum->nregs > 0)
3929 {
3930 cum->nregs -= 1;
3931 cum->regno += 1;
3932 }
3933 }
3934
3935 void
3936 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3937 tree type, int named ATTRIBUTE_UNUSED)
3938 {
3939 HOST_WIDE_INT bytes, words;
3940
3941 if (mode == BLKmode)
3942 bytes = int_size_in_bytes (type);
3943 else
3944 bytes = GET_MODE_SIZE (mode);
3945 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3946
3947 if (type)
3948 mode = type_natural_mode (type);
3949
3950 if (TARGET_64BIT_MS_ABI)
3951 function_arg_advance_ms_64 (cum, bytes, words);
3952 else if (TARGET_64BIT)
3953 function_arg_advance_64 (cum, mode, type, words);
3954 else
3955 function_arg_advance_32 (cum, mode, type, bytes, words);
3956 }
3957
3958 /* Define where to put the arguments to a function.
3959 Value is zero to push the argument on the stack,
3960 or a hard register in which to store the argument.
3961
3962 MODE is the argument's machine mode.
3963 TYPE is the data type of the argument (as a tree).
3964 This is null for libcalls where that information may
3965 not be available.
3966 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3967 the preceding args and about the function being called.
3968 NAMED is nonzero if this argument is a named parameter
3969 (otherwise it is an extra parameter matching an ellipsis). */
3970
3971 static rtx
3972 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3973 enum machine_mode orig_mode, tree type,
3974 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3975 {
3976 static bool warnedsse, warnedmmx;
3977
3978 /* Avoid the AL settings for the Unix64 ABI. */
3979 if (mode == VOIDmode)
3980 return constm1_rtx;
3981
3982 switch (mode)
3983 {
3984 default:
3985 break;
3986
3987 case BLKmode:
3988 if (bytes < 0)
3989 break;
3990 /* FALLTHRU */
3991 case DImode:
3992 case SImode:
3993 case HImode:
3994 case QImode:
3995 if (words <= cum->nregs)
3996 {
3997 int regno = cum->regno;
3998
3999 /* Fastcall allocates the first two DWORD (SImode) or
4000 smaller arguments to ECX and EDX. */
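/* E.g. (informal): for __attribute__((fastcall)) int f (int a, int b, int c),
A is passed in ECX, B in EDX, and C on the stack; a DImode or BLKmode
argument is not put in a register here (break) and so goes on the stack. */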
4001 if (cum->fastcall)
4002 {
4003 if (mode == BLKmode || mode == DImode)
4004 break;
4005
4006 /* ECX not EAX is the first allocated register. */
4007 if (regno == 0)
4008 regno = 2;
4009 }
4010 return gen_rtx_REG (mode, regno);
4011 }
4012 break;
4013
4014 case DFmode:
4015 if (cum->float_in_sse < 2)
4016 break;
4017 case SFmode:
4018 if (cum->float_in_sse < 1)
4019 break;
4020 /* FALLTHRU */
4021 case TImode:
4022 case V16QImode:
4023 case V8HImode:
4024 case V4SImode:
4025 case V2DImode:
4026 case V4SFmode:
4027 case V2DFmode:
4028 if (!type || !AGGREGATE_TYPE_P (type))
4029 {
4030 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4031 {
4032 warnedsse = true;
4033 warning (0, "SSE vector argument without SSE enabled "
4034 "changes the ABI");
4035 }
4036 if (cum->sse_nregs)
4037 return gen_reg_or_parallel (mode, orig_mode,
4038 cum->sse_regno + FIRST_SSE_REG);
4039 }
4040 break;
4041
4042 case V8QImode:
4043 case V4HImode:
4044 case V2SImode:
4045 case V2SFmode:
4046 if (!type || !AGGREGATE_TYPE_P (type))
4047 {
4048 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4049 {
4050 warnedmmx = true;
4051 warning (0, "MMX vector argument without MMX enabled "
4052 "changes the ABI");
4053 }
4054 if (cum->mmx_nregs)
4055 return gen_reg_or_parallel (mode, orig_mode,
4056 cum->mmx_regno + FIRST_MMX_REG);
4057 }
4058 break;
4059 }
4060
4061 return NULL_RTX;
4062 }
4063
4064 static rtx
4065 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4066 enum machine_mode orig_mode, tree type)
4067 {
4068 /* Handle a hidden AL argument containing number of registers
4069 for varargs x86-64 functions. */
4070 if (mode == VOIDmode)
4071 return GEN_INT (cum->maybe_vaarg
4072 ? (cum->sse_nregs < 0
4073 ? SSE_REGPARM_MAX
4074 : cum->sse_regno)
4075 : -1);
4076
4077 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4078 cum->sse_nregs,
4079 &x86_64_int_parameter_registers [cum->regno],
4080 cum->sse_regno);
4081 }
4082
4083 static rtx
4084 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4085 enum machine_mode orig_mode, int named)
4086 {
4087 unsigned int regno;
4088
4089 /* Avoid the AL settings for the Unix64 ABI. */
4090 if (mode == VOIDmode)
4091 return constm1_rtx;
4092
4093 /* If we've run out of registers, it goes on the stack. */
4094 if (cum->nregs == 0)
4095 return NULL_RTX;
4096
4097 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4098
4099 /* Only floating point modes are passed in anything but integer regs. */
4100 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4101 {
4102 if (named)
4103 regno = cum->regno + FIRST_SSE_REG;
4104 else
4105 {
4106 rtx t1, t2;
4107
4108 /* Unnamed floating parameters are passed in both the
4109 SSE and integer registers. */
4110 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4111 t2 = gen_rtx_REG (mode, regno);
4112 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4113 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4114 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4115 }
4116 }
4117
4118 return gen_reg_or_parallel (mode, orig_mode, regno);
4119 }
4120
4121 rtx
4122 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4123 tree type, int named)
4124 {
4125 enum machine_mode mode = omode;
4126 HOST_WIDE_INT bytes, words;
4127
4128 if (mode == BLKmode)
4129 bytes = int_size_in_bytes (type);
4130 else
4131 bytes = GET_MODE_SIZE (mode);
4132 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4133
4134 /* To simplify the code below, represent vector types with a vector mode
4135 even if MMX/SSE are not active. */
4136 if (type && TREE_CODE (type) == VECTOR_TYPE)
4137 mode = type_natural_mode (type);
4138
4139 if (TARGET_64BIT_MS_ABI)
4140 return function_arg_ms_64 (cum, mode, omode, named);
4141 else if (TARGET_64BIT)
4142 return function_arg_64 (cum, mode, omode, type);
4143 else
4144 return function_arg_32 (cum, mode, omode, type, bytes, words);
4145 }
4146
4147 /* A C expression that indicates when an argument must be passed by
4148 reference. If nonzero for an argument, a copy of that argument is
4149 made in memory and a pointer to the argument is passed instead of
4150 the argument itself. The pointer is passed in whatever way is
4151 appropriate for passing a pointer to that type. */
4152
4153 static bool
4154 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4155 enum machine_mode mode ATTRIBUTE_UNUSED,
4156 tree type, bool named ATTRIBUTE_UNUSED)
4157 {
4158 if (TARGET_64BIT_MS_ABI)
4159 {
4160 if (type)
4161 {
4162 /* Arrays are passed by reference. */
4163 if (TREE_CODE (type) == ARRAY_TYPE)
4164 return true;
4165
4166 if (AGGREGATE_TYPE_P (type))
4167 {
4168 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4169 are passed by reference. */
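/* E.g. a 12-byte struct (exact_log2 == -1) is passed by reference,
while an 8-byte struct (exact_log2 == 3) is passed by value. */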
4170 int el2 = exact_log2 (int_size_in_bytes (type));
4171 return !(el2 >= 0 && el2 <= 3);
4172 }
4173 }
4174
4175 /* __m128 is passed by reference. */
4176 /* ??? How to handle complex? For now treat them as structs,
4177 and pass them by reference if they're too large. */
4178 if (GET_MODE_SIZE (mode) > 8)
4179 return true;
4180 }
4181 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4182 return 1;
4183
4184 return 0;
4185 }
4186
4187 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4188 ABI. Only called if TARGET_SSE. */
4189 static bool
4190 contains_128bit_aligned_vector_p (tree type)
4191 {
4192 enum machine_mode mode = TYPE_MODE (type);
4193 if (SSE_REG_MODE_P (mode)
4194 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4195 return true;
4196 if (TYPE_ALIGN (type) < 128)
4197 return false;
4198
4199 if (AGGREGATE_TYPE_P (type))
4200 {
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type))
4203 {
4204 case RECORD_TYPE:
4205 case UNION_TYPE:
4206 case QUAL_UNION_TYPE:
4207 {
4208 tree field;
4209
4210 /* Walk all the structure fields. */
4211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4212 {
4213 if (TREE_CODE (field) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4215 return true;
4216 }
4217 break;
4218 }
4219
4220 case ARRAY_TYPE:
4221 /* Just for use if some languages pass arrays by value. */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4223 return true;
4224 break;
4225
4226 default:
4227 gcc_unreachable ();
4228 }
4229 }
4230 return false;
4231 }
4232
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type. */
4235
4236 int
4237 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4238 {
4239 int align;
4240 if (type)
4241 align = TYPE_ALIGN (type);
4242 else
4243 align = GET_MODE_ALIGNMENT (mode);
4244 if (align < PARM_BOUNDARY)
4245 align = PARM_BOUNDARY;
4246 if (!TARGET_64BIT)
4247 {
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
4250 alignment.
4251
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
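/* E.g. (informal): a __m128 (V4SFmode) argument keeps its 128-bit alignment
when SSE is enabled, while a plain double argument is aligned to
PARM_BOUNDARY (32 bits) on ia32. */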
4255 if (!TARGET_SSE)
4256 align = PARM_BOUNDARY;
4257 else if (!type)
4258 {
4259 if (!SSE_REG_MODE_P (mode))
4260 align = PARM_BOUNDARY;
4261 }
4262 else
4263 {
4264 if (!contains_128bit_aligned_vector_p (type))
4265 align = PARM_BOUNDARY;
4266 }
4267 }
4268 if (align > 128)
4269 align = 128;
4270 return align;
4271 }
4272
4273 /* Return true if REGNO is a possible register number of a function value. */
4274
4275 bool
4276 ix86_function_value_regno_p (int regno)
4277 {
4278 switch (regno)
4279 {
4280 case 0:
4281 return true;
4282
4283 case FIRST_FLOAT_REG:
4284 if (TARGET_64BIT_MS_ABI)
4285 return false;
4286 return TARGET_FLOAT_RETURNS_IN_80387;
4287
4288 case FIRST_SSE_REG:
4289 return TARGET_SSE;
4290
4291 case FIRST_MMX_REG:
4292 if (TARGET_MACHO || TARGET_64BIT)
4293 return false;
4294 return TARGET_MMX;
4295 }
4296
4297 return false;
4298 }
4299
4300 /* Define how to find the value returned by a function.
4301 VALTYPE is the data type of the value (as a tree).
4302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4303 otherwise, FUNC is 0. */
4304
4305 static rtx
4306 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4307 tree fntype, tree fn)
4308 {
4309 unsigned int regno;
4310
4311 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4312 we normally prevent this case when mmx is not available. However
4313 some ABIs may require the result to be returned like DImode. */
4314 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4315 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4316
4317 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4318 we prevent this case when sse is not available. However some ABIs
4319 may require the result to be returned like integer TImode. */
4320 else if (mode == TImode
4321 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4322 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4323
4324 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4325 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4326 regno = FIRST_FLOAT_REG;
4327 else
4328 /* Most things go in %eax. */
4329 regno = 0;
4330
4331 /* Override FP return register with %xmm0 for local functions when
4332 SSE math is enabled or for functions with sseregparm attribute. */
4333 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4334 {
4335 int sse_level = ix86_function_sseregparm (fntype, fn);
4336 if ((sse_level >= 1 && mode == SFmode)
4337 || (sse_level == 2 && mode == DFmode))
4338 regno = FIRST_SSE_REG;
4339 }
4340
4341 return gen_rtx_REG (orig_mode, regno);
4342 }
4343
4344 static rtx
4345 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4346 tree valtype)
4347 {
4348 rtx ret;
4349
4350 /* Handle libcalls, which don't provide a type node. */
4351 if (valtype == NULL)
4352 {
4353 switch (mode)
4354 {
4355 case SFmode:
4356 case SCmode:
4357 case DFmode:
4358 case DCmode:
4359 case TFmode:
4360 case SDmode:
4361 case DDmode:
4362 case TDmode:
4363 return gen_rtx_REG (mode, FIRST_SSE_REG);
4364 case XFmode:
4365 case XCmode:
4366 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4367 case TCmode:
4368 return NULL;
4369 default:
4370 return gen_rtx_REG (mode, 0);
4371 }
4372 }
4373
4374 ret = construct_container (mode, orig_mode, valtype, 1,
4375 REGPARM_MAX, SSE_REGPARM_MAX,
4376 x86_64_int_return_registers, 0);
4377
4378 /* For zero sized structures, construct_container returns NULL, but we
4379 need to keep the rest of the compiler happy by returning a meaningful value. */
4380 if (!ret)
4381 ret = gen_rtx_REG (orig_mode, 0);
4382
4383 return ret;
4384 }
4385
4386 static rtx
4387 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4388 {
4389 unsigned int regno = 0;
4390
4391 if (TARGET_SSE)
4392 {
4393 if (mode == SFmode || mode == DFmode)
4394 regno = FIRST_SSE_REG;
4395 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4396 regno = FIRST_SSE_REG;
4397 }
4398
4399 return gen_rtx_REG (orig_mode, regno);
4400 }
4401
4402 static rtx
4403 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4404 enum machine_mode orig_mode, enum machine_mode mode)
4405 {
4406 tree fn, fntype;
4407
4408 fn = NULL_TREE;
4409 if (fntype_or_decl && DECL_P (fntype_or_decl))
4410 fn = fntype_or_decl;
4411 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4412
4413 if (TARGET_64BIT_MS_ABI)
4414 return function_value_ms_64 (orig_mode, mode);
4415 else if (TARGET_64BIT)
4416 return function_value_64 (orig_mode, mode, valtype);
4417 else
4418 return function_value_32 (orig_mode, mode, fntype, fn);
4419 }
4420
4421 static rtx
4422 ix86_function_value (tree valtype, tree fntype_or_decl,
4423 bool outgoing ATTRIBUTE_UNUSED)
4424 {
4425 enum machine_mode mode, orig_mode;
4426
4427 orig_mode = TYPE_MODE (valtype);
4428 mode = type_natural_mode (valtype);
4429 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4430 }
4431
4432 rtx
4433 ix86_libcall_value (enum machine_mode mode)
4434 {
4435 return ix86_function_value_1 (NULL, NULL, mode, mode);
4436 }
4437
4438 /* Return true iff type is returned in memory. */
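/* Informal examples for the 32-bit case below: a 16-byte struct is returned
in memory, long double (XFmode) is returned in a register (typically st(0)),
and an 8-byte vector is returned in MM0 only when MMX is enabled. */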
4439
4440 static int
4441 return_in_memory_32 (tree type, enum machine_mode mode)
4442 {
4443 HOST_WIDE_INT size;
4444
4445 if (mode == BLKmode)
4446 return 1;
4447
4448 size = int_size_in_bytes (type);
4449
4450 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4451 return 0;
4452
4453 if (VECTOR_MODE_P (mode) || mode == TImode)
4454 {
4455 /* User-created vectors small enough to fit in EAX. */
4456 if (size < 8)
4457 return 0;
4458
4459 /* MMX/3dNow values are returned in MM0,
4460 except when it doesn't exist. */
4461 if (size == 8)
4462 return (TARGET_MMX ? 0 : 1);
4463
4464 /* SSE values are returned in XMM0, except when it doesn't exist. */
4465 if (size == 16)
4466 return (TARGET_SSE ? 0 : 1);
4467 }
4468
4469 if (mode == XFmode)
4470 return 0;
4471
4472 if (mode == TDmode)
4473 return 1;
4474
4475 if (size > 12)
4476 return 1;
4477 return 0;
4478 }
4479
4480 static int
4481 return_in_memory_64 (tree type, enum machine_mode mode)
4482 {
4483 int needed_intregs, needed_sseregs;
4484 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4485 }
4486
4487 static int
4488 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4489 {
4490 HOST_WIDE_INT size = int_size_in_bytes (type);
4491
4492 /* __m128 and friends are returned in xmm0. */
4493 if (size == 16 && VECTOR_MODE_P (mode))
4494 return 0;
4495
4496 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
4497 return (size != 1 && size != 2 && size != 4 && size != 8);
4498 }
4499
4500 int
4501 ix86_return_in_memory (tree type)
4502 {
4503 enum machine_mode mode = type_natural_mode (type);
4504
4505 if (TARGET_64BIT_MS_ABI)
4506 return return_in_memory_ms_64 (type, mode);
4507 else if (TARGET_64BIT)
4508 return return_in_memory_64 (type, mode);
4509 else
4510 return return_in_memory_32 (type, mode);
4511 }
4512
4513 /* Return false iff TYPE is returned in memory. This version is used
4514 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4515 but differs notably in that when MMX is available, 8-byte vectors
4516 are returned in memory, rather than in MMX registers. */
4517
4518 int
4519 ix86_sol10_return_in_memory (tree type)
4520 {
4521 int size;
4522 enum machine_mode mode = type_natural_mode (type);
4523
4524 if (TARGET_64BIT)
4525 return return_in_memory_64 (type, mode);
4526
4527 if (mode == BLKmode)
4528 return 1;
4529
4530 size = int_size_in_bytes (type);
4531
4532 if (VECTOR_MODE_P (mode))
4533 {
4534 /* Return in memory only if MMX registers *are* available. This
4535 seems backwards, but it is consistent with the existing
4536 Solaris x86 ABI. */
4537 if (size == 8)
4538 return TARGET_MMX;
4539 if (size == 16)
4540 return !TARGET_SSE;
4541 }
4542 else if (mode == TImode)
4543 return !TARGET_SSE;
4544 else if (mode == XFmode)
4545 return 0;
4546
4547 return size > 12;
4548 }
4549
4550 /* When returning SSE vector types, we have a choice of either
4551 (1) being ABI incompatible with a -march switch, or
4552 (2) generating an error.
4553 Given no good solution, I think the safest thing is one warning.
4554 The user won't be able to use -Werror, but....
4555
4556 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4557 called in response to actually generating a caller or callee that
4558 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4559 via aggregate_value_p for general type probing from tree-ssa. */
4560
4561 static rtx
4562 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4563 {
4564 static bool warnedsse, warnedmmx;
4565
4566 if (!TARGET_64BIT && type)
4567 {
4568 /* Look at the return type of the function, not the function type. */
4569 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4570
4571 if (!TARGET_SSE && !warnedsse)
4572 {
4573 if (mode == TImode
4574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4575 {
4576 warnedsse = true;
4577 warning (0, "SSE vector return without SSE enabled "
4578 "changes the ABI");
4579 }
4580 }
4581
4582 if (!TARGET_MMX && !warnedmmx)
4583 {
4584 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4585 {
4586 warnedmmx = true;
4587 warning (0, "MMX vector return without MMX enabled "
4588 "changes the ABI");
4589 }
4590 }
4591 }
4592
4593 return NULL;
4594 }
4595
4596 \f
4597 /* Create the va_list data type. */
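/* For the 64-bit SysV ABI the record built below corresponds to the familiar
   declaration (informal sketch, field names as used in this file):

	typedef struct __va_list_tag {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __va_list_tag;

   and the actual va_list type is a one-element array of this record. */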
4598
4599 static tree
4600 ix86_build_builtin_va_list (void)
4601 {
4602 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4603
4604 /* For i386 we use a plain pointer to the argument area. */
4605 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4606 return build_pointer_type (char_type_node);
4607
4608 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4609 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4610
4611 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4612 unsigned_type_node);
4613 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4614 unsigned_type_node);
4615 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4616 ptr_type_node);
4617 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4618 ptr_type_node);
4619
4620 va_list_gpr_counter_field = f_gpr;
4621 va_list_fpr_counter_field = f_fpr;
4622
4623 DECL_FIELD_CONTEXT (f_gpr) = record;
4624 DECL_FIELD_CONTEXT (f_fpr) = record;
4625 DECL_FIELD_CONTEXT (f_ovf) = record;
4626 DECL_FIELD_CONTEXT (f_sav) = record;
4627
4628 TREE_CHAIN (record) = type_decl;
4629 TYPE_NAME (record) = type_decl;
4630 TYPE_FIELDS (record) = f_gpr;
4631 TREE_CHAIN (f_gpr) = f_fpr;
4632 TREE_CHAIN (f_fpr) = f_ovf;
4633 TREE_CHAIN (f_ovf) = f_sav;
4634
4635 layout_type (record);
4636
4637 /* The correct type is an array type of one element. */
4638 return build_array_type (record, build_index_type (size_zero_node));
4639 }
4640
4641 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4642
4643 static void
4644 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4645 {
4646 rtx save_area, mem;
4647 rtx label;
4648 rtx label_ref;
4649 rtx tmp_reg;
4650 rtx nsse_reg;
4651 int set;
4652 int i;
4653
4654 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4655 return;
4656
4657 /* Indicate to allocate space on the stack for varargs save area. */
4658 ix86_save_varrargs_registers = 1;
4659 cfun->stack_alignment_needed = 128;
4660
4661 save_area = frame_pointer_rtx;
4662 set = get_varargs_alias_set ();
4663
4664 for (i = cum->regno;
4665 i < ix86_regparm
4666 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4667 i++)
4668 {
4669 mem = gen_rtx_MEM (Pmode,
4670 plus_constant (save_area, i * UNITS_PER_WORD));
4671 MEM_NOTRAP_P (mem) = 1;
4672 set_mem_alias_set (mem, set);
4673 emit_move_insn (mem, gen_rtx_REG (Pmode,
4674 x86_64_int_parameter_registers[i]));
4675 }
4676
4677 if (cum->sse_nregs && cfun->va_list_fpr_size)
4678 {
4679 /* Now emit code to save SSE registers. The AX parameter contains the number
4680 of SSE parameter registers used to call this function. We use the
4681 sse_prologue_save insn template, which produces a computed jump across
4682 the SSE saves. We need some preparation work to get this working. */
4683
4684 label = gen_label_rtx ();
4685 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4686
4687 /* Compute the address to jump to:
4688 label + nnamed_sse_arguments*4 - eax*4 (matching the GEN_INT (4) scaling below). */
4689 tmp_reg = gen_reg_rtx (Pmode);
4690 nsse_reg = gen_reg_rtx (Pmode);
4691 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4692 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4693 gen_rtx_MULT (Pmode, nsse_reg,
4694 GEN_INT (4))));
4695 if (cum->sse_regno)
4696 emit_move_insn
4697 (nsse_reg,
4698 gen_rtx_CONST (DImode,
4699 gen_rtx_PLUS (DImode,
4700 label_ref,
4701 GEN_INT (cum->sse_regno * 4))));
4702 else
4703 emit_move_insn (nsse_reg, label_ref);
4704 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4705
4706 /* Compute the address of the memory block we save into. We always use a
4707 pointer pointing 127 bytes past the first byte to store - this keeps each
4708 displacement within a signed byte and the instruction size limited to 4 bytes. */
4709 tmp_reg = gen_reg_rtx (Pmode);
4710 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4711 plus_constant (save_area,
4712 8 * REGPARM_MAX + 127)));
4713 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4714 MEM_NOTRAP_P (mem) = 1;
4715 set_mem_alias_set (mem, set);
4716 set_mem_align (mem, BITS_PER_WORD);
4717
4718 /* And finally do the dirty job! */
4719 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4720 GEN_INT (cum->sse_regno), label));
4721 }
4722 }
4723
4724 static void
4725 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4726 {
4727 int set = get_varargs_alias_set ();
4728 int i;
4729
4730 for (i = cum->regno; i < REGPARM_MAX; i++)
4731 {
4732 rtx reg, mem;
4733
4734 mem = gen_rtx_MEM (Pmode,
4735 plus_constant (virtual_incoming_args_rtx,
4736 i * UNITS_PER_WORD));
4737 MEM_NOTRAP_P (mem) = 1;
4738 set_mem_alias_set (mem, set);
4739
4740 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4741 emit_move_insn (mem, reg);
4742 }
4743 }
4744
4745 static void
4746 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4747 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4748 int no_rtl)
4749 {
4750 CUMULATIVE_ARGS next_cum;
4751 tree fntype;
4752 int stdarg_p;
4753
4754 /* This argument doesn't appear to be used anymore. Which is good,
4755 because the old code here didn't suppress rtl generation. */
4756 gcc_assert (!no_rtl);
4757
4758 if (!TARGET_64BIT)
4759 return;
4760
4761 fntype = TREE_TYPE (current_function_decl);
4762 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4763 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4764 != void_type_node));
4765
4766 /* For varargs, we do not want to skip the dummy va_dcl argument.
4767 For stdargs, we do want to skip the last named argument. */
4768 next_cum = *cum;
4769 if (stdarg_p)
4770 function_arg_advance (&next_cum, mode, type, 1);
4771
4772 if (TARGET_64BIT_MS_ABI)
4773 setup_incoming_varargs_ms_64 (&next_cum);
4774 else
4775 setup_incoming_varargs_64 (&next_cum);
4776 }
4777
4778 /* Implement va_start. */
4779
4780 void
4781 ix86_va_start (tree valist, rtx nextarg)
4782 {
4783 HOST_WIDE_INT words, n_gpr, n_fpr;
4784 tree f_gpr, f_fpr, f_ovf, f_sav;
4785 tree gpr, fpr, ovf, sav, t;
4786 tree type;
4787
4788 /* Only 64bit target needs something special. */
4789 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4790 {
4791 std_expand_builtin_va_start (valist, nextarg);
4792 return;
4793 }
4794
4795 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4796 f_fpr = TREE_CHAIN (f_gpr);
4797 f_ovf = TREE_CHAIN (f_fpr);
4798 f_sav = TREE_CHAIN (f_ovf);
4799
4800 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4801 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4802 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4803 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4804 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4805
4806 /* Count number of gp and fp argument registers used. */
4807 words = current_function_args_info.words;
4808 n_gpr = current_function_args_info.regno;
4809 n_fpr = current_function_args_info.sse_regno;
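/* Layout reminder (informal): in the register save area the six integer
registers occupy bytes 0..47 (8 bytes each) and the SSE registers follow
at byte 48 (16 bytes each), which is why gp_offset is scaled by 8 and
fp_offset by 16 plus 8*REGPARM_MAX below. */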
4810
4811 if (cfun->va_list_gpr_size)
4812 {
4813 type = TREE_TYPE (gpr);
4814 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4815 build_int_cst (type, n_gpr * 8));
4816 TREE_SIDE_EFFECTS (t) = 1;
4817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4818 }
4819
4820 if (cfun->va_list_fpr_size)
4821 {
4822 type = TREE_TYPE (fpr);
4823 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4824 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4825 TREE_SIDE_EFFECTS (t) = 1;
4826 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4827 }
4828
4829 /* Find the overflow area. */
4830 type = TREE_TYPE (ovf);
4831 t = make_tree (type, virtual_incoming_args_rtx);
4832 if (words != 0)
4833 t = build2 (POINTER_PLUS_EXPR, type, t,
4834 size_int (words * UNITS_PER_WORD));
4835 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4836 TREE_SIDE_EFFECTS (t) = 1;
4837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4838
4839 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4840 {
4841 /* Find the register save area.
4842 The function prologue saves it right above the stack frame. */
4843 type = TREE_TYPE (sav);
4844 t = make_tree (type, frame_pointer_rtx);
4845 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4846 TREE_SIDE_EFFECTS (t) = 1;
4847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4848 }
4849 }
4850
4851 /* Implement va_arg. */
4852
4853 static tree
4854 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4855 {
4856 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4857 tree f_gpr, f_fpr, f_ovf, f_sav;
4858 tree gpr, fpr, ovf, sav, t;
4859 int size, rsize;
4860 tree lab_false, lab_over = NULL_TREE;
4861 tree addr, t2;
4862 rtx container;
4863 int indirect_p = 0;
4864 tree ptrtype;
4865 enum machine_mode nat_mode;
4866
4867 /* Only 64bit target needs something special. */
4868 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4869 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4870
4871 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4872 f_fpr = TREE_CHAIN (f_gpr);
4873 f_ovf = TREE_CHAIN (f_fpr);
4874 f_sav = TREE_CHAIN (f_ovf);
4875
4876 valist = build_va_arg_indirect_ref (valist);
4877 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4878 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4879 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4880 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4881
4882 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4883 if (indirect_p)
4884 type = build_pointer_type (type);
4885 size = int_size_in_bytes (type);
4886 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4887
4888 nat_mode = type_natural_mode (type);
4889 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4890 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4891
4892 /* Pull the value out of the saved registers. */
4893
4894 addr = create_tmp_var (ptr_type_node, "addr");
4895 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4896
4897 if (container)
4898 {
4899 int needed_intregs, needed_sseregs;
4900 bool need_temp;
4901 tree int_addr, sse_addr;
4902
4903 lab_false = create_artificial_label ();
4904 lab_over = create_artificial_label ();
4905
4906 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4907
4908 need_temp = (!REG_P (container)
4909 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4910 || TYPE_ALIGN (type) > 128));
4911
4912 /* In case we are passing a structure, verify that it is a consecutive block
4913 in the register save area. If not, we need to do moves. */
4914 if (!need_temp && !REG_P (container))
4915 {
4916 /* Verify that all registers are strictly consecutive */
4917 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4918 {
4919 int i;
4920
4921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4922 {
4923 rtx slot = XVECEXP (container, 0, i);
4924 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4925 || INTVAL (XEXP (slot, 1)) != i * 16)
4926 need_temp = 1;
4927 }
4928 }
4929 else
4930 {
4931 int i;
4932
4933 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4934 {
4935 rtx slot = XVECEXP (container, 0, i);
4936 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4937 || INTVAL (XEXP (slot, 1)) != i * 8)
4938 need_temp = 1;
4939 }
4940 }
4941 }
4942 if (!need_temp)
4943 {
4944 int_addr = addr;
4945 sse_addr = addr;
4946 }
4947 else
4948 {
4949 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4950 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4951 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4952 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4953 }
4954
4955 /* First ensure that we fit completely in registers. */
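/* That is (informally): take the lab_false overflow path when fewer than
needed_intregs integer slots or fewer than needed_sseregs SSE slots remain
in the register save area, as tracked by gp_offset and fp_offset. */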
4956 if (needed_intregs)
4957 {
4958 t = build_int_cst (TREE_TYPE (gpr),
4959 (REGPARM_MAX - needed_intregs + 1) * 8);
4960 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4963 gimplify_and_add (t, pre_p);
4964 }
4965 if (needed_sseregs)
4966 {
4967 t = build_int_cst (TREE_TYPE (fpr),
4968 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4969 + REGPARM_MAX * 8);
4970 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4973 gimplify_and_add (t, pre_p);
4974 }
4975
4976 /* Compute index to start of area used for integer regs. */
4977 if (needed_intregs)
4978 {
4979 /* int_addr = gpr + sav; */
4980 t = fold_convert (sizetype, gpr);
4981 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4982 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4983 gimplify_and_add (t, pre_p);
4984 }
4985 if (needed_sseregs)
4986 {
4987 /* sse_addr = fpr + sav; */
4988 t = fold_convert (sizetype, fpr);
4989 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4990 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4991 gimplify_and_add (t, pre_p);
4992 }
4993 if (need_temp)
4994 {
4995 int i;
4996 tree temp = create_tmp_var (type, "va_arg_tmp");
4997
4998 /* addr = &temp; */
4999 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5000 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5001 gimplify_and_add (t, pre_p);
5002
5003 for (i = 0; i < XVECLEN (container, 0); i++)
5004 {
5005 rtx slot = XVECEXP (container, 0, i);
5006 rtx reg = XEXP (slot, 0);
5007 enum machine_mode mode = GET_MODE (reg);
5008 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5009 tree addr_type = build_pointer_type (piece_type);
5010 tree src_addr, src;
5011 int src_offset;
5012 tree dest_addr, dest;
5013
5014 if (SSE_REGNO_P (REGNO (reg)))
5015 {
5016 src_addr = sse_addr;
5017 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5018 }
5019 else
5020 {
5021 src_addr = int_addr;
5022 src_offset = REGNO (reg) * 8;
5023 }
5024 src_addr = fold_convert (addr_type, src_addr);
5025 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5026 size_int (src_offset));
5027 src = build_va_arg_indirect_ref (src_addr);
5028
5029 dest_addr = fold_convert (addr_type, addr);
5030 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5031 size_int (INTVAL (XEXP (slot, 1))));
5032 dest = build_va_arg_indirect_ref (dest_addr);
5033
5034 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5035 gimplify_and_add (t, pre_p);
5036 }
5037 }
5038
5039 if (needed_intregs)
5040 {
5041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5043 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5044 gimplify_and_add (t, pre_p);
5045 }
5046 if (needed_sseregs)
5047 {
5048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5050 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5051 gimplify_and_add (t, pre_p);
5052 }
5053
5054 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5055 gimplify_and_add (t, pre_p);
5056
5057 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5058 append_to_statement_list (t, pre_p);
5059 }
5060
5061 /* ... otherwise out of the overflow area. */
5062
5063 /* Care for on-stack alignment if needed. */
5064 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5065 || integer_zerop (TYPE_SIZE (type)))
5066 t = ovf;
5067 else
5068 {
5069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5070 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5071 size_int (align - 1));
5072 t = fold_convert (sizetype, t);
5073 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5074 size_int (-align));
5075 t = fold_convert (TREE_TYPE (ovf), t);
5076 }
5077 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5078
5079 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5080 gimplify_and_add (t2, pre_p);
5081
5082 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5083 size_int (rsize * UNITS_PER_WORD));
5084 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5085 gimplify_and_add (t, pre_p);
5086
5087 if (container)
5088 {
5089 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5090 append_to_statement_list (t, pre_p);
5091 }
5092
5093 ptrtype = build_pointer_type (type);
5094 addr = fold_convert (ptrtype, addr);
5095
5096 if (indirect_p)
5097 addr = build_va_arg_indirect_ref (addr);
5098 return build_va_arg_indirect_ref (addr);
5099 }
5100 \f
5101 /* Return nonzero if OPNUM's MEM should be matched
5102 in movabs* patterns. */
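/* The movabs patterns move between the accumulator and a memory operand at
   a 64-bit absolute address; a volatile MEM may only be matched when
   volatile_ok is set. */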
5103
5104 int
5105 ix86_check_movabs (rtx insn, int opnum)
5106 {
5107 rtx set, mem;
5108
5109 set = PATTERN (insn);
5110 if (GET_CODE (set) == PARALLEL)
5111 set = XVECEXP (set, 0, 0);
5112 gcc_assert (GET_CODE (set) == SET);
5113 mem = XEXP (set, opnum);
5114 while (GET_CODE (mem) == SUBREG)
5115 mem = SUBREG_REG (mem);
5116 gcc_assert (MEM_P (mem));
5117 return (volatile_ok || !MEM_VOLATILE_P (mem));
5118 }
5119 \f
5120 /* Initialize the table of extra 80387 mathematical constants. */
5121
5122 static void
5123 init_ext_80387_constants (void)
5124 {
5125 static const char * cst[5] =
5126 {
5127 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5128 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5129 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5130 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5131 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5132 };
5133 int i;
5134
5135 for (i = 0; i < 5; i++)
5136 {
5137 real_from_string (&ext_80387_constants_table[i], cst[i]);
5138 /* Ensure each constant is rounded to XFmode precision. */
5139 real_convert (&ext_80387_constants_table[i],
5140 XFmode, &ext_80387_constants_table[i]);
5141 }
5142
5143 ext_80387_constants_init = 1;
5144 }
5145
5146 /* Return a code identifying the special 80387 instruction that can load the
5147 constant X, 0 if there is none, or -1 if X is not an 80387 constant. */
5148
5149 int
5150 standard_80387_constant_p (rtx x)
5151 {
5152 enum machine_mode mode = GET_MODE (x);
5153
5154 REAL_VALUE_TYPE r;
5155
5156 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5157 return -1;
5158
5159 if (x == CONST0_RTX (mode))
5160 return 1;
5161 if (x == CONST1_RTX (mode))
5162 return 2;
5163
5164 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5165
5166 /* For XFmode constants, try to find a special 80387 instruction when
5167 optimizing for size or on those CPUs that benefit from them. */
5168 if (mode == XFmode
5169 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5170 {
5171 int i;
5172
5173 if (! ext_80387_constants_init)
5174 init_ext_80387_constants ();
5175
5176 for (i = 0; i < 5; i++)
5177 if (real_identical (&r, &ext_80387_constants_table[i]))
5178 return i + 3;
5179 }
5180
5181 /* A load of the constant -0.0 or -1.0 will be split into an
5182 fldz;fchs or fld1;fchs sequence. */
5183 if (real_isnegzero (&r))
5184 return 8;
5185 if (real_identical (&r, &dconstm1))
5186 return 9;
5187
5188 return 0;
5189 }
5190
5191 /* Return the opcode of the special instruction to be used to load
5192 the constant X. */
5193
5194 const char *
5195 standard_80387_constant_opcode (rtx x)
5196 {
5197 switch (standard_80387_constant_p (x))
5198 {
5199 case 1:
5200 return "fldz";
5201 case 2:
5202 return "fld1";
5203 case 3:
5204 return "fldlg2";
5205 case 4:
5206 return "fldln2";
5207 case 5:
5208 return "fldl2e";
5209 case 6:
5210 return "fldl2t";
5211 case 7:
5212 return "fldpi";
5213 case 8:
5214 case 9:
5215 return "#";
5216 default:
5217 gcc_unreachable ();
5218 }
5219 }
5220
5221 /* Return the CONST_DOUBLE representing the 80387 constant that is
5222 loaded by the specified special instruction. The argument IDX
5223 matches the return value from standard_80387_constant_p. */
5224
5225 rtx
5226 standard_80387_constant_rtx (int idx)
5227 {
5228 int i;
5229
5230 if (! ext_80387_constants_init)
5231 init_ext_80387_constants ();
5232
5233 switch (idx)
5234 {
5235 case 3:
5236 case 4:
5237 case 5:
5238 case 6:
5239 case 7:
5240 i = idx - 3;
5241 break;
5242
5243 default:
5244 gcc_unreachable ();
5245 }
5246
5247 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5248 XFmode);
5249 }
5250
5251 /* Return 1 if MODE is a valid mode for SSE. */
5252 static int
5253 standard_sse_mode_p (enum machine_mode mode)
5254 {
5255 switch (mode)
5256 {
5257 case V16QImode:
5258 case V8HImode:
5259 case V4SImode:
5260 case V2DImode:
5261 case V4SFmode:
5262 case V2DFmode:
5263 return 1;
5264
5265 default:
5266 return 0;
5267 }
5268 }
5269
5270 /* Return nonzero if X is a constant that can be loaded into an SSE register
5271 without using memory: 1 for all-zeros, 2 (or -1 without SSE2) for all-ones. */
5272 int
5273 standard_sse_constant_p (rtx x)
5274 {
5275 enum machine_mode mode = GET_MODE (x);
5276
5277 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5278 return 1;
5279 if (vector_all_ones_operand (x, mode)
5280 && standard_sse_mode_p (mode))
5281 return TARGET_SSE2 ? 2 : -1;
5282
5283 return 0;
5284 }
5285
5286 /* Return the opcode of the special instruction to be used to load
5287 the constant X. */
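/* An all-zero constant is materialized by xoring the destination with
   itself (xorps/xorpd/pxor); the all-ones constant uses pcmpeqd of the
   register with itself, which sets every bit. */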
5288
5289 const char *
5290 standard_sse_constant_opcode (rtx insn, rtx x)
5291 {
5292 switch (standard_sse_constant_p (x))
5293 {
5294 case 1:
5295 if (get_attr_mode (insn) == MODE_V4SF)
5296 return "xorps\t%0, %0";
5297 else if (get_attr_mode (insn) == MODE_V2DF)
5298 return "xorpd\t%0, %0";
5299 else
5300 return "pxor\t%0, %0";
5301 case 2:
5302 return "pcmpeqd\t%0, %0";
5303 }
5304 gcc_unreachable ();
5305 }
5306
5307 /* Return 1 if OP contains a symbol reference. */
5308
5309 int
5310 symbolic_reference_mentioned_p (rtx op)
5311 {
5312 const char *fmt;
5313 int i;
5314
5315 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5316 return 1;
5317
5318 fmt = GET_RTX_FORMAT (GET_CODE (op));
5319 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5320 {
5321 if (fmt[i] == 'E')
5322 {
5323 int j;
5324
5325 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5326 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5327 return 1;
5328 }
5329
5330 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5331 return 1;
5332 }
5333
5334 return 0;
5335 }
5336
5337 /* Return 1 if it is appropriate to emit `ret' instructions in the
5338 body of a function. Do this only if the epilogue is simple, needing a
5339 couple of insns. Prior to reloading, we can't tell how many registers
5340 must be saved, so return 0 then. Return 0 if there is no frame
5341 marker to de-allocate. */
5342
5343 int
5344 ix86_can_use_return_insn_p (void)
5345 {
5346 struct ix86_frame frame;
5347
5348 if (! reload_completed || frame_pointer_needed)
5349 return 0;
5350
5351 /* Don't allow popping more than 32k bytes of arguments, since that's all
5352 we can do with one instruction. */
5353 if (current_function_pops_args
5354 && current_function_args_size >= 32768)
5355 return 0;
5356
5357 ix86_compute_frame_layout (&frame);
5358 return frame.to_allocate == 0 && frame.nregs == 0;
5359 }
5360 \f
5361 /* Value should be nonzero if functions must have frame pointers.
5362 Zero means the frame pointer need not be set up (and parms may
5363 be accessed via the stack pointer) in functions that seem suitable. */
5364
5365 int
5366 ix86_frame_pointer_required (void)
5367 {
5368 /* If we accessed previous frames, then the generated code expects
5369 to be able to access the saved ebp value in our frame. */
5370 if (cfun->machine->accesses_prev_frame)
5371 return 1;
5372
5373 /* Several x86 OSes need a frame pointer for other reasons,
5374 usually pertaining to setjmp. */
5375 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5376 return 1;
5377
5378 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5379 the frame pointer by default. Turn it back on now if we've not
5380 got a leaf function. */
5381 if (TARGET_OMIT_LEAF_FRAME_POINTER
5382 && (!current_function_is_leaf
5383 || ix86_current_function_calls_tls_descriptor))
5384 return 1;
5385
5386 if (current_function_profile)
5387 return 1;
5388
5389 return 0;
5390 }
5391
5392 /* Record that the current function accesses previous call frames. */
5393
5394 void
5395 ix86_setup_frame_addresses (void)
5396 {
5397 cfun->machine->accesses_prev_frame = 1;
5398 }
5399 \f
5400 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5401 # define USE_HIDDEN_LINKONCE 1
5402 #else
5403 # define USE_HIDDEN_LINKONCE 0
5404 #endif
5405
5406 static int pic_labels_used;
5407
5408 /* Fills in the label name that should be used for a pc thunk for
5409 the given register. */
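/* With USE_HIDDEN_LINKONCE the thunk is named after the register, e.g.
   "__i686.get_pc_thunk.bx" for %ebx; otherwise an internal label built
   from "LPR" and the register number is used. */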
5410
5411 static void
5412 get_pc_thunk_name (char name[32], unsigned int regno)
5413 {
5414 gcc_assert (!TARGET_64BIT);
5415
5416 if (USE_HIDDEN_LINKONCE)
5417 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5418 else
5419 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5420 }
5421
5422
5423 /* Emit the -fpic pc thunks: each thunk loads its register with the
5424 caller's return address and then returns. */
5425
5426 void
5427 ix86_file_end (void)
5428 {
5429 rtx xops[2];
5430 int regno;
5431
5432 for (regno = 0; regno < 8; ++regno)
5433 {
5434 char name[32];
5435
5436 if (! ((pic_labels_used >> regno) & 1))
5437 continue;
5438
5439 get_pc_thunk_name (name, regno);
5440
5441 #if TARGET_MACHO
5442 if (TARGET_MACHO)
5443 {
5444 switch_to_section (darwin_sections[text_coal_section]);
5445 fputs ("\t.weak_definition\t", asm_out_file);
5446 assemble_name (asm_out_file, name);
5447 fputs ("\n\t.private_extern\t", asm_out_file);
5448 assemble_name (asm_out_file, name);
5449 fputs ("\n", asm_out_file);
5450 ASM_OUTPUT_LABEL (asm_out_file, name);
5451 }
5452 else
5453 #endif
5454 if (USE_HIDDEN_LINKONCE)
5455 {
5456 tree decl;
5457
5458 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5459 error_mark_node);
5460 TREE_PUBLIC (decl) = 1;
5461 TREE_STATIC (decl) = 1;
5462 DECL_ONE_ONLY (decl) = 1;
5463
5464 (*targetm.asm_out.unique_section) (decl, 0);
5465 switch_to_section (get_named_section (decl, NULL, 0));
5466
5467 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5468 fputs ("\t.hidden\t", asm_out_file);
5469 assemble_name (asm_out_file, name);
5470 fputc ('\n', asm_out_file);
5471 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5472 }
5473 else
5474 {
5475 switch_to_section (text_section);
5476 ASM_OUTPUT_LABEL (asm_out_file, name);
5477 }
5478
5479 xops[0] = gen_rtx_REG (SImode, regno);
5480 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5481 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5482 output_asm_insn ("ret", xops);
5483 }
5484
5485 if (NEED_INDICATE_EXEC_STACK)
5486 file_end_indicate_exec_stack ();
5487 }
5488
5489 /* Emit code for the SET_GOT patterns. */
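/* Two strategies are used: without deep branch prediction, a short call to
   a local label followed by a pop retrieves the PC; otherwise the
   per-register pc thunk emitted by ix86_file_end is called. The GOT symbol
   is then added to form the PIC base (not needed for Mach-O or
   VxWorks RTP). */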
5490
5491 const char *
5492 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5493 {
5494 rtx xops[3];
5495
5496 xops[0] = dest;
5497
5498 if (TARGET_VXWORKS_RTP && flag_pic)
5499 {
5500 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5501 xops[2] = gen_rtx_MEM (Pmode,
5502 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5503 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5504
5505 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5506 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5507 an unadorned address. */
5508 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5509 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5510 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5511 return "";
5512 }
5513
5514 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5515
5516 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5517 {
5518 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5519
5520 if (!flag_pic)
5521 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5522 else
5523 output_asm_insn ("call\t%a2", xops);
5524
5525 #if TARGET_MACHO
5526 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5527 is what will be referenced by the Mach-O PIC subsystem. */
5528 if (!label)
5529 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5530 #endif
5531
5532 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5533 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5534
5535 if (flag_pic)
5536 output_asm_insn ("pop{l}\t%0", xops);
5537 }
5538 else
5539 {
5540 char name[32];
5541 get_pc_thunk_name (name, REGNO (dest));
5542 pic_labels_used |= 1 << REGNO (dest);
5543
5544 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5545 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5546 output_asm_insn ("call\t%X2", xops);
5547 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5548 is what will be referenced by the Mach-O PIC subsystem. */
5549 #if TARGET_MACHO
5550 if (!label)
5551 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5552 else
5553 targetm.asm_out.internal_label (asm_out_file, "L",
5554 CODE_LABEL_NUMBER (label));
5555 #endif
5556 }
5557
5558 if (TARGET_MACHO)
5559 return "";
5560
5561 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5562 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5563 else
5564 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5565
5566 return "";
5567 }
5568
5569 /* Generate a "push" pattern for input ARG. */
5570
5571 static rtx
5572 gen_push (rtx arg)
5573 {
5574 return gen_rtx_SET (VOIDmode,
5575 gen_rtx_MEM (Pmode,
5576 gen_rtx_PRE_DEC (Pmode,
5577 stack_pointer_rtx)),
5578 arg);
5579 }
5580
5581 /* Return the number of an unused call-clobbered register if one is
5582 available for the entire function, or INVALID_REGNUM otherwise. */
5583
5584 static unsigned int
5585 ix86_select_alt_pic_regnum (void)
5586 {
5587 if (current_function_is_leaf && !current_function_profile
5588 && !ix86_current_function_calls_tls_descriptor)
5589 {
5590 int i;
5591 for (i = 2; i >= 0; --i)
5592 if (!df_regs_ever_live_p (i))
5593 return i;
5594 }
5595
5596 return INVALID_REGNUM;
5597 }
5598
5599 /* Return 1 if we need to save REGNO. */
5600 static int
5601 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5602 {
5603 if (pic_offset_table_rtx
5604 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5605 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5606 || current_function_profile
5607 || current_function_calls_eh_return
5608 || current_function_uses_const_pool))
5609 {
5610 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5611 return 0;
5612 return 1;
5613 }
5614
5615 if (current_function_calls_eh_return && maybe_eh_return)
5616 {
5617 unsigned i;
5618 for (i = 0; ; i++)
5619 {
5620 unsigned test = EH_RETURN_DATA_REGNO (i);
5621 if (test == INVALID_REGNUM)
5622 break;
5623 if (test == regno)
5624 return 1;
5625 }
5626 }
5627
5628 if (cfun->machine->force_align_arg_pointer
5629 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5630 return 1;
5631
5632 return (df_regs_ever_live_p (regno)
5633 && !call_used_regs[regno]
5634 && !fixed_regs[regno]
5635 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5636 }
5637
5638 /* Return number of registers to be saved on the stack. */
5639
5640 static int
5641 ix86_nsaved_regs (void)
5642 {
5643 int nregs = 0;
5644 int regno;
5645
5646 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5647 if (ix86_save_reg (regno, true))
5648 nregs++;
5649 return nregs;
5650 }
5651
5652 /* Return the offset between two registers, one to be eliminated, and the other
5653 its replacement, at the start of a routine. */
5654
5655 HOST_WIDE_INT
5656 ix86_initial_elimination_offset (int from, int to)
5657 {
5658 struct ix86_frame frame;
5659 ix86_compute_frame_layout (&frame);
5660
5661 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5662 return frame.hard_frame_pointer_offset;
5663 else if (from == FRAME_POINTER_REGNUM
5664 && to == HARD_FRAME_POINTER_REGNUM)
5665 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5666 else
5667 {
5668 gcc_assert (to == STACK_POINTER_REGNUM);
5669
5670 if (from == ARG_POINTER_REGNUM)
5671 return frame.stack_pointer_offset;
5672
5673 gcc_assert (from == FRAME_POINTER_REGNUM);
5674 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5675 }
5676 }
5677
5678 /* Fill the ix86_frame structure for the currently compiled function. */
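/* From higher to lower addresses the layout computed here is: return
   address, saved frame pointer (if any), saved registers, va_arg register
   save area, padding1, local variables, outgoing arguments, padding2.
   When the red zone is usable, the bottom red_zone_size bytes are not
   actually allocated by the prologue. */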
5679
5680 static void
5681 ix86_compute_frame_layout (struct ix86_frame *frame)
5682 {
5683 HOST_WIDE_INT total_size;
5684 unsigned int stack_alignment_needed;
5685 HOST_WIDE_INT offset;
5686 unsigned int preferred_alignment;
5687 HOST_WIDE_INT size = get_frame_size ();
5688
5689 frame->nregs = ix86_nsaved_regs ();
5690 total_size = size;
5691
5692 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5693 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5694
5695 /* During reload iteration the number of registers saved can change.
5696 Recompute the value as needed. Do not recompute when the number of registers
5697 didn't change, as reload does multiple calls to the function and does not
5698 expect the decision to change within a single iteration. */
5699 if (!optimize_size
5700 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5701 {
5702 int count = frame->nregs;
5703
5704 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5705 /* The fast prologue uses move instead of push to save registers. This
5706 is significantly longer, but also executes faster as modern hardware
5707 can execute the moves in parallel, but can't do that for push/pop.
5708
5709 Be careful about choosing which prologue to emit: when the function takes
5710 many instructions to execute we may as well use the slow version, likewise
5711 when the function is known to be outside a hot spot (this is known with
5712 feedback only). Weight the size of the function by the number of registers
5713 to save, as it is cheap to use one or two push instructions but very
5714 slow to use many of them. */
5715 if (count)
5716 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5717 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5718 || (flag_branch_probabilities
5719 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5720 cfun->machine->use_fast_prologue_epilogue = false;
5721 else
5722 cfun->machine->use_fast_prologue_epilogue
5723 = !expensive_function_p (count);
5724 }
5725 if (TARGET_PROLOGUE_USING_MOVE
5726 && cfun->machine->use_fast_prologue_epilogue)
5727 frame->save_regs_using_mov = true;
5728 else
5729 frame->save_regs_using_mov = false;
5730
5731
5732 /* Skip return address and saved base pointer. */
5733 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5734
5735 frame->hard_frame_pointer_offset = offset;
5736
5737 /* Do some sanity checking of stack_alignment_needed and
5738 preferred_alignment, since the i386 port is the only one using those
5739 features, and they may break easily. */
5740
5741 gcc_assert (!size || stack_alignment_needed);
5742 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5743 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5744 gcc_assert (stack_alignment_needed
5745 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5746
5747 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5748 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5749
5750 /* Register save area */
5751 offset += frame->nregs * UNITS_PER_WORD;
5752
5753 /* Va-arg area */
5754 if (ix86_save_varrargs_registers)
5755 {
5756 offset += X86_64_VARARGS_SIZE;
5757 frame->va_arg_size = X86_64_VARARGS_SIZE;
5758 }
5759 else
5760 frame->va_arg_size = 0;
5761
5762 /* Align the start of the frame for the local variables. */
5763 frame->padding1 = ((offset + stack_alignment_needed - 1)
5764 & -stack_alignment_needed) - offset;
5765
5766 offset += frame->padding1;
5767
5768 /* Frame pointer points here. */
5769 frame->frame_pointer_offset = offset;
5770
5771 offset += size;
5772
5773 /* Add the outgoing arguments area. It can be skipped if we eliminated
5774 all the function calls as dead code.
5775 Skipping is however impossible when the function calls alloca: the alloca
5776 expander assumes that the last current_function_outgoing_args_size bytes
5777 of the stack frame are unused. */
5778 if (ACCUMULATE_OUTGOING_ARGS
5779 && (!current_function_is_leaf || current_function_calls_alloca
5780 || ix86_current_function_calls_tls_descriptor))
5781 {
5782 offset += current_function_outgoing_args_size;
5783 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5784 }
5785 else
5786 frame->outgoing_arguments_size = 0;
5787
5788 /* Align stack boundary. Only needed if we're calling another function
5789 or using alloca. */
5790 if (!current_function_is_leaf || current_function_calls_alloca
5791 || ix86_current_function_calls_tls_descriptor)
5792 frame->padding2 = ((offset + preferred_alignment - 1)
5793 & -preferred_alignment) - offset;
5794 else
5795 frame->padding2 = 0;
5796
5797 offset += frame->padding2;
5798
5799 /* We've reached end of stack frame. */
5800 frame->stack_pointer_offset = offset;
5801
5802 /* Size prologue needs to allocate. */
5803 frame->to_allocate =
5804 (size + frame->padding1 + frame->padding2
5805 + frame->outgoing_arguments_size + frame->va_arg_size);
5806
5807 if ((!frame->to_allocate && frame->nregs <= 1)
5808 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5809 frame->save_regs_using_mov = false;
5810
5811 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5812 && current_function_is_leaf
5813 && !ix86_current_function_calls_tls_descriptor)
5814 {
5815 frame->red_zone_size = frame->to_allocate;
5816 if (frame->save_regs_using_mov)
5817 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5818 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5819 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5820 }
5821 else
5822 frame->red_zone_size = 0;
5823 frame->to_allocate -= frame->red_zone_size;
5824 frame->stack_pointer_offset -= frame->red_zone_size;
5825 #if 0
5826 fprintf (stderr, "\n");
5827 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5828 fprintf (stderr, "size: %ld\n", (long)size);
5829 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5830 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5831 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5832 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5833 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5834 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5835 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5836 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5837 (long)frame->hard_frame_pointer_offset);
5838 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5839 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5840 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5841 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5842 #endif
5843 }
5844
5845 /* Emit code to save registers in the prologue. */
5846
5847 static void
5848 ix86_emit_save_regs (void)
5849 {
5850 unsigned int regno;
5851 rtx insn;
5852
5853 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5854 if (ix86_save_reg (regno, true))
5855 {
5856 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5857 RTX_FRAME_RELATED_P (insn) = 1;
5858 }
5859 }
5860
5861 /* Emit code to save registers using MOV insns. The first register
5862 is stored at POINTER + OFFSET. */
5863 static void
5864 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5865 {
5866 unsigned int regno;
5867 rtx insn;
5868
5869 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5870 if (ix86_save_reg (regno, true))
5871 {
5872 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5873 Pmode, offset),
5874 gen_rtx_REG (Pmode, regno));
5875 RTX_FRAME_RELATED_P (insn) = 1;
5876 offset += UNITS_PER_WORD;
5877 }
5878 }
5879
5880 /* Expand prologue or epilogue stack adjustment.
5881 The pattern exists to put a dependency on all ebp-based memory accesses.
5882 STYLE should be negative if instructions should be marked as frame related,
5883 zero if the %r11 register is live and cannot be freely used, and positive
5884 otherwise. */
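/* On x86-64, an adjustment that does not fit in a sign-extended 32-bit
   immediate is first loaded into %r11; STYLE must then be nonzero, since
   STYLE == 0 means %r11 is live. */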
5885
5886 static void
5887 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5888 {
5889 rtx insn;
5890
5891 if (! TARGET_64BIT)
5892 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5893 else if (x86_64_immediate_operand (offset, DImode))
5894 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5895 else
5896 {
5897 rtx r11;
5898 /* r11 is used by indirect sibcall return as well, set before the
5899 epilogue and used after the epilogue. ATM indirect sibcall
5900 shouldn't be used together with huge frame sizes in one
5901 function because of the frame_size check in sibcall.c. */
5902 gcc_assert (style);
5903 r11 = gen_rtx_REG (DImode, R11_REG);
5904 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5905 if (style < 0)
5906 RTX_FRAME_RELATED_P (insn) = 1;
5907 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5908 offset));
5909 }
5910 if (style < 0)
5911 RTX_FRAME_RELATED_P (insn) = 1;
5912 }
5913
5914 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5915
5916 static rtx
5917 ix86_internal_arg_pointer (void)
5918 {
5919 bool has_force_align_arg_pointer =
5920 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5921 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5922 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5923 && DECL_NAME (current_function_decl)
5924 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5925 && DECL_FILE_SCOPE_P (current_function_decl))
5926 || ix86_force_align_arg_pointer
5927 || has_force_align_arg_pointer)
5928 {
5929 /* Nested functions can't realign the stack due to a register
5930 conflict. */
5931 if (DECL_CONTEXT (current_function_decl)
5932 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5933 {
5934 if (ix86_force_align_arg_pointer)
5935 warning (0, "-mstackrealign ignored for nested functions");
5936 if (has_force_align_arg_pointer)
5937 error ("%s not supported for nested functions",
5938 ix86_force_align_arg_pointer_string);
5939 return virtual_incoming_args_rtx;
5940 }
5941 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5942 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5943 }
5944 else
5945 return virtual_incoming_args_rtx;
5946 }
5947
5948 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5949 This is called from dwarf2out.c to emit call frame instructions
5950 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5951 static void
5952 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5953 {
5954 rtx unspec = SET_SRC (pattern);
5955 gcc_assert (GET_CODE (unspec) == UNSPEC);
5956
5957 switch (index)
5958 {
5959 case UNSPEC_REG_SAVE:
5960 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5961 SET_DEST (pattern));
5962 break;
5963 case UNSPEC_DEF_CFA:
5964 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5965 INTVAL (XVECEXP (unspec, 0, 0)));
5966 break;
5967 default:
5968 gcc_unreachable ();
5969 }
5970 }
5971
5972 /* Expand the prologue into a bunch of separate insns. */
5973
5974 void
5975 ix86_expand_prologue (void)
5976 {
5977 rtx insn;
5978 bool pic_reg_used;
5979 struct ix86_frame frame;
5980 HOST_WIDE_INT allocate;
5981
5982 ix86_compute_frame_layout (&frame);
5983
5984 if (cfun->machine->force_align_arg_pointer)
5985 {
5986 rtx x, y;
5987
5988 /* Grab the argument pointer. */
5989 x = plus_constant (stack_pointer_rtx, 4);
5990 y = cfun->machine->force_align_arg_pointer;
5991 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5992 RTX_FRAME_RELATED_P (insn) = 1;
5993
5994 /* The unwind info consists of two parts: install the fafp as the cfa,
5995 and record the fafp as the "save register" of the stack pointer.
5996 The latter is there so that the unwinder can see where it should
5997 restore the stack pointer across the `and' insn that aligns the stack. */
5998 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5999 x = gen_rtx_SET (VOIDmode, y, x);
6000 RTX_FRAME_RELATED_P (x) = 1;
6001 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6002 UNSPEC_REG_SAVE);
6003 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6004 RTX_FRAME_RELATED_P (y) = 1;
6005 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6006 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6007 REG_NOTES (insn) = x;
6008
6009 /* Align the stack. */
6010 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6011 GEN_INT (-16)));
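/* Masking with -16 rounds the stack pointer down to a 16-byte boundary,
   the preferred stack alignment. */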
6012
6013 /* And here we cheat like madmen with the unwind info. We force the
6014 cfa register back to sp+4, which is exactly what it was at the
6015 start of the function. Re-pushing the return address results in
6016 the return at the same spot relative to the cfa, and thus is
6017 correct wrt the unwind info. */
6018 x = cfun->machine->force_align_arg_pointer;
6019 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6020 insn = emit_insn (gen_push (x));
6021 RTX_FRAME_RELATED_P (insn) = 1;
6022
6023 x = GEN_INT (4);
6024 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6025 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6026 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6027 REG_NOTES (insn) = x;
6028 }
6029
6030 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6031 slower on all targets. Also sdb doesn't like it. */
6032
6033 if (frame_pointer_needed)
6034 {
6035 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6036 RTX_FRAME_RELATED_P (insn) = 1;
6037
6038 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6039 RTX_FRAME_RELATED_P (insn) = 1;
6040 }
6041
6042 allocate = frame.to_allocate;
6043
6044 if (!frame.save_regs_using_mov)
6045 ix86_emit_save_regs ();
6046 else
6047 allocate += frame.nregs * UNITS_PER_WORD;
6048
6049 /* When using the red zone we may start register saving before allocating
6050 the stack frame, saving one cycle of the prologue. */
6051 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6052 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6053 : stack_pointer_rtx,
6054 -frame.nregs * UNITS_PER_WORD);
6055
6056 if (allocate == 0)
6057 ;
6058 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6059 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6060 GEN_INT (-allocate), -1);
6061 else
6062 {
6063 /* Only valid for Win32 and the Win64 MS ABI. */
6064 rtx eax = gen_rtx_REG (Pmode, 0);
6065 bool eax_live;
6066 rtx t;
6067
6068 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6069
6070 if (TARGET_64BIT_MS_ABI)
6071 eax_live = false;
6072 else
6073 eax_live = ix86_eax_live_at_start_p ();
6074
6075 if (eax_live)
6076 {
6077 emit_insn (gen_push (eax));
6078 allocate -= UNITS_PER_WORD;
6079 }
6080
6081 emit_move_insn (eax, GEN_INT (allocate));
6082
6083 if (TARGET_64BIT)
6084 insn = gen_allocate_stack_worker_64 (eax);
6085 else
6086 insn = gen_allocate_stack_worker_32 (eax);
6087 insn = emit_insn (insn);
6088 RTX_FRAME_RELATED_P (insn) = 1;
6089 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6090 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6091 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6092 t, REG_NOTES (insn));
6093
6094 if (eax_live)
6095 {
6096 if (frame_pointer_needed)
6097 t = plus_constant (hard_frame_pointer_rtx,
6098 allocate
6099 - frame.to_allocate
6100 - frame.nregs * UNITS_PER_WORD);
6101 else
6102 t = plus_constant (stack_pointer_rtx, allocate);
6103 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6104 }
6105 }
6106
6107 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6108 {
6109 if (!frame_pointer_needed || !frame.to_allocate)
6110 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6111 else
6112 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6113 -frame.nregs * UNITS_PER_WORD);
6114 }
6115
6116 pic_reg_used = false;
6117 if (pic_offset_table_rtx
6118 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6119 || current_function_profile))
6120 {
6121 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6122
6123 if (alt_pic_reg_used != INVALID_REGNUM)
6124 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6125
6126 pic_reg_used = true;
6127 }
6128
6129 if (pic_reg_used)
6130 {
6131 if (TARGET_64BIT)
6132 {
6133 if (ix86_cmodel == CM_LARGE_PIC)
6134 {
6135 rtx tmp_reg = gen_rtx_REG (DImode,
6136 FIRST_REX_INT_REG + 3 /* R11 */);
6137 rtx label = gen_label_rtx ();
6138 emit_label (label);
6139 LABEL_PRESERVE_P (label) = 1;
6140 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6141 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6142 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6143 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6144 pic_offset_table_rtx, tmp_reg));
6145 }
6146 else
6147 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6148 }
6149 else
6150 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6151 }
6152
6153 /* Prevent function calls from being scheduled before the call to mcount.
6154 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
6155 if (current_function_profile)
6156 {
6157 if (pic_reg_used)
6158 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6159 emit_insn (gen_blockage ());
6160 }
6161 }
6162
6163 /* Emit code to restore saved registers using MOV insns. First register
6164 is restored from POINTER + OFFSET. */
6165 static void
6166 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6167 int maybe_eh_return)
6168 {
6169 int regno;
6170 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6171
6172 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6173 if (ix86_save_reg (regno, maybe_eh_return))
6174 {
6175 /* Ensure that adjust_address won't be forced to produce a pointer
6176 outside the range allowed by the x86-64 instruction set. */
6177 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6178 {
6179 rtx r11;
6180
6181 r11 = gen_rtx_REG (DImode, R11_REG);
6182 emit_move_insn (r11, GEN_INT (offset));
6183 emit_insn (gen_adddi3 (r11, r11, pointer));
6184 base_address = gen_rtx_MEM (Pmode, r11);
6185 offset = 0;
6186 }
6187 emit_move_insn (gen_rtx_REG (Pmode, regno),
6188 adjust_address (base_address, Pmode, offset));
6189 offset += UNITS_PER_WORD;
6190 }
6191 }
6192
6193 /* Restore function stack, frame, and registers. */
6194
6195 void
6196 ix86_expand_epilogue (int style)
6197 {
6198 int regno;
6199 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6200 struct ix86_frame frame;
6201 HOST_WIDE_INT offset;
6202
6203 ix86_compute_frame_layout (&frame);
6204
6205 /* Calculate start of saved registers relative to ebp. Special care
6206 must be taken for the normal return case of a function using
6207 eh_return: the eax and edx registers are marked as saved, but not
6208 restored along this path. */
6209 offset = frame.nregs;
6210 if (current_function_calls_eh_return && style != 2)
6211 offset -= 2;
6212 offset *= -UNITS_PER_WORD;
6213
6214 /* If we're only restoring one register and sp is not valid, then use a
6215 move instruction to restore the register, since that's less work than
6216 reloading sp and popping the register.
6217
6218 The default code results in a stack adjustment using an add/lea insn,
6219 while this code results in a LEAVE instruction (or discrete equivalent),
6220 so it is profitable in some other cases as well, especially when there
6221 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6222 and there is exactly one register to pop. This heuristic may need some
6223 tuning in the future. */
6224 if ((!sp_valid && frame.nregs <= 1)
6225 || (TARGET_EPILOGUE_USING_MOVE
6226 && cfun->machine->use_fast_prologue_epilogue
6227 && (frame.nregs > 1 || frame.to_allocate))
6228 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6229 || (frame_pointer_needed && TARGET_USE_LEAVE
6230 && cfun->machine->use_fast_prologue_epilogue
6231 && frame.nregs == 1)
6232 || current_function_calls_eh_return)
6233 {
6234 /* Restore registers. We can use ebp or esp to address the memory
6235 locations. If both are available, default to ebp, since offsets
6236 are known to be small. The only exception is esp pointing directly
6237 to the end of the block of saved registers, where we may simplify
6238 the addressing mode. */
6239
6240 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6241 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6242 frame.to_allocate, style == 2);
6243 else
6244 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6245 offset, style == 2);
6246
6247 /* eh_return epilogues need %ecx added to the stack pointer. */
6248 if (style == 2)
6249 {
6250 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6251
6252 if (frame_pointer_needed)
6253 {
6254 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6255 tmp = plus_constant (tmp, UNITS_PER_WORD);
6256 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6257
6258 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6259 emit_move_insn (hard_frame_pointer_rtx, tmp);
6260
6261 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6262 const0_rtx, style);
6263 }
6264 else
6265 {
6266 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6267 tmp = plus_constant (tmp, (frame.to_allocate
6268 + frame.nregs * UNITS_PER_WORD));
6269 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6270 }
6271 }
6272 else if (!frame_pointer_needed)
6273 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6274 GEN_INT (frame.to_allocate
6275 + frame.nregs * UNITS_PER_WORD),
6276 style);
6277 /* If not an i386, mov & pop is faster than "leave". */
6278 else if (TARGET_USE_LEAVE || optimize_size
6279 || !cfun->machine->use_fast_prologue_epilogue)
6280 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6281 else
6282 {
6283 pro_epilogue_adjust_stack (stack_pointer_rtx,
6284 hard_frame_pointer_rtx,
6285 const0_rtx, style);
6286 if (TARGET_64BIT)
6287 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6288 else
6289 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6290 }
6291 }
6292 else
6293 {
6294 /* First step is to deallocate the stack frame so that we can
6295 pop the registers. */
6296 if (!sp_valid)
6297 {
6298 gcc_assert (frame_pointer_needed);
6299 pro_epilogue_adjust_stack (stack_pointer_rtx,
6300 hard_frame_pointer_rtx,
6301 GEN_INT (offset), style);
6302 }
6303 else if (frame.to_allocate)
6304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6305 GEN_INT (frame.to_allocate), style);
6306
6307 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6308 if (ix86_save_reg (regno, false))
6309 {
6310 if (TARGET_64BIT)
6311 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6312 else
6313 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6314 }
6315 if (frame_pointer_needed)
6316 {
6317 /* Leave results in shorter dependency chains on CPUs that are
6318 able to grok it fast. */
6319 if (TARGET_USE_LEAVE)
6320 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6321 else if (TARGET_64BIT)
6322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6323 else
6324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6325 }
6326 }
6327
6328 if (cfun->machine->force_align_arg_pointer)
6329 {
6330 emit_insn (gen_addsi3 (stack_pointer_rtx,
6331 cfun->machine->force_align_arg_pointer,
6332 GEN_INT (-4)));
6333 }
6334
6335 /* Sibcall epilogues don't want a return instruction. */
6336 if (style == 0)
6337 return;
6338
6339 if (current_function_pops_args && current_function_args_size)
6340 {
6341 rtx popc = GEN_INT (current_function_pops_args);
6342
6343 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6344 return address, do an explicit add, and jump indirectly to the
6345 caller. */
6346
6347 if (current_function_pops_args >= 65536)
6348 {
6349 rtx ecx = gen_rtx_REG (SImode, 2);
6350
6351 /* There is no "pascal" calling convention in any 64bit ABI. */
6352 gcc_assert (!TARGET_64BIT);
6353
6354 emit_insn (gen_popsi1 (ecx));
6355 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6356 emit_jump_insn (gen_return_indirect_internal (ecx));
6357 }
6358 else
6359 emit_jump_insn (gen_return_pop_internal (popc));
6360 }
6361 else
6362 emit_jump_insn (gen_return_internal ());
6363 }
6364
6365 /* Reset global state modified while compiling this function, such as the
hard register chosen for the PIC pointer. */
6366
6367 static void
6368 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6369 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6370 {
6371 if (pic_offset_table_rtx)
6372 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6373 #if TARGET_MACHO
6374 /* Mach-O doesn't support labels at the end of objects, so if
6375 it looks like we might want one, insert a NOP. */
6376 {
6377 rtx insn = get_last_insn ();
6378 while (insn
6379 && NOTE_P (insn)
6380 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6381 insn = PREV_INSN (insn);
6382 if (insn
6383 && (LABEL_P (insn)
6384 || (NOTE_P (insn)
6385 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6386 fputs ("\tnop\n", file);
6387 }
6388 #endif
6389
6390 }
6391 \f
6392 /* Extract the parts of an RTL expression that is a valid memory address
6393 for an instruction. Return 0 if the structure of the address is
6394 grossly off. Return -1 if the address contains ASHIFT, so it is not
6395 strictly valid, but is still used when computing the length of a lea insn. */
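/* For example, (plus (plus (mult (reg A) (const_int 4)) (reg B))
   (const_int 8)) decomposes into base B, index A, scale 4 and
   displacement 8, corresponding to the operand 8(B,A,4). */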
6396
6397 int
6398 ix86_decompose_address (rtx addr, struct ix86_address *out)
6399 {
6400 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6401 rtx base_reg, index_reg;
6402 HOST_WIDE_INT scale = 1;
6403 rtx scale_rtx = NULL_RTX;
6404 int retval = 1;
6405 enum ix86_address_seg seg = SEG_DEFAULT;
6406
6407 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6408 base = addr;
6409 else if (GET_CODE (addr) == PLUS)
6410 {
6411 rtx addends[4], op;
6412 int n = 0, i;
6413
6414 op = addr;
6415 do
6416 {
6417 if (n >= 4)
6418 return 0;
6419 addends[n++] = XEXP (op, 1);
6420 op = XEXP (op, 0);
6421 }
6422 while (GET_CODE (op) == PLUS);
6423 if (n >= 4)
6424 return 0;
6425 addends[n] = op;
6426
6427 for (i = n; i >= 0; --i)
6428 {
6429 op = addends[i];
6430 switch (GET_CODE (op))
6431 {
6432 case MULT:
6433 if (index)
6434 return 0;
6435 index = XEXP (op, 0);
6436 scale_rtx = XEXP (op, 1);
6437 break;
6438
6439 case UNSPEC:
6440 if (XINT (op, 1) == UNSPEC_TP
6441 && TARGET_TLS_DIRECT_SEG_REFS
6442 && seg == SEG_DEFAULT)
6443 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6444 else
6445 return 0;
6446 break;
6447
6448 case REG:
6449 case SUBREG:
6450 if (!base)
6451 base = op;
6452 else if (!index)
6453 index = op;
6454 else
6455 return 0;
6456 break;
6457
6458 case CONST:
6459 case CONST_INT:
6460 case SYMBOL_REF:
6461 case LABEL_REF:
6462 if (disp)
6463 return 0;
6464 disp = op;
6465 break;
6466
6467 default:
6468 return 0;
6469 }
6470 }
6471 }
6472 else if (GET_CODE (addr) == MULT)
6473 {
6474 index = XEXP (addr, 0); /* index*scale */
6475 scale_rtx = XEXP (addr, 1);
6476 }
6477 else if (GET_CODE (addr) == ASHIFT)
6478 {
6479 rtx tmp;
6480
6481 /* We're called for lea too, which implements ashift on occasion. */
6482 index = XEXP (addr, 0);
6483 tmp = XEXP (addr, 1);
6484 if (!CONST_INT_P (tmp))
6485 return 0;
6486 scale = INTVAL (tmp);
6487 if ((unsigned HOST_WIDE_INT) scale > 3)
6488 return 0;
6489 scale = 1 << scale;
6490 retval = -1;
6491 }
6492 else
6493 disp = addr; /* displacement */
6494
6495 /* Extract the integral value of scale. */
6496 if (scale_rtx)
6497 {
6498 if (!CONST_INT_P (scale_rtx))
6499 return 0;
6500 scale = INTVAL (scale_rtx);
6501 }
6502
6503 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6504 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6505
6506 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6507 if (base_reg && index_reg && scale == 1
6508 && (index_reg == arg_pointer_rtx
6509 || index_reg == frame_pointer_rtx
6510 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6511 {
6512 rtx tmp;
6513 tmp = base, base = index, index = tmp;
6514 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6515 }
6516
6517 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6518 if ((base_reg == hard_frame_pointer_rtx
6519 || base_reg == frame_pointer_rtx
6520 || base_reg == arg_pointer_rtx) && !disp)
6521 disp = const0_rtx;
6522
6523 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6524 Avoid this by transforming to [%esi+0]. */
6525 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6526 && base_reg && !index_reg && !disp
6527 && REG_P (base_reg)
6528 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6529 disp = const0_rtx;
6530
6531 /* Special case: encode reg+reg instead of reg*2. */
6532 if (!base && index && scale && scale == 2)
6533 base = index, base_reg = index_reg, scale = 1;
6534
6535 /* Special case: scaling cannot be encoded without base or displacement. */
6536 if (!base && !disp && index && scale != 1)
6537 disp = const0_rtx;
6538
6539 out->base = base;
6540 out->index = index;
6541 out->disp = disp;
6542 out->scale = scale;
6543 out->seg = seg;
6544
6545 return retval;
6546 }
6547 \f
6548 /* Return the cost of the memory address X.
6549 For i386, it is better to use a complex address than let gcc copy
6550 the address into a reg and make a new pseudo. But not if the address
6551 requires two regs - that would mean more pseudos with longer
6552 lifetimes. */
6553 static int
6554 ix86_address_cost (rtx x)
6555 {
6556 struct ix86_address parts;
6557 int cost = 1;
6558 int ok = ix86_decompose_address (x, &parts);
6559
6560 gcc_assert (ok);
6561
6562 if (parts.base && GET_CODE (parts.base) == SUBREG)
6563 parts.base = SUBREG_REG (parts.base);
6564 if (parts.index && GET_CODE (parts.index) == SUBREG)
6565 parts.index = SUBREG_REG (parts.index);
6566
6567 /* More complex memory references are better. */
6568 if (parts.disp && parts.disp != const0_rtx)
6569 cost--;
6570 if (parts.seg != SEG_DEFAULT)
6571 cost--;
6572
6573 /* Attempt to minimize number of registers in the address. */
6574 if ((parts.base
6575 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6576 || (parts.index
6577 && (!REG_P (parts.index)
6578 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6579 cost++;
6580
6581 if (parts.base
6582 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6583 && parts.index
6584 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6585 && parts.base != parts.index)
6586 cost++;
6587
6588 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6589 since its predecode logic can't detect the length of instructions
6590 and decoding degenerates to vector decoded. Increase the cost of such
6591 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6592 to split such addresses or even refuse them altogether.
6593
6594 The following addressing modes are affected:
6595 [base+scale*index]
6596 [scale*index+disp]
6597 [base+index]
6598
6599 The first and last cases may be avoidable by explicitly coding the zero
6600 into the memory address, but I don't have an AMD K6 machine handy to
6601 check this theory. */
6602
6603 if (TARGET_K6
6604 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6605 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6606 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6607 cost += 10;
6608
6609 return cost;
6610 }
6611 \f
6612 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6613 this is used to form addresses of local data when -fPIC is in
6614 use. */
6615
6616 static bool
6617 darwin_local_data_pic (rtx disp)
6618 {
6619 if (GET_CODE (disp) == MINUS)
6620 {
6621 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6622 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6623 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6624 {
6625 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6626 if (! strcmp (sym_name, "<pic base>"))
6627 return true;
6628 }
6629 }
6630
6631 return false;
6632 }
6633
6634 /* Determine if a given RTX is a valid constant. We already know this
6635 satisfies CONSTANT_P. */
6636
6637 bool
6638 legitimate_constant_p (rtx x)
6639 {
6640 switch (GET_CODE (x))
6641 {
6642 case CONST:
6643 x = XEXP (x, 0);
6644
6645 if (GET_CODE (x) == PLUS)
6646 {
6647 if (!CONST_INT_P (XEXP (x, 1)))
6648 return false;
6649 x = XEXP (x, 0);
6650 }
6651
6652 if (TARGET_MACHO && darwin_local_data_pic (x))
6653 return true;
6654
6655 /* Only some unspecs are valid as "constants". */
6656 if (GET_CODE (x) == UNSPEC)
6657 switch (XINT (x, 1))
6658 {
6659 case UNSPEC_GOT:
6660 case UNSPEC_GOTOFF:
6661 case UNSPEC_PLTOFF:
6662 return TARGET_64BIT;
6663 case UNSPEC_TPOFF:
6664 case UNSPEC_NTPOFF:
6665 x = XVECEXP (x, 0, 0);
6666 return (GET_CODE (x) == SYMBOL_REF
6667 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6668 case UNSPEC_DTPOFF:
6669 x = XVECEXP (x, 0, 0);
6670 return (GET_CODE (x) == SYMBOL_REF
6671 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6672 default:
6673 return false;
6674 }
6675
6676 /* We must have drilled down to a symbol. */
6677 if (GET_CODE (x) == LABEL_REF)
6678 return true;
6679 if (GET_CODE (x) != SYMBOL_REF)
6680 return false;
6681 /* FALLTHRU */
6682
6683 case SYMBOL_REF:
6684 /* TLS symbols are never valid. */
6685 if (SYMBOL_REF_TLS_MODEL (x))
6686 return false;
6687
6688 /* DLLIMPORT symbols are never valid. */
6689 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6690 && SYMBOL_REF_DLLIMPORT_P (x))
6691 return false;
6692 break;
6693
6694 case CONST_DOUBLE:
6695 if (GET_MODE (x) == TImode
6696 && x != CONST0_RTX (TImode)
6697 && !TARGET_64BIT)
6698 return false;
6699 break;
6700
6701 case CONST_VECTOR:
6702 if (x == CONST0_RTX (GET_MODE (x)))
6703 return true;
6704 return false;
6705
6706 default:
6707 break;
6708 }
6709
6710 /* Otherwise we handle everything else in the move patterns. */
6711 return true;
6712 }
6713
6714 /* Determine if it's legal to put X into the constant pool. This
6715 is not possible for the address of thread-local symbols, which
6716 is checked above. */
6717
6718 static bool
6719 ix86_cannot_force_const_mem (rtx x)
6720 {
6721 /* We can always put integral constants and vectors in memory. */
6722 switch (GET_CODE (x))
6723 {
6724 case CONST_INT:
6725 case CONST_DOUBLE:
6726 case CONST_VECTOR:
6727 return false;
6728
6729 default:
6730 break;
6731 }
6732 return !legitimate_constant_p (x);
6733 }
6734
6735 /* Determine if a given RTX is a valid constant address. */
6736
6737 bool
6738 constant_address_p (rtx x)
6739 {
6740 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6741 }
6742
6743 /* Nonzero if the constant value X is a legitimate general operand
6744 when generating PIC code. It is given that flag_pic is on and
6745 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6746
6747 bool
6748 legitimate_pic_operand_p (rtx x)
6749 {
6750 rtx inner;
6751
6752 switch (GET_CODE (x))
6753 {
6754 case CONST:
6755 inner = XEXP (x, 0);
6756 if (GET_CODE (inner) == PLUS
6757 && CONST_INT_P (XEXP (inner, 1)))
6758 inner = XEXP (inner, 0);
6759
6760 /* Only some unspecs are valid as "constants". */
6761 if (GET_CODE (inner) == UNSPEC)
6762 switch (XINT (inner, 1))
6763 {
6764 case UNSPEC_GOT:
6765 case UNSPEC_GOTOFF:
6766 case UNSPEC_PLTOFF:
6767 return TARGET_64BIT;
6768 case UNSPEC_TPOFF:
6769 x = XVECEXP (inner, 0, 0);
6770 return (GET_CODE (x) == SYMBOL_REF
6771 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6772 default:
6773 return false;
6774 }
6775 /* FALLTHRU */
6776
6777 case SYMBOL_REF:
6778 case LABEL_REF:
6779 return legitimate_pic_address_disp_p (x);
6780
6781 default:
6782 return true;
6783 }
6784 }
6785
6786 /* Determine if a given CONST RTX is a valid memory displacement
6787 in PIC mode. */
6788
6789 int
6790 legitimate_pic_address_disp_p (rtx disp)
6791 {
6792 bool saw_plus;
6793
6794 /* In 64bit mode we can allow direct addresses of symbols and labels
6795 when they are not dynamic symbols. */
6796 if (TARGET_64BIT)
6797 {
6798 rtx op0 = disp, op1;
6799
6800 switch (GET_CODE (disp))
6801 {
6802 case LABEL_REF:
6803 return true;
6804
6805 case CONST:
6806 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6807 break;
6808 op0 = XEXP (XEXP (disp, 0), 0);
6809 op1 = XEXP (XEXP (disp, 0), 1);
6810 if (!CONST_INT_P (op1)
6811 || INTVAL (op1) >= 16*1024*1024
6812 || INTVAL (op1) < -16*1024*1024)
6813 break;
6814 if (GET_CODE (op0) == LABEL_REF)
6815 return true;
6816 if (GET_CODE (op0) != SYMBOL_REF)
6817 break;
6818 /* FALLTHRU */
6819
6820 case SYMBOL_REF:
6821 /* TLS references should always be enclosed in UNSPEC. */
6822 if (SYMBOL_REF_TLS_MODEL (op0))
6823 return false;
6824 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6825 && ix86_cmodel != CM_LARGE_PIC)
6826 return true;
6827 break;
6828
6829 default:
6830 break;
6831 }
6832 }
6833 if (GET_CODE (disp) != CONST)
6834 return 0;
6835 disp = XEXP (disp, 0);
6836
6837 if (TARGET_64BIT)
6838 {
6839 /* It is unsafe to allow PLUS expressions here; that would limit the allowed
6840 distance of GOT table references. We should not need these anyway. */
6841 if (GET_CODE (disp) != UNSPEC
6842 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6843 && XINT (disp, 1) != UNSPEC_GOTOFF
6844 && XINT (disp, 1) != UNSPEC_PLTOFF))
6845 return 0;
6846
6847 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6848 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6849 return 0;
6850 return 1;
6851 }
6852
6853 saw_plus = false;
6854 if (GET_CODE (disp) == PLUS)
6855 {
6856 if (!CONST_INT_P (XEXP (disp, 1)))
6857 return 0;
6858 disp = XEXP (disp, 0);
6859 saw_plus = true;
6860 }
6861
6862 if (TARGET_MACHO && darwin_local_data_pic (disp))
6863 return 1;
6864
6865 if (GET_CODE (disp) != UNSPEC)
6866 return 0;
6867
6868 switch (XINT (disp, 1))
6869 {
6870 case UNSPEC_GOT:
6871 if (saw_plus)
6872 return false;
6873 /* We need to check for both symbols and labels because VxWorks loads
6874 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6875 details. */
6876 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6877 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6878 case UNSPEC_GOTOFF:
6879 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6880 The ABI also specifies a 32bit relocation, but we don't produce it
6881 in the small PIC model at all. */
6882 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6883 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6884 && !TARGET_64BIT)
6885 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6886 return false;
6887 case UNSPEC_GOTTPOFF:
6888 case UNSPEC_GOTNTPOFF:
6889 case UNSPEC_INDNTPOFF:
6890 if (saw_plus)
6891 return false;
6892 disp = XVECEXP (disp, 0, 0);
6893 return (GET_CODE (disp) == SYMBOL_REF
6894 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6895 case UNSPEC_NTPOFF:
6896 disp = XVECEXP (disp, 0, 0);
6897 return (GET_CODE (disp) == SYMBOL_REF
6898 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6899 case UNSPEC_DTPOFF:
6900 disp = XVECEXP (disp, 0, 0);
6901 return (GET_CODE (disp) == SYMBOL_REF
6902 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6903 }
6904
6905 return 0;
6906 }
6907
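/* A rough sketch of the 32-bit forms accepted above:

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))                 foo@GOTOFF
     (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)
                  (const_int 4)))                                        foo@GOTOFF+4
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))                    foo@GOT

   Note that @GOT references are refused once a constant offset has been
   folded in (the saw_plus check), while @GOTOFF references still pass.  */
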
6908 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6909 memory address for an instruction. The MODE argument is the machine mode
6910 for the MEM expression that wants to use this address.
6911
6912 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6913 convert common non-canonical forms to canonical form so that they will
6914 be recognized. */
6915
6916 int
6917 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6918 rtx addr, int strict)
6919 {
6920 struct ix86_address parts;
6921 rtx base, index, disp;
6922 HOST_WIDE_INT scale;
6923 const char *reason = NULL;
6924 rtx reason_rtx = NULL_RTX;
6925
6926 if (ix86_decompose_address (addr, &parts) <= 0)
6927 {
6928 reason = "decomposition failed";
6929 goto report_error;
6930 }
6931
6932 base = parts.base;
6933 index = parts.index;
6934 disp = parts.disp;
6935 scale = parts.scale;
6936
6937 /* Validate base register.
6938
6939 Don't allow SUBREG's that span more than a word here. It can lead to spill
6940 failures when the base is one word out of a two word structure, which is
6941 represented internally as a DImode int. */
6942
6943 if (base)
6944 {
6945 rtx reg;
6946 reason_rtx = base;
6947
6948 if (REG_P (base))
6949 reg = base;
6950 else if (GET_CODE (base) == SUBREG
6951 && REG_P (SUBREG_REG (base))
6952 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6953 <= UNITS_PER_WORD)
6954 reg = SUBREG_REG (base);
6955 else
6956 {
6957 reason = "base is not a register";
6958 goto report_error;
6959 }
6960
6961 if (GET_MODE (base) != Pmode)
6962 {
6963 reason = "base is not in Pmode";
6964 goto report_error;
6965 }
6966
6967 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6968 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6969 {
6970 reason = "base is not valid";
6971 goto report_error;
6972 }
6973 }
6974
6975 /* Validate index register.
6976
6977 Don't allow SUBREG's that span more than a word here -- same as above. */
6978
6979 if (index)
6980 {
6981 rtx reg;
6982 reason_rtx = index;
6983
6984 if (REG_P (index))
6985 reg = index;
6986 else if (GET_CODE (index) == SUBREG
6987 && REG_P (SUBREG_REG (index))
6988 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6989 <= UNITS_PER_WORD)
6990 reg = SUBREG_REG (index);
6991 else
6992 {
6993 reason = "index is not a register";
6994 goto report_error;
6995 }
6996
6997 if (GET_MODE (index) != Pmode)
6998 {
6999 reason = "index is not in Pmode";
7000 goto report_error;
7001 }
7002
7003 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7004 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7005 {
7006 reason = "index is not valid";
7007 goto report_error;
7008 }
7009 }
7010
7011 /* Validate scale factor. */
7012 if (scale != 1)
7013 {
7014 reason_rtx = GEN_INT (scale);
7015 if (!index)
7016 {
7017 reason = "scale without index";
7018 goto report_error;
7019 }
7020
7021 if (scale != 2 && scale != 4 && scale != 8)
7022 {
7023 reason = "scale is not a valid multiplier";
7024 goto report_error;
7025 }
7026 }
7027
7028 /* Validate displacement. */
7029 if (disp)
7030 {
7031 reason_rtx = disp;
7032
7033 if (GET_CODE (disp) == CONST
7034 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7035 switch (XINT (XEXP (disp, 0), 1))
7036 {
7037 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7038 used.  While the ABI also specifies 32bit relocations, we don't produce
7039 them at all and use IP-relative addressing instead. */
7040 case UNSPEC_GOT:
7041 case UNSPEC_GOTOFF:
7042 gcc_assert (flag_pic);
7043 if (!TARGET_64BIT)
7044 goto is_legitimate_pic;
7045 reason = "64bit address unspec";
7046 goto report_error;
7047
7048 case UNSPEC_GOTPCREL:
7049 gcc_assert (flag_pic);
7050 goto is_legitimate_pic;
7051
7052 case UNSPEC_GOTTPOFF:
7053 case UNSPEC_GOTNTPOFF:
7054 case UNSPEC_INDNTPOFF:
7055 case UNSPEC_NTPOFF:
7056 case UNSPEC_DTPOFF:
7057 break;
7058
7059 default:
7060 reason = "invalid address unspec";
7061 goto report_error;
7062 }
7063
7064 else if (SYMBOLIC_CONST (disp)
7065 && (flag_pic
7066 || (TARGET_MACHO
7067 #if TARGET_MACHO
7068 && MACHOPIC_INDIRECT
7069 && !machopic_operand_p (disp)
7070 #endif
7071 )))
7072 {
7073
7074 is_legitimate_pic:
7075 if (TARGET_64BIT && (index || base))
7076 {
7077 /* foo@dtpoff(%rX) is ok. */
7078 if (GET_CODE (disp) != CONST
7079 || GET_CODE (XEXP (disp, 0)) != PLUS
7080 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7081 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7082 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7083 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7084 {
7085 reason = "non-constant pic memory reference";
7086 goto report_error;
7087 }
7088 }
7089 else if (! legitimate_pic_address_disp_p (disp))
7090 {
7091 reason = "displacement is an invalid pic construct";
7092 goto report_error;
7093 }
7094
7095 /* This code used to verify that a symbolic pic displacement
7096 includes the pic_offset_table_rtx register.
7097
7098 While this is a good idea, unfortunately these constructs may
7099 be created by the "adds using lea" optimization for incorrect
7100 code like:
7101
7102 int a;
7103 int foo(int i)
7104 {
7105 return *(&a+i);
7106 }
7107
7108 This code is nonsensical, but results in addressing the
7109 GOT table with a pic_offset_table_rtx base.  We can't
7110 just refuse it easily, since it gets matched by the
7111 "addsi3" pattern, which later gets split into an lea when
7112 the output register differs from the input.  While this
7113 could be handled by a separate addsi pattern for this case
7114 that never results in an lea, disabling this test seems to
7115 be the easier and correct fix for the crash. */
7116 }
7117 else if (GET_CODE (disp) != LABEL_REF
7118 && !CONST_INT_P (disp)
7119 && (GET_CODE (disp) != CONST
7120 || !legitimate_constant_p (disp))
7121 && (GET_CODE (disp) != SYMBOL_REF
7122 || !legitimate_constant_p (disp)))
7123 {
7124 reason = "displacement is not constant";
7125 goto report_error;
7126 }
7127 else if (TARGET_64BIT
7128 && !x86_64_immediate_operand (disp, VOIDmode))
7129 {
7130 reason = "displacement is out of range";
7131 goto report_error;
7132 }
7133 }
7134
7135 /* Everything looks valid. */
7136 return TRUE;
7137
7138 report_error:
7139 return FALSE;
7140 }
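
/* For example (a sketch, assuming 32-bit non-PIC code):

     (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %ecx) (const_int 4)))   accepted
     (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %ecx) (const_int 3)))   rejected,
                                           "scale is not a valid multiplier"

   Only scales of 1, 2, 4 and 8 survive the checks above.  */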
7141 \f
7142 /* Return a unique alias set for the GOT. */
7143
7144 static HOST_WIDE_INT
7145 ix86_GOT_alias_set (void)
7146 {
7147 static HOST_WIDE_INT set = -1;
7148 if (set == -1)
7149 set = new_alias_set ();
7150 return set;
7151 }
7152
7153 /* Return a legitimate reference for ORIG (an address) using the
7154 register REG. If REG is 0, a new pseudo is generated.
7155
7156 There are two types of references that must be handled:
7157
7158 1. Global data references must load the address from the GOT, via
7159 the PIC reg. An insn is emitted to do this load, and the reg is
7160 returned.
7161
7162 2. Static data references, constant pool addresses, and code labels
7163 compute the address as an offset from the GOT, whose base is in
7164 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7165 differentiate them from global data objects. The returned
7166 address is the PIC reg + an unspec constant.
7167
7168 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7169 reg also appears in the address. */
7170
7171 static rtx
7172 legitimize_pic_address (rtx orig, rtx reg)
7173 {
7174 rtx addr = orig;
7175 rtx new_rtx = orig;
7176 rtx base;
7177
7178 #if TARGET_MACHO
7179 if (TARGET_MACHO && !TARGET_64BIT)
7180 {
7181 if (reg == 0)
7182 reg = gen_reg_rtx (Pmode);
7183 /* Use the generic Mach-O PIC machinery. */
7184 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7185 }
7186 #endif
7187
7188 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7189 new_rtx = addr;
7190 else if (TARGET_64BIT
7191 && ix86_cmodel != CM_SMALL_PIC
7192 && gotoff_operand (addr, Pmode))
7193 {
7194 rtx tmpreg;
7195 /* This symbol may be referenced via a displacement from the PIC
7196 base address (@GOTOFF). */
7197
7198 if (reload_in_progress)
7199 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7200 if (GET_CODE (addr) == CONST)
7201 addr = XEXP (addr, 0);
7202 if (GET_CODE (addr) == PLUS)
7203 {
7204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7205 UNSPEC_GOTOFF);
7206 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7207 }
7208 else
7209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7210 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7211 if (!reg)
7212 tmpreg = gen_reg_rtx (Pmode);
7213 else
7214 tmpreg = reg;
7215 emit_move_insn (tmpreg, new_rtx);
7216
7217 if (reg != 0)
7218 {
7219 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7220 tmpreg, 1, OPTAB_DIRECT);
7221 new_rtx = reg;
7222 }
7223 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7224 }
7225 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7226 {
7227 /* This symbol may be referenced via a displacement from the PIC
7228 base address (@GOTOFF). */
7229
7230 if (reload_in_progress)
7231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7232 if (GET_CODE (addr) == CONST)
7233 addr = XEXP (addr, 0);
7234 if (GET_CODE (addr) == PLUS)
7235 {
7236 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7237 UNSPEC_GOTOFF);
7238 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7239 }
7240 else
7241 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7242 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7243 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7244
7245 if (reg != 0)
7246 {
7247 emit_move_insn (reg, new_rtx);
7248 new_rtx = reg;
7249 }
7250 }
7251 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7252 /* We can't use @GOTOFF for text labels on VxWorks;
7253 see gotoff_operand. */
7254 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7255 {
7256 /* Given that we've already handled dllimport variables separately
7257 in legitimize_address, and all other variables should satisfy
7258 legitimate_pic_address_disp_p, we should never arrive here. */
7259 gcc_assert (!TARGET_64BIT_MS_ABI);
7260
7261 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7262 {
7263 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7264 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7265 new_rtx = gen_const_mem (Pmode, new_rtx);
7266 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7267
7268 if (reg == 0)
7269 reg = gen_reg_rtx (Pmode);
7270 /* Use gen_movsi directly; otherwise the address is loaded
7271 into a register for CSE.  We don't want to CSE these addresses;
7272 instead we CSE the addresses loaded from the GOT table, so skip this. */
7273 emit_insn (gen_movsi (reg, new_rtx));
7274 new_rtx = reg;
7275 }
7276 else
7277 {
7278 /* This symbol must be referenced via a load from the
7279 Global Offset Table (@GOT). */
7280
7281 if (reload_in_progress)
7282 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7285 if (TARGET_64BIT)
7286 new_rtx = force_reg (Pmode, new_rtx);
7287 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7288 new_rtx = gen_const_mem (Pmode, new_rtx);
7289 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7290
7291 if (reg == 0)
7292 reg = gen_reg_rtx (Pmode);
7293 emit_move_insn (reg, new_rtx);
7294 new_rtx = reg;
7295 }
7296 }
7297 else
7298 {
7299 if (CONST_INT_P (addr)
7300 && !x86_64_immediate_operand (addr, VOIDmode))
7301 {
7302 if (reg)
7303 {
7304 emit_move_insn (reg, addr);
7305 new_rtx = reg;
7306 }
7307 else
7308 new_rtx = force_reg (Pmode, addr);
7309 }
7310 else if (GET_CODE (addr) == CONST)
7311 {
7312 addr = XEXP (addr, 0);
7313
7314 /* We must match the stuff we generated before.  Assume the only
7315 unspecs that can get here are ours. Not that we could do
7316 anything with them anyway.... */
7317 if (GET_CODE (addr) == UNSPEC
7318 || (GET_CODE (addr) == PLUS
7319 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7320 return orig;
7321 gcc_assert (GET_CODE (addr) == PLUS);
7322 }
7323 if (GET_CODE (addr) == PLUS)
7324 {
7325 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7326
7327 /* Check first to see if this is a constant offset from a @GOTOFF
7328 symbol reference. */
7329 if (gotoff_operand (op0, Pmode)
7330 && CONST_INT_P (op1))
7331 {
7332 if (!TARGET_64BIT)
7333 {
7334 if (reload_in_progress)
7335 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7336 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7337 UNSPEC_GOTOFF);
7338 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7339 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7340 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7341
7342 if (reg != 0)
7343 {
7344 emit_move_insn (reg, new_rtx);
7345 new_rtx = reg;
7346 }
7347 }
7348 else
7349 {
7350 if (INTVAL (op1) < -16*1024*1024
7351 || INTVAL (op1) >= 16*1024*1024)
7352 {
7353 if (!x86_64_immediate_operand (op1, Pmode))
7354 op1 = force_reg (Pmode, op1);
7355 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7356 }
7357 }
7358 }
7359 else
7360 {
7361 base = legitimize_pic_address (XEXP (addr, 0), reg);
7362 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7363 base == reg ? NULL_RTX : reg);
7364
7365 if (CONST_INT_P (new_rtx))
7366 new_rtx = plus_constant (base, INTVAL (new_rtx));
7367 else
7368 {
7369 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7370 {
7371 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7372 new_rtx = XEXP (new_rtx, 1);
7373 }
7374 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7375 }
7376 }
7377 }
7378 }
7379 return new_rtx;
7380 }
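
/* Roughly, for 32-bit ELF PIC this rewrites

     (symbol_ref "glob")   [global data]

   as a load from glob@GOT(%ebx), i.e.

     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "glob")] UNSPEC_GOT))))

   and rewrites a local/static (symbol_ref "loc") as the address
   loc@GOTOFF(%ebx), i.e.

     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "loc")] UNSPEC_GOTOFF)))

   (Illustrative only; the exact form depends on the target and code model.)  */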
7381 \f
7382 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7383
7384 static rtx
7385 get_thread_pointer (int to_reg)
7386 {
7387 rtx tp, reg, insn;
7388
7389 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7390 if (!to_reg)
7391 return tp;
7392
7393 reg = gen_reg_rtx (Pmode);
7394 insn = gen_rtx_SET (VOIDmode, reg, tp);
7395 insn = emit_insn (insn);
7396
7397 return reg;
7398 }
7399
7400 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7401 false if we expect this to be used for a memory address and true if
7402 we expect to load the address into a register. */
7403
7404 static rtx
7405 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7406 {
7407 rtx dest, base, off, pic, tp;
7408 int type;
7409
7410 switch (model)
7411 {
7412 case TLS_MODEL_GLOBAL_DYNAMIC:
7413 dest = gen_reg_rtx (Pmode);
7414 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7415
7416 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7417 {
7418 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7419
7420 start_sequence ();
7421 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7422 insns = get_insns ();
7423 end_sequence ();
7424
7425 CONST_OR_PURE_CALL_P (insns) = 1;
7426 emit_libcall_block (insns, dest, rax, x);
7427 }
7428 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7429 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7430 else
7431 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7432
7433 if (TARGET_GNU2_TLS)
7434 {
7435 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7436
7437 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7438 }
7439 break;
7440
7441 case TLS_MODEL_LOCAL_DYNAMIC:
7442 base = gen_reg_rtx (Pmode);
7443 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7444
7445 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7446 {
7447 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7448
7449 start_sequence ();
7450 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7451 insns = get_insns ();
7452 end_sequence ();
7453
7454 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7455 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7456 CONST_OR_PURE_CALL_P (insns) = 1;
7457 emit_libcall_block (insns, base, rax, note);
7458 }
7459 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7460 emit_insn (gen_tls_local_dynamic_base_64 (base));
7461 else
7462 emit_insn (gen_tls_local_dynamic_base_32 (base));
7463
7464 if (TARGET_GNU2_TLS)
7465 {
7466 rtx x = ix86_tls_module_base ();
7467
7468 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7469 gen_rtx_MINUS (Pmode, x, tp));
7470 }
7471
7472 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7473 off = gen_rtx_CONST (Pmode, off);
7474
7475 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7476
7477 if (TARGET_GNU2_TLS)
7478 {
7479 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7480
7481 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7482 }
7483
7484 break;
7485
7486 case TLS_MODEL_INITIAL_EXEC:
7487 if (TARGET_64BIT)
7488 {
7489 pic = NULL;
7490 type = UNSPEC_GOTNTPOFF;
7491 }
7492 else if (flag_pic)
7493 {
7494 if (reload_in_progress)
7495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7496 pic = pic_offset_table_rtx;
7497 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7498 }
7499 else if (!TARGET_ANY_GNU_TLS)
7500 {
7501 pic = gen_reg_rtx (Pmode);
7502 emit_insn (gen_set_got (pic));
7503 type = UNSPEC_GOTTPOFF;
7504 }
7505 else
7506 {
7507 pic = NULL;
7508 type = UNSPEC_INDNTPOFF;
7509 }
7510
7511 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7512 off = gen_rtx_CONST (Pmode, off);
7513 if (pic)
7514 off = gen_rtx_PLUS (Pmode, pic, off);
7515 off = gen_const_mem (Pmode, off);
7516 set_mem_alias_set (off, ix86_GOT_alias_set ());
7517
7518 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7519 {
7520 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7521 off = force_reg (Pmode, off);
7522 return gen_rtx_PLUS (Pmode, base, off);
7523 }
7524 else
7525 {
7526 base = get_thread_pointer (true);
7527 dest = gen_reg_rtx (Pmode);
7528 emit_insn (gen_subsi3 (dest, base, off));
7529 }
7530 break;
7531
7532 case TLS_MODEL_LOCAL_EXEC:
7533 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7534 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7535 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7536 off = gen_rtx_CONST (Pmode, off);
7537
7538 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7539 {
7540 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7541 return gen_rtx_PLUS (Pmode, base, off);
7542 }
7543 else
7544 {
7545 base = get_thread_pointer (true);
7546 dest = gen_reg_rtx (Pmode);
7547 emit_insn (gen_subsi3 (dest, base, off));
7548 }
7549 break;
7550
7551 default:
7552 gcc_unreachable ();
7553 }
7554
7555 return dest;
7556 }
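
/* As a rough sketch (GNU TLS): a local-exec reference to "tcb_var" ends
   up as

     thread pointer + (const (unspec [(symbol_ref "tcb_var")] UNSPEC_NTPOFF))

   which prints as tcb_var@NTPOFF on ia32 and tcb_var@TPOFF on x86_64,
   while initial-exec under 32-bit PIC first loads the offset from
   tcb_var@GOTNTPOFF(%ebx).  The exact sequences depend on
   TARGET_GNU2_TLS and TARGET_TLS_DIRECT_SEG_REFS.  */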
7557
7558 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7559 to symbol DECL. */
7560
7561 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7562 htab_t dllimport_map;
7563
7564 static tree
7565 get_dllimport_decl (tree decl)
7566 {
7567 struct tree_map *h, in;
7568 void **loc;
7569 const char *name;
7570 const char *prefix;
7571 size_t namelen, prefixlen;
7572 char *imp_name;
7573 tree to;
7574 rtx rtl;
7575
7576 if (!dllimport_map)
7577 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7578
7579 in.hash = htab_hash_pointer (decl);
7580 in.base.from = decl;
7581 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7582 h = (struct tree_map *) *loc;
7583 if (h)
7584 return h->to;
7585
7586 *loc = h = GGC_NEW (struct tree_map);
7587 h->hash = in.hash;
7588 h->base.from = decl;
7589 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7590 DECL_ARTIFICIAL (to) = 1;
7591 DECL_IGNORED_P (to) = 1;
7592 DECL_EXTERNAL (to) = 1;
7593 TREE_READONLY (to) = 1;
7594
7595 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7596 name = targetm.strip_name_encoding (name);
7597 if (name[0] == FASTCALL_PREFIX)
7598 {
7599 name++;
7600 prefix = "*__imp_";
7601 }
7602 else
7603 prefix = "*__imp__";
7604
7605 namelen = strlen (name);
7606 prefixlen = strlen (prefix);
7607 imp_name = (char *) alloca (namelen + prefixlen + 1);
7608 memcpy (imp_name, prefix, prefixlen);
7609 memcpy (imp_name + prefixlen, name, namelen + 1);
7610
7611 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7612 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7613 SET_SYMBOL_REF_DECL (rtl, to);
7614 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7615
7616 rtl = gen_const_mem (Pmode, rtl);
7617 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7618
7619 SET_DECL_RTL (to, rtl);
7620
7621 return to;
7622 }
7623
7624 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7625 true if we require the result be a register. */
7626
7627 static rtx
7628 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7629 {
7630 tree imp_decl;
7631 rtx x;
7632
7633 gcc_assert (SYMBOL_REF_DECL (symbol));
7634 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7635
7636 x = DECL_RTL (imp_decl);
7637 if (want_reg)
7638 x = force_reg (Pmode, x);
7639 return x;
7640 }
7641
7642 /* Try machine-dependent ways of modifying an illegitimate address
7643 to be legitimate. If we find one, return the new, valid address.
7644 This macro is used in only one place: `memory_address' in explow.c.
7645
7646 OLDX is the address as it was before break_out_memory_refs was called.
7647 In some cases it is useful to look at this to decide what needs to be done.
7648
7649 MODE and WIN are passed so that this macro can use
7650 GO_IF_LEGITIMATE_ADDRESS.
7651
7652 It is always safe for this macro to do nothing. It exists to recognize
7653 opportunities to optimize the output.
7654
7655 For the 80386, we handle X+REG by loading X into a register R and
7656 using R+REG. R will go in a general reg and indexing will be used.
7657 However, if REG is a broken-out memory address or multiplication,
7658 nothing needs to be done because REG can certainly go in a general reg.
7659
7660 When -fpic is used, special handling is needed for symbolic references.
7661 See comments by legitimize_pic_address in i386.c for details. */
7662
7663 rtx
7664 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7665 {
7666 int changed = 0;
7667 unsigned log;
7668
7669 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7670 if (log)
7671 return legitimize_tls_address (x, (enum tls_model) log, false);
7672 if (GET_CODE (x) == CONST
7673 && GET_CODE (XEXP (x, 0)) == PLUS
7674 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7675 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7676 {
7677 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7678 (enum tls_model) log, false);
7679 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7680 }
7681
7682 if (flag_pic && SYMBOLIC_CONST (x))
7683 return legitimize_pic_address (x, 0);
7684
7685 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7686 {
7687 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7688 return legitimize_dllimport_symbol (x, true);
7689 if (GET_CODE (x) == CONST
7690 && GET_CODE (XEXP (x, 0)) == PLUS
7691 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7692 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7693 {
7694 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7695 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7696 }
7697 }
7698
7699 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7700 if (GET_CODE (x) == ASHIFT
7701 && CONST_INT_P (XEXP (x, 1))
7702 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7703 {
7704 changed = 1;
7705 log = INTVAL (XEXP (x, 1));
7706 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7707 GEN_INT (1 << log));
7708 }
7709
7710 if (GET_CODE (x) == PLUS)
7711 {
7712 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7713
7714 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7715 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7716 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7717 {
7718 changed = 1;
7719 log = INTVAL (XEXP (XEXP (x, 0), 1));
7720 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7721 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7722 GEN_INT (1 << log));
7723 }
7724
7725 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7726 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7727 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7728 {
7729 changed = 1;
7730 log = INTVAL (XEXP (XEXP (x, 1), 1));
7731 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7732 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7733 GEN_INT (1 << log));
7734 }
7735
7736 /* Put multiply first if it isn't already. */
7737 if (GET_CODE (XEXP (x, 1)) == MULT)
7738 {
7739 rtx tmp = XEXP (x, 0);
7740 XEXP (x, 0) = XEXP (x, 1);
7741 XEXP (x, 1) = tmp;
7742 changed = 1;
7743 }
7744
7745 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7746 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7747 created by virtual register instantiation, register elimination, and
7748 similar optimizations. */
7749 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7750 {
7751 changed = 1;
7752 x = gen_rtx_PLUS (Pmode,
7753 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7754 XEXP (XEXP (x, 1), 0)),
7755 XEXP (XEXP (x, 1), 1));
7756 }
7757
7758 /* Canonicalize
7759 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7760 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7761 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7762 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7763 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7764 && CONSTANT_P (XEXP (x, 1)))
7765 {
7766 rtx constant;
7767 rtx other = NULL_RTX;
7768
7769 if (CONST_INT_P (XEXP (x, 1)))
7770 {
7771 constant = XEXP (x, 1);
7772 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7773 }
7774 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7775 {
7776 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7777 other = XEXP (x, 1);
7778 }
7779 else
7780 constant = 0;
7781
7782 if (constant)
7783 {
7784 changed = 1;
7785 x = gen_rtx_PLUS (Pmode,
7786 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7787 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7788 plus_constant (other, INTVAL (constant)));
7789 }
7790 }
7791
7792 if (changed && legitimate_address_p (mode, x, FALSE))
7793 return x;
7794
7795 if (GET_CODE (XEXP (x, 0)) == MULT)
7796 {
7797 changed = 1;
7798 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7799 }
7800
7801 if (GET_CODE (XEXP (x, 1)) == MULT)
7802 {
7803 changed = 1;
7804 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7805 }
7806
7807 if (changed
7808 && REG_P (XEXP (x, 1))
7809 && REG_P (XEXP (x, 0)))
7810 return x;
7811
7812 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7813 {
7814 changed = 1;
7815 x = legitimize_pic_address (x, 0);
7816 }
7817
7818 if (changed && legitimate_address_p (mode, x, FALSE))
7819 return x;
7820
7821 if (REG_P (XEXP (x, 0)))
7822 {
7823 rtx temp = gen_reg_rtx (Pmode);
7824 rtx val = force_operand (XEXP (x, 1), temp);
7825 if (val != temp)
7826 emit_move_insn (temp, val);
7827
7828 XEXP (x, 1) = temp;
7829 return x;
7830 }
7831
7832 else if (REG_P (XEXP (x, 1)))
7833 {
7834 rtx temp = gen_reg_rtx (Pmode);
7835 rtx val = force_operand (XEXP (x, 0), temp);
7836 if (val != temp)
7837 emit_move_insn (temp, val);
7838
7839 XEXP (x, 0) = temp;
7840 return x;
7841 }
7842 }
7843
7844 return x;
7845 }
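
/* A small example of the canonicalizations above (illustrative only):

     (plus (reg %eax) (ashift (reg %ebx) (const_int 2)))
   becomes
     (plus (mult (reg %ebx) (const_int 4)) (reg %eax))

   i.e. the shift is rewritten as a multiply and the multiply is moved to
   the first operand, the form ix86_decompose_address expects.  */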
7846 \f
7847 /* Print an integer constant expression in assembler syntax. Addition
7848 and subtraction are the only arithmetic that may appear in these
7849 expressions. FILE is the stdio stream to write to, X is the rtx, and
7850 CODE is the operand print code from the output string. */
7851
7852 static void
7853 output_pic_addr_const (FILE *file, rtx x, int code)
7854 {
7855 char buf[256];
7856
7857 switch (GET_CODE (x))
7858 {
7859 case PC:
7860 gcc_assert (flag_pic);
7861 putc ('.', file);
7862 break;
7863
7864 case SYMBOL_REF:
7865 if (! TARGET_MACHO || TARGET_64BIT)
7866 output_addr_const (file, x);
7867 else
7868 {
7869 const char *name = XSTR (x, 0);
7870
7871 /* Mark the decl as referenced so that cgraph will
7872 output the function. */
7873 if (SYMBOL_REF_DECL (x))
7874 mark_decl_referenced (SYMBOL_REF_DECL (x));
7875
7876 #if TARGET_MACHO
7877 if (MACHOPIC_INDIRECT
7878 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7879 name = machopic_indirection_name (x, /*stub_p=*/true);
7880 #endif
7881 assemble_name (file, name);
7882 }
7883 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7884 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7885 fputs ("@PLT", file);
7886 break;
7887
7888 case LABEL_REF:
7889 x = XEXP (x, 0);
7890 /* FALLTHRU */
7891 case CODE_LABEL:
7892 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7893 assemble_name (asm_out_file, buf);
7894 break;
7895
7896 case CONST_INT:
7897 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7898 break;
7899
7900 case CONST:
7901 /* This used to output parentheses around the expression,
7902 but that does not work on the 386 (either ATT or BSD assembler). */
7903 output_pic_addr_const (file, XEXP (x, 0), code);
7904 break;
7905
7906 case CONST_DOUBLE:
7907 if (GET_MODE (x) == VOIDmode)
7908 {
7909 /* We can use %d if the number is <32 bits and positive. */
7910 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7911 fprintf (file, "0x%lx%08lx",
7912 (unsigned long) CONST_DOUBLE_HIGH (x),
7913 (unsigned long) CONST_DOUBLE_LOW (x));
7914 else
7915 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7916 }
7917 else
7918 /* We can't handle floating point constants;
7919 PRINT_OPERAND must handle them. */
7920 output_operand_lossage ("floating constant misused");
7921 break;
7922
7923 case PLUS:
7924 /* Some assemblers need integer constants to appear first. */
7925 if (CONST_INT_P (XEXP (x, 0)))
7926 {
7927 output_pic_addr_const (file, XEXP (x, 0), code);
7928 putc ('+', file);
7929 output_pic_addr_const (file, XEXP (x, 1), code);
7930 }
7931 else
7932 {
7933 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7934 output_pic_addr_const (file, XEXP (x, 1), code);
7935 putc ('+', file);
7936 output_pic_addr_const (file, XEXP (x, 0), code);
7937 }
7938 break;
7939
7940 case MINUS:
7941 if (!TARGET_MACHO)
7942 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7943 output_pic_addr_const (file, XEXP (x, 0), code);
7944 putc ('-', file);
7945 output_pic_addr_const (file, XEXP (x, 1), code);
7946 if (!TARGET_MACHO)
7947 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7948 break;
7949
7950 case UNSPEC:
7951 gcc_assert (XVECLEN (x, 0) == 1);
7952 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7953 switch (XINT (x, 1))
7954 {
7955 case UNSPEC_GOT:
7956 fputs ("@GOT", file);
7957 break;
7958 case UNSPEC_GOTOFF:
7959 fputs ("@GOTOFF", file);
7960 break;
7961 case UNSPEC_PLTOFF:
7962 fputs ("@PLTOFF", file);
7963 break;
7964 case UNSPEC_GOTPCREL:
7965 fputs ("@GOTPCREL(%rip)", file);
7966 break;
7967 case UNSPEC_GOTTPOFF:
7968 /* FIXME: This might be @TPOFF in Sun ld too. */
7969 fputs ("@GOTTPOFF", file);
7970 break;
7971 case UNSPEC_TPOFF:
7972 fputs ("@TPOFF", file);
7973 break;
7974 case UNSPEC_NTPOFF:
7975 if (TARGET_64BIT)
7976 fputs ("@TPOFF", file);
7977 else
7978 fputs ("@NTPOFF", file);
7979 break;
7980 case UNSPEC_DTPOFF:
7981 fputs ("@DTPOFF", file);
7982 break;
7983 case UNSPEC_GOTNTPOFF:
7984 if (TARGET_64BIT)
7985 fputs ("@GOTTPOFF(%rip)", file);
7986 else
7987 fputs ("@GOTNTPOFF", file);
7988 break;
7989 case UNSPEC_INDNTPOFF:
7990 fputs ("@INDNTPOFF", file);
7991 break;
7992 default:
7993 output_operand_lossage ("invalid UNSPEC as operand");
7994 break;
7995 }
7996 break;
7997
7998 default:
7999 output_operand_lossage ("invalid expression as operand");
8000 }
8001 }
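
/* For instance (illustrative only):

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))     prints  foo@GOTOFF
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))   prints  foo@GOTPCREL(%rip)

   and a MINUS of two labels is wrapped in [] (AT&T) or () (Intel),
   except on Mach-O.  */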
8002
8003 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8004 We need to emit DTP-relative relocations. */
8005
8006 static void ATTRIBUTE_UNUSED
8007 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8008 {
8009 fputs (ASM_LONG, file);
8010 output_addr_const (file, x);
8011 fputs ("@DTPOFF", file);
8012 switch (size)
8013 {
8014 case 4:
8015 break;
8016 case 8:
8017 fputs (", 0", file);
8018 break;
8019 default:
8020 gcc_unreachable ();
8021 }
8022 }
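
/* The emitted assembly is roughly

     .long   foo@DTPOFF          (size 4)
     .long   foo@DTPOFF, 0       (size 8)

   modulo the exact spelling of ASM_LONG on the target.  */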
8023
8024 /* In the name of slightly smaller debug output, and to cater to
8025 general assembler lossage, recognize PIC+GOTOFF and turn it back
8026 into a direct symbol reference.
8027
8028 On Darwin, this is necessary to avoid a crash, because Darwin
8029 has a different PIC label for each routine but the DWARF debugging
8030 information is not associated with any particular routine, so it's
8031 necessary to remove references to the PIC label from RTL stored by
8032 the DWARF output code. */
8033
8034 static rtx
8035 ix86_delegitimize_address (rtx orig_x)
8036 {
8037 rtx x = orig_x;
8038 /* reg_addend is NULL or a multiple of some register. */
8039 rtx reg_addend = NULL_RTX;
8040 /* const_addend is NULL or a const_int. */
8041 rtx const_addend = NULL_RTX;
8042 /* This is the result, or NULL. */
8043 rtx result = NULL_RTX;
8044
8045 if (MEM_P (x))
8046 x = XEXP (x, 0);
8047
8048 if (TARGET_64BIT)
8049 {
8050 if (GET_CODE (x) != CONST
8051 || GET_CODE (XEXP (x, 0)) != UNSPEC
8052 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8053 || !MEM_P (orig_x))
8054 return orig_x;
8055 return XVECEXP (XEXP (x, 0), 0, 0);
8056 }
8057
8058 if (GET_CODE (x) != PLUS
8059 || GET_CODE (XEXP (x, 1)) != CONST)
8060 return orig_x;
8061
8062 if (REG_P (XEXP (x, 0))
8063 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8064 /* %ebx + GOT/GOTOFF */
8065 ;
8066 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8067 {
8068 /* %ebx + %reg * scale + GOT/GOTOFF */
8069 reg_addend = XEXP (x, 0);
8070 if (REG_P (XEXP (reg_addend, 0))
8071 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8072 reg_addend = XEXP (reg_addend, 1);
8073 else if (REG_P (XEXP (reg_addend, 1))
8074 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8075 reg_addend = XEXP (reg_addend, 0);
8076 else
8077 return orig_x;
8078 if (!REG_P (reg_addend)
8079 && GET_CODE (reg_addend) != MULT
8080 && GET_CODE (reg_addend) != ASHIFT)
8081 return orig_x;
8082 }
8083 else
8084 return orig_x;
8085
8086 x = XEXP (XEXP (x, 1), 0);
8087 if (GET_CODE (x) == PLUS
8088 && CONST_INT_P (XEXP (x, 1)))
8089 {
8090 const_addend = XEXP (x, 1);
8091 x = XEXP (x, 0);
8092 }
8093
8094 if (GET_CODE (x) == UNSPEC
8095 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8096 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8097 result = XVECEXP (x, 0, 0);
8098
8099 if (TARGET_MACHO && darwin_local_data_pic (x)
8100 && !MEM_P (orig_x))
8101 result = XEXP (x, 0);
8102
8103 if (! result)
8104 return orig_x;
8105
8106 if (const_addend)
8107 result = gen_rtx_PLUS (Pmode, result, const_addend);
8108 if (reg_addend)
8109 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8110 return result;
8111 }
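
/* E.g. on ia32 (illustrative only):

     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref "x") when ORIG_X is not a MEM,
   and a GOTPCREL load is similarly stripped in 64-bit mode.  */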
8112
8113 /* If X is a machine specific address (i.e. a symbol or label being
8114 referenced as a displacement from the GOT implemented using an
8115 UNSPEC), then return the base term. Otherwise return X. */
8116
8117 rtx
8118 ix86_find_base_term (rtx x)
8119 {
8120 rtx term;
8121
8122 if (TARGET_64BIT)
8123 {
8124 if (GET_CODE (x) != CONST)
8125 return x;
8126 term = XEXP (x, 0);
8127 if (GET_CODE (term) == PLUS
8128 && (CONST_INT_P (XEXP (term, 1))
8129 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8130 term = XEXP (term, 0);
8131 if (GET_CODE (term) != UNSPEC
8132 || XINT (term, 1) != UNSPEC_GOTPCREL)
8133 return x;
8134
8135 term = XVECEXP (term, 0, 0);
8136
8137 if (GET_CODE (term) != SYMBOL_REF
8138 && GET_CODE (term) != LABEL_REF)
8139 return x;
8140
8141 return term;
8142 }
8143
8144 term = ix86_delegitimize_address (x);
8145
8146 if (GET_CODE (term) != SYMBOL_REF
8147 && GET_CODE (term) != LABEL_REF)
8148 return x;
8149
8150 return term;
8151 }
8152 \f
8153 static void
8154 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8155 int fp, FILE *file)
8156 {
8157 const char *suffix;
8158
8159 if (mode == CCFPmode || mode == CCFPUmode)
8160 {
8161 enum rtx_code second_code, bypass_code;
8162 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8163 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8164 code = ix86_fp_compare_code_to_integer (code);
8165 mode = CCmode;
8166 }
8167 if (reverse)
8168 code = reverse_condition (code);
8169
8170 switch (code)
8171 {
8172 case EQ:
8173 switch (mode)
8174 {
8175 case CCAmode:
8176 suffix = "a";
8177 break;
8178
8179 case CCCmode:
8180 suffix = "c";
8181 break;
8182
8183 case CCOmode:
8184 suffix = "o";
8185 break;
8186
8187 case CCSmode:
8188 suffix = "s";
8189 break;
8190
8191 default:
8192 suffix = "e";
8193 }
8194 break;
8195 case NE:
8196 switch (mode)
8197 {
8198 case CCAmode:
8199 suffix = "na";
8200 break;
8201
8202 case CCCmode:
8203 suffix = "nc";
8204 break;
8205
8206 case CCOmode:
8207 suffix = "no";
8208 break;
8209
8210 case CCSmode:
8211 suffix = "ns";
8212 break;
8213
8214 default:
8215 suffix = "ne";
8216 }
8217 break;
8218 case GT:
8219 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8220 suffix = "g";
8221 break;
8222 case GTU:
8223 /* ??? Use "nbe" instead of "a" to work around fcmov lossage on some
8224 assemblers.  Those same assemblers have the same but opposite lossage on cmov. */
8225 gcc_assert (mode == CCmode);
8226 suffix = fp ? "nbe" : "a";
8227 break;
8228 case LT:
8229 switch (mode)
8230 {
8231 case CCNOmode:
8232 case CCGOCmode:
8233 suffix = "s";
8234 break;
8235
8236 case CCmode:
8237 case CCGCmode:
8238 suffix = "l";
8239 break;
8240
8241 default:
8242 gcc_unreachable ();
8243 }
8244 break;
8245 case LTU:
8246 gcc_assert (mode == CCmode);
8247 suffix = "b";
8248 break;
8249 case GE:
8250 switch (mode)
8251 {
8252 case CCNOmode:
8253 case CCGOCmode:
8254 suffix = "ns";
8255 break;
8256
8257 case CCmode:
8258 case CCGCmode:
8259 suffix = "ge";
8260 break;
8261
8262 default:
8263 gcc_unreachable ();
8264 }
8265 break;
8266 case GEU:
8267 /* ??? As above. */
8268 gcc_assert (mode == CCmode);
8269 suffix = fp ? "nb" : "ae";
8270 break;
8271 case LE:
8272 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8273 suffix = "le";
8274 break;
8275 case LEU:
8276 gcc_assert (mode == CCmode);
8277 suffix = "be";
8278 break;
8279 case UNORDERED:
8280 suffix = fp ? "u" : "p";
8281 break;
8282 case ORDERED:
8283 suffix = fp ? "nu" : "np";
8284 break;
8285 default:
8286 gcc_unreachable ();
8287 }
8288 fputs (suffix, file);
8289 }
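
/* For example, (GTU, CCmode) yields the suffix "a" (or "nbe" for fcmov),
   so callers emit "ja"/"seta"/"cmova"; with REVERSE set, LT in CCGOCmode
   becomes GE and yields "ns".  (Illustrative only.)  */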
8290
8291 /* Print the name of register X to FILE based on its machine mode and number.
8292 If CODE is 'w', pretend the mode is HImode.
8293 If CODE is 'b', pretend the mode is QImode.
8294 If CODE is 'k', pretend the mode is SImode.
8295 If CODE is 'q', pretend the mode is DImode.
8296 If CODE is 'h', pretend the reg is the 'high' byte register.
8297 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
8298
8299 void
8300 print_reg (rtx x, int code, FILE *file)
8301 {
8302 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8303 && REGNO (x) != FRAME_POINTER_REGNUM
8304 && REGNO (x) != FLAGS_REG
8305 && REGNO (x) != FPSR_REG
8306 && REGNO (x) != FPCR_REG);
8307
8308 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8309 putc ('%', file);
8310
8311 if (code == 'w' || MMX_REG_P (x))
8312 code = 2;
8313 else if (code == 'b')
8314 code = 1;
8315 else if (code == 'k')
8316 code = 4;
8317 else if (code == 'q')
8318 code = 8;
8319 else if (code == 'y')
8320 code = 3;
8321 else if (code == 'h')
8322 code = 0;
8323 else
8324 code = GET_MODE_SIZE (GET_MODE (x));
8325
8326 /* Irritatingly, AMD extended registers use a different naming convention
8327 from the normal registers. */
8328 if (REX_INT_REG_P (x))
8329 {
8330 gcc_assert (TARGET_64BIT);
8331 switch (code)
8332 {
8333 case 0:
8334 error ("extended registers have no high halves");
8335 break;
8336 case 1:
8337 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8338 break;
8339 case 2:
8340 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8341 break;
8342 case 4:
8343 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8344 break;
8345 case 8:
8346 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8347 break;
8348 default:
8349 error ("unsupported operand size for extended register");
8350 break;
8351 }
8352 return;
8353 }
8354 switch (code)
8355 {
8356 case 3:
8357 if (STACK_TOP_P (x))
8358 {
8359 fputs ("st(0)", file);
8360 break;
8361 }
8362 /* FALLTHRU */
8363 case 8:
8364 case 4:
8365 case 12:
8366 if (! ANY_FP_REG_P (x))
8367 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8368 /* FALLTHRU */
8369 case 16:
8370 case 2:
8371 normal:
8372 fputs (hi_reg_name[REGNO (x)], file);
8373 break;
8374 case 1:
8375 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8376 goto normal;
8377 fputs (qi_reg_name[REGNO (x)], file);
8378 break;
8379 case 0:
8380 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8381 goto normal;
8382 fputs (qi_high_reg_name[REGNO (x)], file);
8383 break;
8384 default:
8385 gcc_unreachable ();
8386 }
8387 }
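
/* A few examples in AT&T syntax (illustrative only): for (reg:SI ax),
   code 'b' prints %al, 'w' prints %ax, 'k' prints %eax and, on a 64-bit
   target, 'q' prints %rax; for an extended register such as r8, code
   'k' prints %r8d and 'b' prints %r8b.  */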
8388
8389 /* Locate some local-dynamic symbol still in use by this function
8390 so that we can print its name in some tls_local_dynamic_base
8391 pattern. */
8392
8393 static int
8394 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8395 {
8396 rtx x = *px;
8397
8398 if (GET_CODE (x) == SYMBOL_REF
8399 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8400 {
8401 cfun->machine->some_ld_name = XSTR (x, 0);
8402 return 1;
8403 }
8404
8405 return 0;
8406 }
8407
8408 static const char *
8409 get_some_local_dynamic_name (void)
8410 {
8411 rtx insn;
8412
8413 if (cfun->machine->some_ld_name)
8414 return cfun->machine->some_ld_name;
8415
8416 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8417 if (INSN_P (insn)
8418 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8419 return cfun->machine->some_ld_name;
8420
8421 gcc_unreachable ();
8422 }
8423
8424 /* Meaning of CODE:
8425 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8426 C -- print opcode suffix for set/cmov insn.
8427 c -- like C, but print reversed condition
8428 F,f -- likewise, but for floating-point.
8429 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8430 otherwise nothing
8431 R -- print the prefix for register names.
8432 z -- print the opcode suffix for the size of the current operand.
8433 * -- print a star (in certain assembler syntax)
8434 A -- print an absolute memory reference.
8435 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8436 s -- print a shift double count, followed by the assembler's argument
8437 delimiter.
8438 b -- print the QImode name of the register for the indicated operand.
8439 %b0 would print %al if operands[0] is reg 0.
8440 w -- likewise, print the HImode name of the register.
8441 k -- likewise, print the SImode name of the register.
8442 q -- likewise, print the DImode name of the register.
8443 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8444 y -- print "st(0)" instead of "st" as a register.
8445 D -- print condition for SSE cmp instruction.
8446 P -- if PIC, print an @PLT suffix.
8447 X -- don't print any sort of PIC '@' suffix for a symbol.
8448 & -- print some in-use local-dynamic symbol name.
8449 H -- print a memory address offset by 8; used for sse high-parts
8450 */
8451
8452 void
8453 print_operand (FILE *file, rtx x, int code)
8454 {
8455 if (code)
8456 {
8457 switch (code)
8458 {
8459 case '*':
8460 if (ASSEMBLER_DIALECT == ASM_ATT)
8461 putc ('*', file);
8462 return;
8463
8464 case '&':
8465 assemble_name (file, get_some_local_dynamic_name ());
8466 return;
8467
8468 case 'A':
8469 switch (ASSEMBLER_DIALECT)
8470 {
8471 case ASM_ATT:
8472 putc ('*', file);
8473 break;
8474
8475 case ASM_INTEL:
8476 /* Intel syntax. For absolute addresses, registers should not
8477 be surrounded by brackets. */
8478 if (!REG_P (x))
8479 {
8480 putc ('[', file);
8481 PRINT_OPERAND (file, x, 0);
8482 putc (']', file);
8483 return;
8484 }
8485 break;
8486
8487 default:
8488 gcc_unreachable ();
8489 }
8490
8491 PRINT_OPERAND (file, x, 0);
8492 return;
8493
8494
8495 case 'L':
8496 if (ASSEMBLER_DIALECT == ASM_ATT)
8497 putc ('l', file);
8498 return;
8499
8500 case 'W':
8501 if (ASSEMBLER_DIALECT == ASM_ATT)
8502 putc ('w', file);
8503 return;
8504
8505 case 'B':
8506 if (ASSEMBLER_DIALECT == ASM_ATT)
8507 putc ('b', file);
8508 return;
8509
8510 case 'Q':
8511 if (ASSEMBLER_DIALECT == ASM_ATT)
8512 putc ('l', file);
8513 return;
8514
8515 case 'S':
8516 if (ASSEMBLER_DIALECT == ASM_ATT)
8517 putc ('s', file);
8518 return;
8519
8520 case 'T':
8521 if (ASSEMBLER_DIALECT == ASM_ATT)
8522 putc ('t', file);
8523 return;
8524
8525 case 'z':
8526 /* 387 opcodes don't get size suffixes if the operands are
8527 registers. */
8528 if (STACK_REG_P (x))
8529 return;
8530
8531 /* Likewise if using Intel opcodes. */
8532 if (ASSEMBLER_DIALECT == ASM_INTEL)
8533 return;
8534
8535 /* Derive the opcode suffix from the size of the operand. */
8536 switch (GET_MODE_SIZE (GET_MODE (x)))
8537 {
8538 case 1:
8539 putc ('b', file);
8540 return;
8541
8542 case 2:
8543 if (MEM_P (x))
8544 {
8545 #ifdef HAVE_GAS_FILDS_FISTS
8546 putc ('s', file);
8547 #endif
8548 return;
8549 }
8550 else
8551 putc ('w', file);
8552 return;
8553
8554 case 4:
8555 if (GET_MODE (x) == SFmode)
8556 {
8557 putc ('s', file);
8558 return;
8559 }
8560 else
8561 putc ('l', file);
8562 return;
8563
8564 case 12:
8565 case 16:
8566 putc ('t', file);
8567 return;
8568
8569 case 8:
8570 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8571 {
8572 #ifdef GAS_MNEMONICS
8573 putc ('q', file);
8574 #else
8575 putc ('l', file);
8576 putc ('l', file);
8577 #endif
8578 }
8579 else
8580 putc ('l', file);
8581 return;
8582
8583 default:
8584 gcc_unreachable ();
8585 }
8586
8587 case 'b':
8588 case 'w':
8589 case 'k':
8590 case 'q':
8591 case 'h':
8592 case 'y':
8593 case 'X':
8594 case 'P':
8595 break;
8596
8597 case 's':
8598 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8599 {
8600 PRINT_OPERAND (file, x, 0);
8601 putc (',', file);
8602 }
8603 return;
8604
8605 case 'D':
8606 /* Little bit of braindamage here.  The SSE compare instructions
8607 use completely different names for the comparisons than the
8608 fp conditional moves do. */
8609 switch (GET_CODE (x))
8610 {
8611 case EQ:
8612 case UNEQ:
8613 fputs ("eq", file);
8614 break;
8615 case LT:
8616 case UNLT:
8617 fputs ("lt", file);
8618 break;
8619 case LE:
8620 case UNLE:
8621 fputs ("le", file);
8622 break;
8623 case UNORDERED:
8624 fputs ("unord", file);
8625 break;
8626 case NE:
8627 case LTGT:
8628 fputs ("neq", file);
8629 break;
8630 case UNGE:
8631 case GE:
8632 fputs ("nlt", file);
8633 break;
8634 case UNGT:
8635 case GT:
8636 fputs ("nle", file);
8637 break;
8638 case ORDERED:
8639 fputs ("ord", file);
8640 break;
8641 default:
8642 gcc_unreachable ();
8643 }
8644 return;
8645 case 'O':
8646 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8647 if (ASSEMBLER_DIALECT == ASM_ATT)
8648 {
8649 switch (GET_MODE (x))
8650 {
8651 case HImode: putc ('w', file); break;
8652 case SImode:
8653 case SFmode: putc ('l', file); break;
8654 case DImode:
8655 case DFmode: putc ('q', file); break;
8656 default: gcc_unreachable ();
8657 }
8658 putc ('.', file);
8659 }
8660 #endif
8661 return;
8662 case 'C':
8663 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8664 return;
8665 case 'F':
8666 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8667 if (ASSEMBLER_DIALECT == ASM_ATT)
8668 putc ('.', file);
8669 #endif
8670 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8671 return;
8672
8673 /* Like above, but reverse condition */
8674 case 'c':
8675 /* Check to see if argument to %c is really a constant
8676 and not a condition code which needs to be reversed. */
8677 if (!COMPARISON_P (x))
8678 {
8679 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8680 return;
8681 }
8682 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8683 return;
8684 case 'f':
8685 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8686 if (ASSEMBLER_DIALECT == ASM_ATT)
8687 putc ('.', file);
8688 #endif
8689 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8690 return;
8691
8692 case 'H':
8693 /* It doesn't actually matter what mode we use here, as we're
8694 only going to use this for printing. */
8695 x = adjust_address_nv (x, DImode, 8);
8696 break;
8697
8698 case '+':
8699 {
8700 rtx x;
8701
8702 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8703 return;
8704
8705 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8706 if (x)
8707 {
8708 int pred_val = INTVAL (XEXP (x, 0));
8709
8710 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8711 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8712 {
8713 int taken = pred_val > REG_BR_PROB_BASE / 2;
8714 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8715
8716 /* Emit hints only in the cases where the default branch prediction
8717 heuristics would fail. */
8718 if (taken != cputaken)
8719 {
8720 /* We use 3e (DS) prefix for taken branches and
8721 2e (CS) prefix for not taken branches. */
8722 if (taken)
8723 fputs ("ds ; ", file);
8724 else
8725 fputs ("cs ; ", file);
8726 }
8727 }
8728 }
8729 return;
8730 }
8731 default:
8732 output_operand_lossage ("invalid operand code '%c'", code);
8733 }
8734 }
8735
8736 if (REG_P (x))
8737 print_reg (x, code, file);
8738
8739 else if (MEM_P (x))
8740 {
8741 /* No `byte ptr' prefix for call instructions. */
8742 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8743 {
8744 const char * size;
8745 switch (GET_MODE_SIZE (GET_MODE (x)))
8746 {
8747 case 1: size = "BYTE"; break;
8748 case 2: size = "WORD"; break;
8749 case 4: size = "DWORD"; break;
8750 case 8: size = "QWORD"; break;
8751 case 12: size = "XWORD"; break;
8752 case 16: size = "XMMWORD"; break;
8753 default:
8754 gcc_unreachable ();
8755 }
8756
8757 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8758 if (code == 'b')
8759 size = "BYTE";
8760 else if (code == 'w')
8761 size = "WORD";
8762 else if (code == 'k')
8763 size = "DWORD";
8764
8765 fputs (size, file);
8766 fputs (" PTR ", file);
8767 }
8768
8769 x = XEXP (x, 0);
8770 /* Avoid (%rip) for call operands. */
8771 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8772 && !CONST_INT_P (x))
8773 output_addr_const (file, x);
8774 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8775 output_operand_lossage ("invalid constraints for operand");
8776 else
8777 output_address (x);
8778 }
8779
8780 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8781 {
8782 REAL_VALUE_TYPE r;
8783 long l;
8784
8785 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8786 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8787
8788 if (ASSEMBLER_DIALECT == ASM_ATT)
8789 putc ('$', file);
8790 fprintf (file, "0x%08lx", l);
8791 }
8792
8793 /* These float cases don't actually occur as immediate operands. */
8794 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8795 {
8796 char dstr[30];
8797
8798 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8799 fprintf (file, "%s", dstr);
8800 }
8801
8802 else if (GET_CODE (x) == CONST_DOUBLE
8803 && GET_MODE (x) == XFmode)
8804 {
8805 char dstr[30];
8806
8807 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8808 fprintf (file, "%s", dstr);
8809 }
8810
8811 else
8812 {
8813 /* We have patterns that allow zero sets of memory, for instance.
8814 In 64-bit mode, we should probably support all 8-byte vectors,
8815 since we can in fact encode that into an immediate. */
8816 if (GET_CODE (x) == CONST_VECTOR)
8817 {
8818 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8819 x = const0_rtx;
8820 }
8821
8822 if (code != 'P')
8823 {
8824 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8825 {
8826 if (ASSEMBLER_DIALECT == ASM_ATT)
8827 putc ('$', file);
8828 }
8829 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8830 || GET_CODE (x) == LABEL_REF)
8831 {
8832 if (ASSEMBLER_DIALECT == ASM_ATT)
8833 putc ('$', file);
8834 else
8835 fputs ("OFFSET FLAT:", file);
8836 }
8837 }
8838 if (CONST_INT_P (x))
8839 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8840 else if (flag_pic)
8841 output_pic_addr_const (file, x, code);
8842 else
8843 output_addr_const (file, x);
8844 }
8845 }
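
/* Putting it together (illustrative only): for a SImode register
   operand, "%k0" prints the "%eax"-style name and "%b0" prints "%al";
   a MEM operand in Intel syntax gets a "DWORD PTR " size prefix; an
   immediate such as (const_int 5) prints "$5" in AT&T syntax and plain
   "5" in Intel syntax.  */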
8846 \f
8847 /* Print a memory operand whose address is ADDR. */
8848
8849 void
8850 print_operand_address (FILE *file, rtx addr)
8851 {
8852 struct ix86_address parts;
8853 rtx base, index, disp;
8854 int scale;
8855 int ok = ix86_decompose_address (addr, &parts);
8856
8857 gcc_assert (ok);
8858
8859 base = parts.base;
8860 index = parts.index;
8861 disp = parts.disp;
8862 scale = parts.scale;
8863
8864 switch (parts.seg)
8865 {
8866 case SEG_DEFAULT:
8867 break;
8868 case SEG_FS:
8869 case SEG_GS:
8870 if (USER_LABEL_PREFIX[0] == 0)
8871 putc ('%', file);
8872 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8873 break;
8874 default:
8875 gcc_unreachable ();
8876 }
8877
8878 if (!base && !index)
8879 {
8880 /* A displacement-only address requires special attention. */
8881
8882 if (CONST_INT_P (disp))
8883 {
8884 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8885 {
8886 if (USER_LABEL_PREFIX[0] == 0)
8887 putc ('%', file);
8888 fputs ("ds:", file);
8889 }
8890 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8891 }
8892 else if (flag_pic)
8893 output_pic_addr_const (file, disp, 0);
8894 else
8895 output_addr_const (file, disp);
8896
8897 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
8898 if (TARGET_64BIT)
8899 {
8900 if (GET_CODE (disp) == CONST
8901 && GET_CODE (XEXP (disp, 0)) == PLUS
8902 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8903 disp = XEXP (XEXP (disp, 0), 0);
8904 if (GET_CODE (disp) == LABEL_REF
8905 || (GET_CODE (disp) == SYMBOL_REF
8906 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8907 fputs ("(%rip)", file);
8908 }
8909 }
8910 else
8911 {
8912 if (ASSEMBLER_DIALECT == ASM_ATT)
8913 {
8914 if (disp)
8915 {
8916 if (flag_pic)
8917 output_pic_addr_const (file, disp, 0);
8918 else if (GET_CODE (disp) == LABEL_REF)
8919 output_asm_label (disp);
8920 else
8921 output_addr_const (file, disp);
8922 }
8923
8924 putc ('(', file);
8925 if (base)
8926 print_reg (base, 0, file);
8927 if (index)
8928 {
8929 putc (',', file);
8930 print_reg (index, 0, file);
8931 if (scale != 1)
8932 fprintf (file, ",%d", scale);
8933 }
8934 putc (')', file);
8935 }
8936 else
8937 {
8938 rtx offset = NULL_RTX;
8939
8940 if (disp)
8941 {
8942 /* Pull out the offset of a symbol; print any symbol itself. */
8943 if (GET_CODE (disp) == CONST
8944 && GET_CODE (XEXP (disp, 0)) == PLUS
8945 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8946 {
8947 offset = XEXP (XEXP (disp, 0), 1);
8948 disp = gen_rtx_CONST (VOIDmode,
8949 XEXP (XEXP (disp, 0), 0));
8950 }
8951
8952 if (flag_pic)
8953 output_pic_addr_const (file, disp, 0);
8954 else if (GET_CODE (disp) == LABEL_REF)
8955 output_asm_label (disp);
8956 else if (CONST_INT_P (disp))
8957 offset = disp;
8958 else
8959 output_addr_const (file, disp);
8960 }
8961
8962 putc ('[', file);
8963 if (base)
8964 {
8965 print_reg (base, 0, file);
8966 if (offset)
8967 {
8968 if (INTVAL (offset) >= 0)
8969 putc ('+', file);
8970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8971 }
8972 }
8973 else if (offset)
8974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8975 else
8976 putc ('0', file);
8977
8978 if (index)
8979 {
8980 putc ('+', file);
8981 print_reg (index, 0, file);
8982 if (scale != 1)
8983 fprintf (file, "*%d", scale);
8984 }
8985 putc (']', file);
8986 }
8987 }
8988 }
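
/* For example, a base + index*scale + displacement address prints as

     disp(%base,%index,scale)        in AT&T syntax, e.g. 12(%ebx,%ecx,4)
     [base+disp+index*scale]         in Intel syntax

   (a sketch; register prefixes and segment overrides follow the
   conventions handled above).  */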
8989
8990 bool
8991 output_addr_const_extra (FILE *file, rtx x)
8992 {
8993 rtx op;
8994
8995 if (GET_CODE (x) != UNSPEC)
8996 return false;
8997
8998 op = XVECEXP (x, 0, 0);
8999 switch (XINT (x, 1))
9000 {
9001 case UNSPEC_GOTTPOFF:
9002 output_addr_const (file, op);
9003 /* FIXME: This might be @TPOFF in Sun ld. */
9004 fputs ("@GOTTPOFF", file);
9005 break;
9006 case UNSPEC_TPOFF:
9007 output_addr_const (file, op);
9008 fputs ("@TPOFF", file);
9009 break;
9010 case UNSPEC_NTPOFF:
9011 output_addr_const (file, op);
9012 if (TARGET_64BIT)
9013 fputs ("@TPOFF", file);
9014 else
9015 fputs ("@NTPOFF", file);
9016 break;
9017 case UNSPEC_DTPOFF:
9018 output_addr_const (file, op);
9019 fputs ("@DTPOFF", file);
9020 break;
9021 case UNSPEC_GOTNTPOFF:
9022 output_addr_const (file, op);
9023 if (TARGET_64BIT)
9024 fputs ("@GOTTPOFF(%rip)", file);
9025 else
9026 fputs ("@GOTNTPOFF", file);
9027 break;
9028 case UNSPEC_INDNTPOFF:
9029 output_addr_const (file, op);
9030 fputs ("@INDNTPOFF", file);
9031 break;
9032
9033 default:
9034 return false;
9035 }
9036
9037 return true;
9038 }
9039 \f
9040 /* Split one or more DImode RTL references into pairs of SImode
9041 references. The RTL can be REG, offsettable MEM, integer constant, or
9042 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9043 split and "num" is its length. lo_half and hi_half are output arrays
9044 that parallel "operands". */
9045
9046 void
9047 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9048 {
9049 while (num--)
9050 {
9051 rtx op = operands[num];
9052
9053 /* simplify_subreg refuses to split volatile memory addresses,
9054 but we still have to handle them. */
9055 if (MEM_P (op))
9056 {
9057 lo_half[num] = adjust_address (op, SImode, 0);
9058 hi_half[num] = adjust_address (op, SImode, 4);
9059 }
9060 else
9061 {
9062 lo_half[num] = simplify_gen_subreg (SImode, op,
9063 GET_MODE (op) == VOIDmode
9064 ? DImode : GET_MODE (op), 0);
9065 hi_half[num] = simplify_gen_subreg (SImode, op,
9066 GET_MODE (op) == VOIDmode
9067 ? DImode : GET_MODE (op), 4);
9068 }
9069 }
9070 }
}
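/* For example, splitting the DImode constant 0x1122334455667788 yields
   lo_half = 0x55667788 and hi_half = 0x11223344 (x86 is little-endian),
   while a DImode MEM is split into SImode MEMs at offsets 0 and 4.  */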
9071 /* Split one or more TImode RTL references into pairs of DImode
9072 references. The RTL can be REG, offsettable MEM, integer constant, or
9073 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9074 split and "num" is its length. lo_half and hi_half are output arrays
9075 that parallel "operands". */
9076
9077 void
9078 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9079 {
9080 while (num--)
9081 {
9082 rtx op = operands[num];
9083
9084 /* simplify_subreg refuses to split volatile memory addresses, but we
9085 still have to handle them. */
9086 if (MEM_P (op))
9087 {
9088 lo_half[num] = adjust_address (op, DImode, 0);
9089 hi_half[num] = adjust_address (op, DImode, 8);
9090 }
9091 else
9092 {
9093 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9094 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9095 }
9096 }
9097 }
9098 \f
9099 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9100 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9101 is the expression of the binary operation. The output may either be
9102 emitted here, or returned to the caller, like all output_* functions.
9103
9104 There is no guarantee that the operands are the same mode, as they
9105 might be within FLOAT or FLOAT_EXTEND expressions. */
9106
9107 #ifndef SYSV386_COMPAT
9108 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9109 wants to fix the assemblers because that causes incompatibility
9110 with gcc. No-one wants to fix gcc because that causes
9111 incompatibility with assemblers... You can use the option of
9112 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9113 #define SYSV386_COMPAT 1
9114 #endif
9115
9116 const char *
9117 output_387_binary_op (rtx insn, rtx *operands)
9118 {
9119 static char buf[30];
9120 const char *p;
9121 const char *ssep;
9122 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9123
9124 #ifdef ENABLE_CHECKING
9125 /* Even if we do not want to check the inputs, this documents the input
9126 constraints, which helps in understanding the following code. */
9127 if (STACK_REG_P (operands[0])
9128 && ((REG_P (operands[1])
9129 && REGNO (operands[0]) == REGNO (operands[1])
9130 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9131 || (REG_P (operands[2])
9132 && REGNO (operands[0]) == REGNO (operands[2])
9133 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9134 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9135 ; /* ok */
9136 else
9137 gcc_assert (is_sse);
9138 #endif
9139
9140 switch (GET_CODE (operands[3]))
9141 {
9142 case PLUS:
9143 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9144 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9145 p = "fiadd";
9146 else
9147 p = "fadd";
9148 ssep = "add";
9149 break;
9150
9151 case MINUS:
9152 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9153 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9154 p = "fisub";
9155 else
9156 p = "fsub";
9157 ssep = "sub";
9158 break;
9159
9160 case MULT:
9161 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9162 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9163 p = "fimul";
9164 else
9165 p = "fmul";
9166 ssep = "mul";
9167 break;
9168
9169 case DIV:
9170 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9171 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9172 p = "fidiv";
9173 else
9174 p = "fdiv";
9175 ssep = "div";
9176 break;
9177
9178 default:
9179 gcc_unreachable ();
9180 }
9181
9182 if (is_sse)
9183 {
9184 strcpy (buf, ssep);
9185 if (GET_MODE (operands[0]) == SFmode)
9186 strcat (buf, "ss\t{%2, %0|%0, %2}");
9187 else
9188 strcat (buf, "sd\t{%2, %0|%0, %2}");
9189 return buf;
9190 }
9191 strcpy (buf, p);
9192
9193 switch (GET_CODE (operands[3]))
9194 {
9195 case MULT:
9196 case PLUS:
9197 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9198 {
9199 rtx temp = operands[2];
9200 operands[2] = operands[1];
9201 operands[1] = temp;
9202 }
9203
9204 /* We now know operands[0] == operands[1]. */
9205
9206 if (MEM_P (operands[2]))
9207 {
9208 p = "%z2\t%2";
9209 break;
9210 }
9211
9212 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9213 {
9214 if (STACK_TOP_P (operands[0]))
9215 /* How is it that we are storing to a dead operand[2]?
9216 Well, presumably operands[1] is dead too. We can't
9217 store the result to st(0) as st(0) gets popped on this
9218 instruction. Instead store to operands[2] (which I
9219 think has to be st(1)). st(1) will be popped later.
9220 gcc <= 2.8.1 didn't have this check and generated
9221 assembly code that the Unixware assembler rejected. */
9222 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9223 else
9224 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9225 break;
9226 }
9227
9228 if (STACK_TOP_P (operands[0]))
9229 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9230 else
9231 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9232 break;
9233
9234 case MINUS:
9235 case DIV:
9236 if (MEM_P (operands[1]))
9237 {
9238 p = "r%z1\t%1";
9239 break;
9240 }
9241
9242 if (MEM_P (operands[2]))
9243 {
9244 p = "%z2\t%2";
9245 break;
9246 }
9247
9248 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9249 {
9250 #if SYSV386_COMPAT
9251 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9252 derived assemblers, confusingly reverse the direction of
9253 the operation for fsub{r} and fdiv{r} when the
9254 destination register is not st(0). The Intel assembler
9255 doesn't have this brain damage. Read !SYSV386_COMPAT to
9256 figure out what the hardware really does. */
9257 if (STACK_TOP_P (operands[0]))
9258 p = "{p\t%0, %2|rp\t%2, %0}";
9259 else
9260 p = "{rp\t%2, %0|p\t%0, %2}";
9261 #else
9262 if (STACK_TOP_P (operands[0]))
9263 /* As above for fmul/fadd, we can't store to st(0). */
9264 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9265 else
9266 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9267 #endif
9268 break;
9269 }
9270
9271 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9272 {
9273 #if SYSV386_COMPAT
9274 if (STACK_TOP_P (operands[0]))
9275 p = "{rp\t%0, %1|p\t%1, %0}";
9276 else
9277 p = "{p\t%1, %0|rp\t%0, %1}";
9278 #else
9279 if (STACK_TOP_P (operands[0]))
9280 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9281 else
9282 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9283 #endif
9284 break;
9285 }
9286
9287 if (STACK_TOP_P (operands[0]))
9288 {
9289 if (STACK_TOP_P (operands[1]))
9290 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9291 else
9292 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9293 break;
9294 }
9295 else if (STACK_TOP_P (operands[1]))
9296 {
9297 #if SYSV386_COMPAT
9298 p = "{\t%1, %0|r\t%0, %1}";
9299 #else
9300 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9301 #endif
9302 }
9303 else
9304 {
9305 #if SYSV386_COMPAT
9306 p = "{r\t%2, %0|\t%0, %2}";
9307 #else
9308 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9309 #endif
9310 }
9311 break;
9312
9313 default:
9314 gcc_unreachable ();
9315 }
9316
9317 strcat (buf, p);
9318 return buf;
9319 }
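/* A note on reading the templates built above: the "{att|intel}" braces in
   an output template select between the AT&T and Intel assembler dialects,
   and a trailing "p" pops the x87 stack.  For instance, a PLUS whose
   operands[2] dies and whose destination is not st(0) ends up as
   "faddp\t{%2, %0|%0, %2}", while the SSE path produces e.g.
   "addsd\t{%2, %0|%0, %2}" for DFmode operands.  */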
9320
9321 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
9322
9323 int
9324 ix86_mode_needed (int entity, rtx insn)
9325 {
9326 enum attr_i387_cw mode;
9327
9328 /* The mode UNINITIALIZED is used to store the control word after a
9329 function call or ASM pattern. The mode ANY specifies that the function
9330 has no requirements on the control word and makes no changes to the
9331 bits we are interested in. */
9332
9333 if (CALL_P (insn)
9334 || (NONJUMP_INSN_P (insn)
9335 && (asm_noperands (PATTERN (insn)) >= 0
9336 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9337 return I387_CW_UNINITIALIZED;
9338
9339 if (recog_memoized (insn) < 0)
9340 return I387_CW_ANY;
9341
9342 mode = get_attr_i387_cw (insn);
9343
9344 switch (entity)
9345 {
9346 case I387_TRUNC:
9347 if (mode == I387_CW_TRUNC)
9348 return mode;
9349 break;
9350
9351 case I387_FLOOR:
9352 if (mode == I387_CW_FLOOR)
9353 return mode;
9354 break;
9355
9356 case I387_CEIL:
9357 if (mode == I387_CW_CEIL)
9358 return mode;
9359 break;
9360
9361 case I387_MASK_PM:
9362 if (mode == I387_CW_MASK_PM)
9363 return mode;
9364 break;
9365
9366 default:
9367 gcc_unreachable ();
9368 }
9369
9370 return I387_CW_ANY;
9371 }
9372
9373 /* Output code to initialize the control word copies used by the trunc?f?i
9374 and rounding patterns. MODE selects which rounding or exception-mask
9375 variant of the control word to set up and store in its stack slot. */
9376
9377 void
9378 emit_i387_cw_initialization (int mode)
9379 {
9380 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9381 rtx new_mode;
9382
9383 enum ix86_stack_slot slot;
9384
9385 rtx reg = gen_reg_rtx (HImode);
9386
9387 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9388 emit_move_insn (reg, copy_rtx (stored_mode));
9389
9390 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9391 {
9392 switch (mode)
9393 {
9394 case I387_CW_TRUNC:
9395 /* round toward zero (truncate) */
9396 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9397 slot = SLOT_CW_TRUNC;
9398 break;
9399
9400 case I387_CW_FLOOR:
9401 /* round down toward -oo */
9402 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9403 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9404 slot = SLOT_CW_FLOOR;
9405 break;
9406
9407 case I387_CW_CEIL:
9408 /* round up toward +oo */
9409 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9411 slot = SLOT_CW_CEIL;
9412 break;
9413
9414 case I387_CW_MASK_PM:
9415 /* mask precision exception for nearbyint() */
9416 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9417 slot = SLOT_CW_MASK_PM;
9418 break;
9419
9420 default:
9421 gcc_unreachable ();
9422 }
9423 }
9424 else
9425 {
9426 switch (mode)
9427 {
9428 case I387_CW_TRUNC:
9429 /* round toward zero (truncate) */
9430 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9431 slot = SLOT_CW_TRUNC;
9432 break;
9433
9434 case I387_CW_FLOOR:
9435 /* round down toward -oo */
9436 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9437 slot = SLOT_CW_FLOOR;
9438 break;
9439
9440 case I387_CW_CEIL:
9441 /* round up toward +oo */
9442 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9443 slot = SLOT_CW_CEIL;
9444 break;
9445
9446 case I387_CW_MASK_PM:
9447 /* mask precision exception for nearbyint() */
9448 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9449 slot = SLOT_CW_MASK_PM;
9450 break;
9451
9452 default:
9453 gcc_unreachable ();
9454 }
9455 }
9456
9457 gcc_assert (slot < MAX_386_STACK_LOCALS);
9458
9459 new_mode = assign_386_stack_local (HImode, slot);
9460 emit_move_insn (new_mode, reg);
9461 }
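/* Background for the bit twiddling above: bits 10-11 of the x87 control
   word select the rounding mode (00 = to nearest, 01 = down, 10 = up,
   11 = toward zero), so OR-ing in 0x0c00 selects truncation while 0x0400
   and 0x0800 select floor and ceil rounding; bit 5 (0x0020) is the
   precision exception mask used for nearbyint.  */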
9462
9463 /* Output code for INSN to convert a float to a signed int. OPERANDS
9464 are the insn operands. The output may be [HSD]Imode and the input
9465 operand may be [SDX]Fmode. */
9466
9467 const char *
9468 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9469 {
9470 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9471 int dimode_p = GET_MODE (operands[0]) == DImode;
9472 int round_mode = get_attr_i387_cw (insn);
9473
9474 /* Jump through a hoop or two for DImode, since the hardware has no
9475 non-popping instruction. We used to do this a different way, but
9476 that was somewhat fragile and broke with post-reload splitters. */
9477 if ((dimode_p || fisttp) && !stack_top_dies)
9478 output_asm_insn ("fld\t%y1", operands);
9479
9480 gcc_assert (STACK_TOP_P (operands[1]));
9481 gcc_assert (MEM_P (operands[0]));
9482 gcc_assert (GET_MODE (operands[1]) != TFmode);
9483
9484 if (fisttp)
9485 output_asm_insn ("fisttp%z0\t%0", operands);
9486 else
9487 {
9488 if (round_mode != I387_CW_ANY)
9489 output_asm_insn ("fldcw\t%3", operands);
9490 if (stack_top_dies || dimode_p)
9491 output_asm_insn ("fistp%z0\t%0", operands);
9492 else
9493 output_asm_insn ("fist%z0\t%0", operands);
9494 if (round_mode != I387_CW_ANY)
9495 output_asm_insn ("fldcw\t%2", operands);
9496 }
9497
9498 return "";
9499 }
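/* Sketch of the non-fisttp sequence emitted above (operand roles inferred
   from the code): "fldcw %3" installs the mode-specific control word, the
   fistp/fist performs the conversion, and "fldcw %2" restores the control
   word saved earlier (see emit_i387_cw_initialization).  */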
9500
9501 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9502 have the values zero or one, indicates the ffreep insn's operand
9503 from the OPERANDS array. */
9504
9505 static const char *
9506 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9507 {
9508 if (TARGET_USE_FFREEP)
9509 #if HAVE_AS_IX86_FFREEP
9510 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9511 #else
9512 {
9513 static char retval[] = ".word\t0xc_df";
9514 int regno = REGNO (operands[opno]);
9515
9516 gcc_assert (FP_REGNO_P (regno));
9517
9518 retval[9] = '0' + (regno - FIRST_STACK_REG);
9519 return retval;
9520 }
9521 #endif
9522
9523 return opno ? "fstp\t%y1" : "fstp\t%y0";
9524 }
9525
9526
9527 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9528 should be used. UNORDERED_P is true when fucom should be used. */
9529
9530 const char *
9531 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9532 {
9533 int stack_top_dies;
9534 rtx cmp_op0, cmp_op1;
9535 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9536
9537 if (eflags_p)
9538 {
9539 cmp_op0 = operands[0];
9540 cmp_op1 = operands[1];
9541 }
9542 else
9543 {
9544 cmp_op0 = operands[1];
9545 cmp_op1 = operands[2];
9546 }
9547
9548 if (is_sse)
9549 {
9550 if (GET_MODE (operands[0]) == SFmode)
9551 if (unordered_p)
9552 return "ucomiss\t{%1, %0|%0, %1}";
9553 else
9554 return "comiss\t{%1, %0|%0, %1}";
9555 else
9556 if (unordered_p)
9557 return "ucomisd\t{%1, %0|%0, %1}";
9558 else
9559 return "comisd\t{%1, %0|%0, %1}";
9560 }
9561
9562 gcc_assert (STACK_TOP_P (cmp_op0));
9563
9564 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9565
9566 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9567 {
9568 if (stack_top_dies)
9569 {
9570 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9571 return output_387_ffreep (operands, 1);
9572 }
9573 else
9574 return "ftst\n\tfnstsw\t%0";
9575 }
9576
9577 if (STACK_REG_P (cmp_op1)
9578 && stack_top_dies
9579 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9580 && REGNO (cmp_op1) != FIRST_STACK_REG)
9581 {
9582 /* If the top of the 387 stack dies, and the other operand
9583 is also a stack register that dies, then this must be a
9584 `fcompp' float compare. */
9585
9586 if (eflags_p)
9587 {
9588 /* There is no double popping fcomi variant. Fortunately,
9589 eflags is immune from the fstp's cc clobbering. */
9590 if (unordered_p)
9591 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9592 else
9593 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9594 return output_387_ffreep (operands, 0);
9595 }
9596 else
9597 {
9598 if (unordered_p)
9599 return "fucompp\n\tfnstsw\t%0";
9600 else
9601 return "fcompp\n\tfnstsw\t%0";
9602 }
9603 }
9604 else
9605 {
9606 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9607
9608 static const char * const alt[16] =
9609 {
9610 "fcom%z2\t%y2\n\tfnstsw\t%0",
9611 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9612 "fucom%z2\t%y2\n\tfnstsw\t%0",
9613 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9614
9615 "ficom%z2\t%y2\n\tfnstsw\t%0",
9616 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9617 NULL,
9618 NULL,
9619
9620 "fcomi\t{%y1, %0|%0, %y1}",
9621 "fcomip\t{%y1, %0|%0, %y1}",
9622 "fucomi\t{%y1, %0|%0, %y1}",
9623 "fucomip\t{%y1, %0|%0, %y1}",
9624
9625 NULL,
9626 NULL,
9627 NULL,
9628 NULL
9629 };
9630
9631 int mask;
9632 const char *ret;
9633
9634 mask = eflags_p << 3;
9635 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9636 mask |= unordered_p << 1;
9637 mask |= stack_top_dies;
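/* Example: an fcomip compare (eflags_p = 1, FP operand, ordered,
   stack top dies) gives mask = 0b1001 = 9, which selects
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */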
9638
9639 gcc_assert (mask < 16);
9640 ret = alt[mask];
9641 gcc_assert (ret);
9642
9643 return ret;
9644 }
9645 }
9646
9647 void
9648 ix86_output_addr_vec_elt (FILE *file, int value)
9649 {
9650 const char *directive = ASM_LONG;
9651
9652 #ifdef ASM_QUAD
9653 if (TARGET_64BIT)
9654 directive = ASM_QUAD;
9655 #else
9656 gcc_assert (!TARGET_64BIT);
9657 #endif
9658
9659 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9660 }
9661
9662 void
9663 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9664 {
9665 const char *directive = ASM_LONG;
9666
9667 #ifdef ASM_QUAD
9668 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9669 directive = ASM_QUAD;
9670 #else
9671 gcc_assert (!TARGET_64BIT);
9672 #endif
9673 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9674 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9675 fprintf (file, "%s%s%d-%s%d\n",
9676 directive, LPREFIX, value, LPREFIX, rel);
9677 else if (HAVE_AS_GOTOFF_IN_DATA)
9678 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9679 #if TARGET_MACHO
9680 else if (TARGET_MACHO)
9681 {
9682 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9683 machopic_output_function_base_name (file);
9684 fprintf(file, "\n");
9685 }
9686 #endif
9687 else
9688 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9689 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9690 }
9691 \f
9692 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9693 for the target. */
9694
9695 void
9696 ix86_expand_clear (rtx dest)
9697 {
9698 rtx tmp;
9699
9700 /* We play register width games, which are only valid after reload. */
9701 gcc_assert (reload_completed);
9702
9703 /* Avoid HImode and its attendant prefix byte. */
9704 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9705 dest = gen_rtx_REG (SImode, REGNO (dest));
9706 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9707
9708 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9709 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9710 {
9711 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9712 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9713 }
9714
9715 emit_insn (tmp);
9716 }
9717
9718 /* X is an unchanging MEM. If it is a constant pool reference, return
9719 the constant pool rtx, else NULL. */
9720
9721 rtx
9722 maybe_get_pool_constant (rtx x)
9723 {
9724 x = ix86_delegitimize_address (XEXP (x, 0));
9725
9726 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9727 return get_pool_constant (x);
9728
9729 return NULL_RTX;
9730 }
9731
9732 void
9733 ix86_expand_move (enum machine_mode mode, rtx operands[])
9734 {
9735 int strict = (reload_in_progress || reload_completed);
9736 rtx op0, op1;
9737 enum tls_model model;
9738
9739 op0 = operands[0];
9740 op1 = operands[1];
9741
9742 if (GET_CODE (op1) == SYMBOL_REF)
9743 {
9744 model = SYMBOL_REF_TLS_MODEL (op1);
9745 if (model)
9746 {
9747 op1 = legitimize_tls_address (op1, model, true);
9748 op1 = force_operand (op1, op0);
9749 if (op1 == op0)
9750 return;
9751 }
9752 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9753 && SYMBOL_REF_DLLIMPORT_P (op1))
9754 op1 = legitimize_dllimport_symbol (op1, false);
9755 }
9756 else if (GET_CODE (op1) == CONST
9757 && GET_CODE (XEXP (op1, 0)) == PLUS
9758 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9759 {
9760 rtx addend = XEXP (XEXP (op1, 0), 1);
9761 rtx symbol = XEXP (XEXP (op1, 0), 0);
9762 rtx tmp = NULL;
9763
9764 model = SYMBOL_REF_TLS_MODEL (symbol);
9765 if (model)
9766 tmp = legitimize_tls_address (symbol, model, true);
9767 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9768 && SYMBOL_REF_DLLIMPORT_P (symbol))
9769 tmp = legitimize_dllimport_symbol (symbol, true);
9770
9771 if (tmp)
9772 {
9773 tmp = force_operand (tmp, NULL);
9774 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9775 op0, 1, OPTAB_DIRECT);
9776 if (tmp == op0)
9777 return;
9778 }
9779 }
9780
9781 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9782 {
9783 if (TARGET_MACHO && !TARGET_64BIT)
9784 {
9785 #if TARGET_MACHO
9786 if (MACHOPIC_PURE)
9787 {
9788 rtx temp = ((reload_in_progress
9789 || ((op0 && REG_P (op0))
9790 && mode == Pmode))
9791 ? op0 : gen_reg_rtx (Pmode));
9792 op1 = machopic_indirect_data_reference (op1, temp);
9793 op1 = machopic_legitimize_pic_address (op1, mode,
9794 temp == op1 ? 0 : temp);
9795 }
9796 else if (MACHOPIC_INDIRECT)
9797 op1 = machopic_indirect_data_reference (op1, 0);
9798 if (op0 == op1)
9799 return;
9800 #endif
9801 }
9802 else
9803 {
9804 if (MEM_P (op0))
9805 op1 = force_reg (Pmode, op1);
9806 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9807 {
9808 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9809 op1 = legitimize_pic_address (op1, reg);
9810 if (op0 == op1)
9811 return;
9812 }
9813 }
9814 }
9815 else
9816 {
9817 if (MEM_P (op0)
9818 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9819 || !push_operand (op0, mode))
9820 && MEM_P (op1))
9821 op1 = force_reg (mode, op1);
9822
9823 if (push_operand (op0, mode)
9824 && ! general_no_elim_operand (op1, mode))
9825 op1 = copy_to_mode_reg (mode, op1);
9826
9827 /* Force large constants in 64-bit compilation into a register
9828 so that they get CSEed. */
9829 if (TARGET_64BIT && mode == DImode
9830 && immediate_operand (op1, mode)
9831 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9832 && !register_operand (op0, mode)
9833 && optimize && !reload_completed && !reload_in_progress)
9834 op1 = copy_to_mode_reg (mode, op1);
9835
9836 if (FLOAT_MODE_P (mode))
9837 {
9838 /* If we are loading a floating point constant to a register,
9839 force the value to memory now, since we'll get better code
9840 out the back end. */
9841
9842 if (strict)
9843 ;
9844 else if (GET_CODE (op1) == CONST_DOUBLE)
9845 {
9846 op1 = validize_mem (force_const_mem (mode, op1));
9847 if (!register_operand (op0, mode))
9848 {
9849 rtx temp = gen_reg_rtx (mode);
9850 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9851 emit_move_insn (op0, temp);
9852 return;
9853 }
9854 }
9855 }
9856 }
9857
9858 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9859 }
9860
9861 void
9862 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9863 {
9864 rtx op0 = operands[0], op1 = operands[1];
9865 unsigned int align = GET_MODE_ALIGNMENT (mode);
9866
9867 /* Force constants other than zero into memory. We do not know how
9868 the instructions used to build constants modify the upper 64 bits
9869 of the register; once we have that information we may be able
9870 to handle some of them more efficiently. */
9871 if ((reload_in_progress | reload_completed) == 0
9872 && register_operand (op0, mode)
9873 && (CONSTANT_P (op1)
9874 || (GET_CODE (op1) == SUBREG
9875 && CONSTANT_P (SUBREG_REG (op1))))
9876 && standard_sse_constant_p (op1) <= 0)
9877 op1 = validize_mem (force_const_mem (mode, op1));
9878
9879 /* TDmode values are passed as TImode on the stack. TImode values
9880 are moved via xmm registers, and moving them to the stack can result in
9881 unaligned memory access. Use ix86_expand_vector_move_misalign()
9882 if the memory operand is not aligned correctly. */
9883 if (!no_new_pseudos
9884 && (mode == TImode) && !TARGET_64BIT
9885 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9886 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9887 {
9888 rtx tmp[2];
9889
9890 /* ix86_expand_vector_move_misalign() does not like constants ... */
9891 if (CONSTANT_P (op1)
9892 || (GET_CODE (op1) == SUBREG
9893 && CONSTANT_P (SUBREG_REG (op1))))
9894 op1 = validize_mem (force_const_mem (mode, op1));
9895
9896 /* ... nor both arguments in memory. */
9897 if (!register_operand (op0, mode)
9898 && !register_operand (op1, mode))
9899 op1 = force_reg (mode, op1);
9900
9901 tmp[0] = op0; tmp[1] = op1;
9902 ix86_expand_vector_move_misalign (mode, tmp);
9903 return;
9904 }
9905
9906 /* If neither operand is a register, force operand1 into one. */
9907 if (!no_new_pseudos
9908 && !register_operand (op0, mode)
9909 && !register_operand (op1, mode))
9910 {
9911 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9912 return;
9913 }
9914
9915 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9916 }
9917
9918 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9919 straight to ix86_expand_vector_move. */
9920 /* Code generation for scalar reg-reg moves of single and double precision data:
9921 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9922 movaps reg, reg
9923 else
9924 movss reg, reg
9925 if (x86_sse_partial_reg_dependency == true)
9926 movapd reg, reg
9927 else
9928 movsd reg, reg
9929
9930 Code generation for scalar loads of double precision data:
9931 if (x86_sse_split_regs == true)
9932 movlpd mem, reg (gas syntax)
9933 else
9934 movsd mem, reg
9935
9936 Code generation for unaligned packed loads of single precision data
9937 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9938 if (x86_sse_unaligned_move_optimal)
9939 movups mem, reg
9940
9941 if (x86_sse_partial_reg_dependency == true)
9942 {
9943 xorps reg, reg
9944 movlps mem, reg
9945 movhps mem+8, reg
9946 }
9947 else
9948 {
9949 movlps mem, reg
9950 movhps mem+8, reg
9951 }
9952
9953 Code generation for unaligned packed loads of double precision data
9954 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9955 if (x86_sse_unaligned_move_optimal)
9956 movupd mem, reg
9957
9958 if (x86_sse_split_regs == true)
9959 {
9960 movlpd mem, reg
9961 movhpd mem+8, reg
9962 }
9963 else
9964 {
9965 movsd mem, reg
9966 movhpd mem+8, reg
9967 }
9968 */
9969
9970 void
9971 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9972 {
9973 rtx op0, op1, m;
9974
9975 op0 = operands[0];
9976 op1 = operands[1];
9977
9978 if (MEM_P (op1))
9979 {
9980 /* If we're optimizing for size, movups is the smallest. */
9981 if (optimize_size)
9982 {
9983 op0 = gen_lowpart (V4SFmode, op0);
9984 op1 = gen_lowpart (V4SFmode, op1);
9985 emit_insn (gen_sse_movups (op0, op1));
9986 return;
9987 }
9988
9989 /* ??? If we have typed data, then it would appear that using
9990 movdqu is the only way to get unaligned data loaded with
9991 integer type. */
9992 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9993 {
9994 op0 = gen_lowpart (V16QImode, op0);
9995 op1 = gen_lowpart (V16QImode, op1);
9996 emit_insn (gen_sse2_movdqu (op0, op1));
9997 return;
9998 }
9999
10000 if (TARGET_SSE2 && mode == V2DFmode)
10001 {
10002 rtx zero;
10003
10004 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10005 {
10006 op0 = gen_lowpart (V2DFmode, op0);
10007 op1 = gen_lowpart (V2DFmode, op1);
10008 emit_insn (gen_sse2_movupd (op0, op1));
10009 return;
10010 }
10011
10012 /* When SSE registers are split into halves, we can avoid
10013 writing to the top half twice. */
10014 if (TARGET_SSE_SPLIT_REGS)
10015 {
10016 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10017 zero = op0;
10018 }
10019 else
10020 {
10021 /* ??? Not sure about the best option for the Intel chips.
10022 The following would seem to satisfy; the register is
10023 entirely cleared, breaking the dependency chain. We
10024 then store to the upper half, with a dependency depth
10025 of one. A rumor has it that Intel recommends two movsd
10026 followed by an unpacklpd, but this is unconfirmed. And
10027 given that the dependency depth of the unpacklpd would
10028 still be one, I'm not sure why this would be better. */
10029 zero = CONST0_RTX (V2DFmode);
10030 }
10031
10032 m = adjust_address (op1, DFmode, 0);
10033 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10034 m = adjust_address (op1, DFmode, 8);
10035 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10036 }
10037 else
10038 {
10039 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10040 {
10041 op0 = gen_lowpart (V4SFmode, op0);
10042 op1 = gen_lowpart (V4SFmode, op1);
10043 emit_insn (gen_sse_movups (op0, op1));
10044 return;
10045 }
10046
10047 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10048 emit_move_insn (op0, CONST0_RTX (mode));
10049 else
10050 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10051
10052 if (mode != V4SFmode)
10053 op0 = gen_lowpart (V4SFmode, op0);
10054 m = adjust_address (op1, V2SFmode, 0);
10055 emit_insn (gen_sse_loadlps (op0, op0, m));
10056 m = adjust_address (op1, V2SFmode, 8);
10057 emit_insn (gen_sse_loadhps (op0, op0, m));
10058 }
10059 }
10060 else if (MEM_P (op0))
10061 {
10062 /* If we're optimizing for size, movups is the smallest. */
10063 if (optimize_size)
10064 {
10065 op0 = gen_lowpart (V4SFmode, op0);
10066 op1 = gen_lowpart (V4SFmode, op1);
10067 emit_insn (gen_sse_movups (op0, op1));
10068 return;
10069 }
10070
10071 /* ??? Similar to above, only less clear because of quote
10072 typeless stores unquote. */
10073 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10074 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10075 {
10076 op0 = gen_lowpart (V16QImode, op0);
10077 op1 = gen_lowpart (V16QImode, op1);
10078 emit_insn (gen_sse2_movdqu (op0, op1));
10079 return;
10080 }
10081
10082 if (TARGET_SSE2 && mode == V2DFmode)
10083 {
10084 m = adjust_address (op0, DFmode, 0);
10085 emit_insn (gen_sse2_storelpd (m, op1));
10086 m = adjust_address (op0, DFmode, 8);
10087 emit_insn (gen_sse2_storehpd (m, op1));
10088 }
10089 else
10090 {
10091 if (mode != V4SFmode)
10092 op1 = gen_lowpart (V4SFmode, op1);
10093 m = adjust_address (op0, V2SFmode, 0);
10094 emit_insn (gen_sse_storelps (m, op1));
10095 m = adjust_address (op0, V2SFmode, 8);
10096 emit_insn (gen_sse_storehps (m, op1));
10097 }
10098 }
10099 else
10100 gcc_unreachable ();
10101 }
10102
10103 /* Expand a push in MODE. This is some mode for which we do not support
10104 proper push instructions, at least from the registers that we expect
10105 the value to live in. */
10106
10107 void
10108 ix86_expand_push (enum machine_mode mode, rtx x)
10109 {
10110 rtx tmp;
10111
10112 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10113 GEN_INT (-GET_MODE_SIZE (mode)),
10114 stack_pointer_rtx, 1, OPTAB_DIRECT);
10115 if (tmp != stack_pointer_rtx)
10116 emit_move_insn (stack_pointer_rtx, tmp);
10117
10118 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10119 emit_move_insn (tmp, x);
10120 }
10121
10122 /* Helper function of ix86_fixup_binary_operands to canonicalize
10123 operand order. Returns true if the operands should be swapped. */
10124
10125 static bool
10126 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10127 rtx operands[])
10128 {
10129 rtx dst = operands[0];
10130 rtx src1 = operands[1];
10131 rtx src2 = operands[2];
10132
10133 /* If the operation is not commutative, we can't do anything. */
10134 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10135 return false;
10136
10137 /* Highest priority is that src1 should match dst. */
10138 if (rtx_equal_p (dst, src1))
10139 return false;
10140 if (rtx_equal_p (dst, src2))
10141 return true;
10142
10143 /* Next highest priority is that immediate constants come second. */
10144 if (immediate_operand (src2, mode))
10145 return false;
10146 if (immediate_operand (src1, mode))
10147 return true;
10148
10149 /* Lowest priority is that memory references should come second. */
10150 if (MEM_P (src2))
10151 return false;
10152 if (MEM_P (src1))
10153 return true;
10154
10155 return false;
10156 }
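/* For example, (plus (const_int 3) (reg 1)) with dst = (reg 0) is swapped
   so that the immediate becomes the second source, and
   (plus (reg 1) (reg 0)) with dst = (reg 0) is swapped so that src1
   matches the destination.  */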
10157
10158
10159 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10160 destination to use for the operation. If different from the true
10161 destination in operands[0], a copy operation will be required. */
10162
10163 rtx
10164 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10165 rtx operands[])
10166 {
10167 rtx dst = operands[0];
10168 rtx src1 = operands[1];
10169 rtx src2 = operands[2];
10170
10171 /* Canonicalize operand order. */
10172 if (ix86_swap_binary_operands_p (code, mode, operands))
10173 {
10174 rtx temp = src1;
10175 src1 = src2;
10176 src2 = temp;
10177 }
10178
10179 /* Both source operands cannot be in memory. */
10180 if (MEM_P (src1) && MEM_P (src2))
10181 {
10182 /* Optimization: Only read from memory once. */
10183 if (rtx_equal_p (src1, src2))
10184 {
10185 src2 = force_reg (mode, src2);
10186 src1 = src2;
10187 }
10188 else
10189 src2 = force_reg (mode, src2);
10190 }
10191
10192 /* If the destination is memory, and we do not have matching source
10193 operands, do things in registers. */
10194 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10195 dst = gen_reg_rtx (mode);
10196
10197 /* Source 1 cannot be a constant. */
10198 if (CONSTANT_P (src1))
10199 src1 = force_reg (mode, src1);
10200
10201 /* Source 1 cannot be a non-matching memory. */
10202 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10203 src1 = force_reg (mode, src1);
10204
10205 operands[1] = src1;
10206 operands[2] = src2;
10207 return dst;
10208 }
10209
10210 /* Similarly, but assume that the destination has already been
10211 set up properly. */
10212
10213 void
10214 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10215 enum machine_mode mode, rtx operands[])
10216 {
10217 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10218 gcc_assert (dst == operands[0]);
10219 }
10220
10221 /* Attempt to expand a binary operator. Make the expansion closer to the
10222 actual machine than just general_operand, which will allow 3 separate
10223 memory references (one output, two inputs) in a single insn. */
10224
10225 void
10226 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10227 rtx operands[])
10228 {
10229 rtx src1, src2, dst, op, clob;
10230
10231 dst = ix86_fixup_binary_operands (code, mode, operands);
10232 src1 = operands[1];
10233 src2 = operands[2];
10234
10235 /* Emit the instruction. */
10236
10237 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10238 if (reload_in_progress)
10239 {
10240 /* Reload doesn't know about the flags register, and doesn't know that
10241 it doesn't want to clobber it. We can only do this with PLUS. */
10242 gcc_assert (code == PLUS);
10243 emit_insn (op);
10244 }
10245 else
10246 {
10247 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10248 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10249 }
10250
10251 /* Fix up the destination if needed. */
10252 if (dst != operands[0])
10253 emit_move_insn (operands[0], dst);
10254 }
10255
10256 /* Return TRUE or FALSE depending on whether the binary operator meets the
10257 appropriate constraints. */
10258
10259 int
10260 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10261 rtx operands[3])
10262 {
10263 rtx dst = operands[0];
10264 rtx src1 = operands[1];
10265 rtx src2 = operands[2];
10266
10267 /* Both source operands cannot be in memory. */
10268 if (MEM_P (src1) && MEM_P (src2))
10269 return 0;
10270
10271 /* Canonicalize operand order for commutative operators. */
10272 if (ix86_swap_binary_operands_p (code, mode, operands))
10273 {
10274 rtx temp = src1;
10275 src1 = src2;
10276 src2 = temp;
10277 }
10278
10279 /* If the destination is memory, we must have a matching source operand. */
10280 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10281 return 0;
10282
10283 /* Source 1 cannot be a constant. */
10284 if (CONSTANT_P (src1))
10285 return 0;
10286
10287 /* Source 1 cannot be a non-matching memory. */
10288 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10289 return 0;
10290
10291 return 1;
10292 }
10293
10294 /* Attempt to expand a unary operator. Make the expansion closer to the
10295 actual machine than just general_operand, which will allow 2 separate
10296 memory references (one output, one input) in a single insn. */
10297
10298 void
10299 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10300 rtx operands[])
10301 {
10302 int matching_memory;
10303 rtx src, dst, op, clob;
10304
10305 dst = operands[0];
10306 src = operands[1];
10307
10308 /* If the destination is memory, and we do not have matching source
10309 operands, do things in registers. */
10310 matching_memory = 0;
10311 if (MEM_P (dst))
10312 {
10313 if (rtx_equal_p (dst, src))
10314 matching_memory = 1;
10315 else
10316 dst = gen_reg_rtx (mode);
10317 }
10318
10319 /* When the source operand is memory, the destination must match. */
10320 if (MEM_P (src) && !matching_memory)
10321 src = force_reg (mode, src);
10322
10323 /* Emit the instruction. */
10324
10325 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10326 if (reload_in_progress || code == NOT)
10327 {
10328 /* Reload doesn't know about the flags register, and doesn't know that
10329 it doesn't want to clobber it. */
10330 gcc_assert (code == NOT);
10331 emit_insn (op);
10332 }
10333 else
10334 {
10335 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10336 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10337 }
10338
10339 /* Fix up the destination if needed. */
10340 if (dst != operands[0])
10341 emit_move_insn (operands[0], dst);
10342 }
10343
10344 /* Return TRUE or FALSE depending on whether the unary operator meets the
10345 appropriate constraints. */
10346
10347 int
10348 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10349 enum machine_mode mode ATTRIBUTE_UNUSED,
10350 rtx operands[2] ATTRIBUTE_UNUSED)
10351 {
10352 /* If one of the operands is memory, source and destination must match. */
10353 if ((MEM_P (operands[0])
10354 || MEM_P (operands[1]))
10355 && ! rtx_equal_p (operands[0], operands[1]))
10356 return FALSE;
10357 return TRUE;
10358 }
10359
10360 /* Post-reload splitter for converting an SF or DFmode value in an
10361 SSE register into an unsigned SImode. */
10362
10363 void
10364 ix86_split_convert_uns_si_sse (rtx operands[])
10365 {
10366 enum machine_mode vecmode;
10367 rtx value, large, zero_or_two31, input, two31, x;
10368
10369 large = operands[1];
10370 zero_or_two31 = operands[2];
10371 input = operands[3];
10372 two31 = operands[4];
10373 vecmode = GET_MODE (large);
10374 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10375
10376 /* Load up the value into the low element. We must ensure that the other
10377 elements are valid floats -- zero is the easiest such value. */
10378 if (MEM_P (input))
10379 {
10380 if (vecmode == V4SFmode)
10381 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10382 else
10383 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10384 }
10385 else
10386 {
10387 input = gen_rtx_REG (vecmode, REGNO (input));
10388 emit_move_insn (value, CONST0_RTX (vecmode));
10389 if (vecmode == V4SFmode)
10390 emit_insn (gen_sse_movss (value, value, input));
10391 else
10392 emit_insn (gen_sse2_movsd (value, value, input));
10393 }
10394
10395 emit_move_insn (large, two31);
10396 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10397
10398 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10399 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10400
10401 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10402 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10403
10404 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10405 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10406
10407 large = gen_rtx_REG (V4SImode, REGNO (large));
10408 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10409
10410 x = gen_rtx_REG (V4SImode, REGNO (value));
10411 if (vecmode == V4SFmode)
10412 emit_insn (gen_sse2_cvttps2dq (x, value));
10413 else
10414 emit_insn (gen_sse2_cvttpd2dq (x, value));
10415 value = x;
10416
10417 emit_insn (gen_xorv4si3 (value, value, large));
10418 }
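/* In other words: inputs >= 2**31 have 2**31 subtracted before the signed
   cvttps2dq/cvttpd2dq conversion, and the final XOR with (mask << 31) adds
   the 0x80000000 bit back in.  For example, an input of 3e9 is converted
   as 3e9 - 2**31 = 852516352 and then XORed with 0x80000000, giving
   3000000000.  */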
10419
10420 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10421 Expects the 64-bit DImode to be supplied in a pair of integral
10422 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10423 -mfpmath=sse, !optimize_size only. */
10424
10425 void
10426 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10427 {
10428 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10429 rtx int_xmm, fp_xmm;
10430 rtx biases, exponents;
10431 rtx x;
10432
10433 int_xmm = gen_reg_rtx (V4SImode);
10434 if (TARGET_INTER_UNIT_MOVES)
10435 emit_insn (gen_movdi_to_sse (int_xmm, input));
10436 else if (TARGET_SSE_SPLIT_REGS)
10437 {
10438 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10439 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10440 }
10441 else
10442 {
10443 x = gen_reg_rtx (V2DImode);
10444 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10445 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10446 }
10447
10448 x = gen_rtx_CONST_VECTOR (V4SImode,
10449 gen_rtvec (4, GEN_INT (0x43300000UL),
10450 GEN_INT (0x45300000UL),
10451 const0_rtx, const0_rtx));
10452 exponents = validize_mem (force_const_mem (V4SImode, x));
10453
10454 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10455 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10456
10457 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10458 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10459 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10460 (0x1.0p84 + double(fp_value_hi_xmm)).
10461 Note these exponents differ by 32. */
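/* Worked example: 0x43300000 and 0x45300000 are the high words of the
   doubles 0x1.0p52 and 0x1.0p84, so an input of 2**32 + 5 (lo = 5, hi = 1)
   produces lanes holding 0x1.0p52 + 5 and 0x1.0p84 + 2**32; after the
   bias subtraction below they become 5.0 and 4294967296.0, which add up
   to the expected 4294967301.0.  */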
10462
10463 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10464
10465 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10466 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10467 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10468 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10469 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10470 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10471 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10472 biases = validize_mem (force_const_mem (V2DFmode, biases));
10473 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10474
10475 /* Add the upper and lower DFmode values together. */
10476 if (TARGET_SSE3)
10477 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10478 else
10479 {
10480 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10481 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10482 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10483 }
10484
10485 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10486 }
10487
10488 /* Convert an unsigned SImode value into a DFmode. Only currently used
10489 for SSE, but applicable anywhere. */
10490
10491 void
10492 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10493 {
10494 REAL_VALUE_TYPE TWO31r;
10495 rtx x, fp;
10496
10497 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10498 NULL, 1, OPTAB_DIRECT);
10499
10500 fp = gen_reg_rtx (DFmode);
10501 emit_insn (gen_floatsidf2 (fp, x));
10502
10503 real_ldexp (&TWO31r, &dconst1, 31);
10504 x = const_double_from_real_value (TWO31r, DFmode);
10505
10506 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10507 if (x != target)
10508 emit_move_insn (target, x);
10509 }
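/* Worked example: for input = 3000000000 the PLUS above wraps to
   3000000000 - 2**31 = 852516352, which converts exactly to 852516352.0;
   adding 0x1.0p31 back yields 3000000000.0.  */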
10510
10511 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10512 32-bit mode; otherwise we have a direct convert instruction. */
10513
10514 void
10515 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10516 {
10517 REAL_VALUE_TYPE TWO32r;
10518 rtx fp_lo, fp_hi, x;
10519
10520 fp_lo = gen_reg_rtx (DFmode);
10521 fp_hi = gen_reg_rtx (DFmode);
10522
10523 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10524
10525 real_ldexp (&TWO32r, &dconst1, 32);
10526 x = const_double_from_real_value (TWO32r, DFmode);
10527 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10528
10529 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10530
10531 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10532 0, OPTAB_DIRECT);
10533 if (x != target)
10534 emit_move_insn (target, x);
10535 }
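/* Worked example: for input = -1 (0xffffffffffffffff) the high part
   converts to -1.0 and is scaled to -0x1.0p32, the low part converts
   (unsigned) to 4294967295.0, and their sum is the expected -1.0.  */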
10536
10537 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
10538 For x86_32, -mfpmath=sse, !optimize_size only. */
10539 void
10540 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10541 {
10542 REAL_VALUE_TYPE ONE16r;
10543 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10544
10545 real_ldexp (&ONE16r, &dconst1, 16);
10546 x = const_double_from_real_value (ONE16r, SFmode);
10547 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10548 NULL, 0, OPTAB_DIRECT);
10549 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10550 NULL, 0, OPTAB_DIRECT);
10551 fp_hi = gen_reg_rtx (SFmode);
10552 fp_lo = gen_reg_rtx (SFmode);
10553 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10554 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10555 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10556 0, OPTAB_DIRECT);
10557 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10558 0, OPTAB_DIRECT);
10559 if (!rtx_equal_p (target, fp_hi))
10560 emit_move_insn (target, fp_hi);
10561 }
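/* Worked example: for input = 0x12345 the two halves are hi = 1 and
   lo = 0x2345 = 9029, giving 1.0 * 65536.0 + 9029.0 = 74565.0, the exact
   SFmode value of the input.  */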
10562
10563 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10564 then replicate the value for all elements of the vector
10565 register. */
10566
10567 rtx
10568 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10569 {
10570 rtvec v;
10571 switch (mode)
10572 {
10573 case SImode:
10574 gcc_assert (vect);
10575 v = gen_rtvec (4, value, value, value, value);
10576 return gen_rtx_CONST_VECTOR (V4SImode, v);
10577
10578 case DImode:
10579 gcc_assert (vect);
10580 v = gen_rtvec (2, value, value);
10581 return gen_rtx_CONST_VECTOR (V2DImode, v);
10582
10583 case SFmode:
10584 if (vect)
10585 v = gen_rtvec (4, value, value, value, value);
10586 else
10587 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10588 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10589 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10590
10591 case DFmode:
10592 if (vect)
10593 v = gen_rtvec (2, value, value);
10594 else
10595 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10596 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10597
10598 default:
10599 gcc_unreachable ();
10600 }
10601 }
10602
10603 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10604 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10605 for an SSE register. If VECT is true, then replicate the mask for
10606 all elements of the vector register. If INVERT is true, then create
10607 a mask excluding the sign bit. */
10608
10609 rtx
10610 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10611 {
10612 enum machine_mode vec_mode, imode;
10613 HOST_WIDE_INT hi, lo;
10614 int shift = 63;
10615 rtx v;
10616 rtx mask;
10617
10618 /* Find the sign bit, sign extended to 2*HWI. */
10619 switch (mode)
10620 {
10621 case SImode:
10622 case SFmode:
10623 imode = SImode;
10624 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10625 lo = 0x80000000, hi = lo < 0;
10626 break;
10627
10628 case DImode:
10629 case DFmode:
10630 imode = DImode;
10631 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10632 if (HOST_BITS_PER_WIDE_INT >= 64)
10633 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10634 else
10635 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10636 break;
10637
10638 case TImode:
10639 case TFmode:
10640 imode = TImode;
10641 vec_mode = VOIDmode;
10642 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10643 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10644 break;
10645
10646 default:
10647 gcc_unreachable ();
10648 }
10649
10650 if (invert)
10651 lo = ~lo, hi = ~hi;
10652
10653 /* Force this value into the low part of a fp vector constant. */
10654 mask = immed_double_const (lo, hi, imode);
10655 mask = gen_lowpart (mode, mask);
10656
10657 if (vec_mode == VOIDmode)
10658 return force_reg (mode, mask);
10659
10660 v = ix86_build_const_vector (mode, vect, mask);
10661 return force_reg (vec_mode, v);
10662 }
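/* For example, for DFmode with VECT and INVERT false this returns a V2DF
   register whose low element has only the sign bit set (the bit pattern
   of -0.0); with INVERT true the low element instead has every bit except
   the sign bit set, suitable for clearing the sign with AND.  */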
10663
10664 /* Generate code for floating point ABS or NEG. */
10665
10666 void
10667 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10668 rtx operands[])
10669 {
10670 rtx mask, set, use, clob, dst, src;
10671 bool matching_memory;
10672 bool use_sse = false;
10673 bool vector_mode = VECTOR_MODE_P (mode);
10674 enum machine_mode elt_mode = mode;
10675
10676 if (vector_mode)
10677 {
10678 elt_mode = GET_MODE_INNER (mode);
10679 use_sse = true;
10680 }
10681 else if (mode == TFmode)
10682 use_sse = true;
10683 else if (TARGET_SSE_MATH)
10684 use_sse = SSE_FLOAT_MODE_P (mode);
10685
10686 /* NEG and ABS performed with SSE use bitwise mask operations.
10687 Create the appropriate mask now. */
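/* (With SSE the operation then reduces to dst = src XOR mask for NEG, or
   dst = src AND mask for ABS, the mask having been inverted in the ABS
   case so that it clears only the sign bit.)  */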
10688 if (use_sse)
10689 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10690 else
10691 mask = NULL_RTX;
10692
10693 dst = operands[0];
10694 src = operands[1];
10695
10696 /* If the destination is memory, and we don't have matching source
10697 operands or we're using the x87, do things in registers. */
10698 matching_memory = false;
10699 if (MEM_P (dst))
10700 {
10701 if (use_sse && rtx_equal_p (dst, src))
10702 matching_memory = true;
10703 else
10704 dst = gen_reg_rtx (mode);
10705 }
10706 if (MEM_P (src) && !matching_memory)
10707 src = force_reg (mode, src);
10708
10709 if (vector_mode)
10710 {
10711 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10712 set = gen_rtx_SET (VOIDmode, dst, set);
10713 emit_insn (set);
10714 }
10715 else
10716 {
10717 set = gen_rtx_fmt_e (code, mode, src);
10718 set = gen_rtx_SET (VOIDmode, dst, set);
10719 if (mask)
10720 {
10721 use = gen_rtx_USE (VOIDmode, mask);
10722 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10723 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10724 gen_rtvec (3, set, use, clob)));
10725 }
10726 else
10727 emit_insn (set);
10728 }
10729
10730 if (dst != operands[0])
10731 emit_move_insn (operands[0], dst);
10732 }
10733
10734 /* Expand a copysign operation. Special case operand 0 being a constant. */
10735
10736 void
10737 ix86_expand_copysign (rtx operands[])
10738 {
10739 enum machine_mode mode, vmode;
10740 rtx dest, op0, op1, mask, nmask;
10741
10742 dest = operands[0];
10743 op0 = operands[1];
10744 op1 = operands[2];
10745
10746 mode = GET_MODE (dest);
10747 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10748
10749 if (GET_CODE (op0) == CONST_DOUBLE)
10750 {
10751 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10752
10753 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10754 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10755
10756 if (mode == SFmode || mode == DFmode)
10757 {
10758 if (op0 == CONST0_RTX (mode))
10759 op0 = CONST0_RTX (vmode);
10760 else
10761 {
10762 rtvec v;
10763
10764 if (mode == SFmode)
10765 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10766 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10767 else
10768 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10769 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10770 }
10771 }
10772
10773 mask = ix86_build_signbit_mask (mode, 0, 0);
10774
10775 if (mode == SFmode)
10776 copysign_insn = gen_copysignsf3_const;
10777 else if (mode == DFmode)
10778 copysign_insn = gen_copysigndf3_const;
10779 else
10780 copysign_insn = gen_copysigntf3_const;
10781
10782 emit_insn (copysign_insn (dest, op0, op1, mask));
10783 }
10784 else
10785 {
10786 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10787
10788 nmask = ix86_build_signbit_mask (mode, 0, 1);
10789 mask = ix86_build_signbit_mask (mode, 0, 0);
10790
10791 if (mode == SFmode)
10792 copysign_insn = gen_copysignsf3_var;
10793 else if (mode == DFmode)
10794 copysign_insn = gen_copysigndf3_var;
10795 else
10796 copysign_insn = gen_copysigntf3_var;
10797
10798 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10799 }
10800 }
10801
10802 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10803 be a constant, and so has already been expanded into a vector constant. */
10804
10805 void
10806 ix86_split_copysign_const (rtx operands[])
10807 {
10808 enum machine_mode mode, vmode;
10809 rtx dest, op0, op1, mask, x;
10810
10811 dest = operands[0];
10812 op0 = operands[1];
10813 op1 = operands[2];
10814 mask = operands[3];
10815
10816 mode = GET_MODE (dest);
10817 vmode = GET_MODE (mask);
10818
10819 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10820 x = gen_rtx_AND (vmode, dest, mask);
10821 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10822
10823 if (op0 != CONST0_RTX (vmode))
10824 {
10825 x = gen_rtx_IOR (vmode, dest, op0);
10826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10827 }
10828 }
10829
10830 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10831 so we have to do two masks. */
10832
10833 void
10834 ix86_split_copysign_var (rtx operands[])
10835 {
10836 enum machine_mode mode, vmode;
10837 rtx dest, scratch, op0, op1, mask, nmask, x;
10838
10839 dest = operands[0];
10840 scratch = operands[1];
10841 op0 = operands[2];
10842 op1 = operands[3];
10843 nmask = operands[4];
10844 mask = operands[5];
10845
10846 mode = GET_MODE (dest);
10847 vmode = GET_MODE (mask);
10848
10849 if (rtx_equal_p (op0, op1))
10850 {
10851 /* Shouldn't happen often (it's useless, obviously), but when it does
10852 we'd generate incorrect code if we continue below. */
10853 emit_move_insn (dest, op0);
10854 return;
10855 }
10856
10857 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10858 {
10859 gcc_assert (REGNO (op1) == REGNO (scratch));
10860
10861 x = gen_rtx_AND (vmode, scratch, mask);
10862 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10863
10864 dest = mask;
10865 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10866 x = gen_rtx_NOT (vmode, dest);
10867 x = gen_rtx_AND (vmode, x, op0);
10868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10869 }
10870 else
10871 {
10872 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10873 {
10874 x = gen_rtx_AND (vmode, scratch, mask);
10875 }
10876 else /* alternative 2,4 */
10877 {
10878 gcc_assert (REGNO (mask) == REGNO (scratch));
10879 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10880 x = gen_rtx_AND (vmode, scratch, op1);
10881 }
10882 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10883
10884 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10885 {
10886 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10887 x = gen_rtx_AND (vmode, dest, nmask);
10888 }
10889 else /* alternative 3,4 */
10890 {
10891 gcc_assert (REGNO (nmask) == REGNO (dest));
10892 dest = nmask;
10893 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10894 x = gen_rtx_AND (vmode, dest, op0);
10895 }
10896 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10897 }
10898
10899 x = gen_rtx_IOR (vmode, dest, scratch);
10900 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10901 }
10902
10903 /* Return TRUE or FALSE depending on whether the first SET in INSN
10904 has source and destination with matching CC modes, and that the
10905 CC mode is at least as constrained as REQ_MODE. */
10906
10907 int
10908 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10909 {
10910 rtx set;
10911 enum machine_mode set_mode;
10912
10913 set = PATTERN (insn);
10914 if (GET_CODE (set) == PARALLEL)
10915 set = XVECEXP (set, 0, 0);
10916 gcc_assert (GET_CODE (set) == SET);
10917 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10918
10919 set_mode = GET_MODE (SET_DEST (set));
10920 switch (set_mode)
10921 {
10922 case CCNOmode:
10923 if (req_mode != CCNOmode
10924 && (req_mode != CCmode
10925 || XEXP (SET_SRC (set), 1) != const0_rtx))
10926 return 0;
10927 break;
10928 case CCmode:
10929 if (req_mode == CCGCmode)
10930 return 0;
10931 /* FALLTHRU */
10932 case CCGCmode:
10933 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10934 return 0;
10935 /* FALLTHRU */
10936 case CCGOCmode:
10937 if (req_mode == CCZmode)
10938 return 0;
10939 /* FALLTHRU */
10940 case CCZmode:
10941 break;
10942
10943 default:
10944 gcc_unreachable ();
10945 }
10946
10947 return (GET_MODE (SET_SRC (set)) == set_mode);
10948 }
10949
10950 /* Generate insn patterns to do an integer compare of OPERANDS. */
10951
10952 static rtx
10953 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10954 {
10955 enum machine_mode cmpmode;
10956 rtx tmp, flags;
10957
10958 cmpmode = SELECT_CC_MODE (code, op0, op1);
10959 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10960
10961 /* This is very simple, but making the interface the same as in the
10962 FP case makes the rest of the code easier. */
10963 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10964 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10965
10966 /* Return the test that should be put into the flags user, i.e.
10967 the bcc, scc, or cmov instruction. */
10968 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10969 }
10970
10971 /* Figure out whether to use ordered or unordered fp comparisons.
10972 Return the appropriate mode to use. */
10973
10974 enum machine_mode
10975 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10976 {
10977 /* ??? In order to make all comparisons reversible, we do all comparisons
10978 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10979 all forms of trapping and nontrapping comparisons, we can make inequality
10980 comparisons trapping again, since that results in better code when using
10981 FCOM based compares. */
10982 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10983 }
10984
10985 enum machine_mode
10986 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10987 {
10988 enum machine_mode mode = GET_MODE (op0);
10989
10990 if (SCALAR_FLOAT_MODE_P (mode))
10991 {
10992 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10993 return ix86_fp_compare_mode (code);
10994 }
10995
10996 switch (code)
10997 {
10998 /* Only zero flag is needed. */
10999 case EQ: /* ZF=0 */
11000 case NE: /* ZF!=0 */
11001 return CCZmode;
11002 /* Codes needing carry flag. */
11003 case GEU: /* CF=0 */
11004 case GTU: /* CF=0 & ZF=0 */
11005 case LTU: /* CF=1 */
11006 case LEU: /* CF=1 | ZF=1 */
11007 return CCmode;
11008 /* Codes possibly doable only with sign flag when
11009 comparing against zero. */
11010 case GE: /* SF=OF or SF=0 */
11011 case LT: /* SF<>OF or SF=1 */
11012 if (op1 == const0_rtx)
11013 return CCGOCmode;
11014 else
11015 /* For other cases the carry flag is not required. */
11016 return CCGCmode;
11017 /* Codes doable only with the sign flag when comparing
11018 against zero, but for which we lack a jump instruction,
11019 so we need to use relational tests against overflow,
11020 which thus needs to be zero. */
11021 case GT: /* ZF=0 & SF=OF */
11022 case LE: /* ZF=1 | SF<>OF */
11023 if (op1 == const0_rtx)
11024 return CCNOmode;
11025 else
11026 return CCGCmode;
11027 /* The strcmp pattern does (use flags), and combine may ask us for the
11028 proper mode. */
11029 case USE:
11030 return CCmode;
11031 default:
11032 gcc_unreachable ();
11033 }
11034 }
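
/* Worked examples for the mapping above (added for illustration):
   a == b uses only ZF, hence CCZmode; unsigned a < b needs the carry
   flag, hence CCmode; signed a < 0 can be decided from the sign flag
   alone, hence CCGOCmode; and signed a < b against a nonzero constant
   needs SF and OF but not CF, hence CCGCmode.  */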
11035
11036 /* Return the fixed registers used for condition codes. */
11037
11038 static bool
11039 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11040 {
11041 *p1 = FLAGS_REG;
11042 *p2 = FPSR_REG;
11043 return true;
11044 }
11045
11046 /* If two condition code modes are compatible, return a condition code
11047 mode which is compatible with both. Otherwise, return
11048 VOIDmode. */
11049
11050 static enum machine_mode
11051 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11052 {
11053 if (m1 == m2)
11054 return m1;
11055
11056 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11057 return VOIDmode;
11058
11059 if ((m1 == CCGCmode && m2 == CCGOCmode)
11060 || (m1 == CCGOCmode && m2 == CCGCmode))
11061 return CCGCmode;
11062
11063 switch (m1)
11064 {
11065 default:
11066 gcc_unreachable ();
11067
11068 case CCmode:
11069 case CCGCmode:
11070 case CCGOCmode:
11071 case CCNOmode:
11072 case CCAmode:
11073 case CCCmode:
11074 case CCOmode:
11075 case CCSmode:
11076 case CCZmode:
11077 switch (m2)
11078 {
11079 default:
11080 return VOIDmode;
11081
11082 case CCmode:
11083 case CCGCmode:
11084 case CCGOCmode:
11085 case CCNOmode:
11086 case CCAmode:
11087 case CCCmode:
11088 case CCOmode:
11089 case CCSmode:
11090 case CCZmode:
11091 return CCmode;
11092 }
11093
11094 case CCFPmode:
11095 case CCFPUmode:
11096 /* These are only compatible with themselves, which we already
11097 checked above. */
11098 return VOIDmode;
11099 }
11100 }
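
/* For illustration: CCGCmode and CCGOCmode combine to CCGCmode, any two
   distinct modes from the remaining integer CC set (e.g. CCZmode and
   CCNOmode) fall back to plain CCmode, and the FP modes CCFPmode and
   CCFPUmode are compatible only with themselves, so mixing them with
   anything else yields VOIDmode.  */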
11101
11102 /* Split comparison code CODE into comparisons we can do using branch
11103 instructions. BYPASS_CODE is the comparison code for the branch that will
11104 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11105 is not required, its value is set to UNKNOWN.
11106 We never require more than two branches. */
11107
11108 void
11109 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11110 enum rtx_code *first_code,
11111 enum rtx_code *second_code)
11112 {
11113 *first_code = code;
11114 *bypass_code = UNKNOWN;
11115 *second_code = UNKNOWN;
11116
11117 /* The fcomi comparison sets flags as follows:
11118
11119 cmp ZF PF CF
11120 > 0 0 0
11121 < 0 0 1
11122 = 1 0 0
11123 un 1 1 1 */
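
/* For example, LT is tested as CF=1 (the UNLT case), but the table shows
   that an unordered result also sets CF, so for IEEE a bypass branch on
   UNORDERED is emitted first.  Likewise NE is tested as ZF=0 (LTGT) and
   needs a second branch taken on UNORDERED, since an unordered result
   leaves ZF=1 although the operands compare unequal.  (Illustrative note
   derived from the table and switch below.)  */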
11124
11125 switch (code)
11126 {
11127 case GT: /* GTU - CF=0 & ZF=0 */
11128 case GE: /* GEU - CF=0 */
11129 case ORDERED: /* PF=0 */
11130 case UNORDERED: /* PF=1 */
11131 case UNEQ: /* EQ - ZF=1 */
11132 case UNLT: /* LTU - CF=1 */
11133 case UNLE: /* LEU - CF=1 | ZF=1 */
11134 case LTGT: /* EQ - ZF=0 */
11135 break;
11136 case LT: /* LTU - CF=1 - fails on unordered */
11137 *first_code = UNLT;
11138 *bypass_code = UNORDERED;
11139 break;
11140 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11141 *first_code = UNLE;
11142 *bypass_code = UNORDERED;
11143 break;
11144 case EQ: /* EQ - ZF=1 - fails on unordered */
11145 *first_code = UNEQ;
11146 *bypass_code = UNORDERED;
11147 break;
11148 case NE: /* NE - ZF=0 - fails on unordered */
11149 *first_code = LTGT;
11150 *second_code = UNORDERED;
11151 break;
11152 case UNGE: /* GEU - CF=0 - fails on unordered */
11153 *first_code = GE;
11154 *second_code = UNORDERED;
11155 break;
11156 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11157 *first_code = GT;
11158 *second_code = UNORDERED;
11159 break;
11160 default:
11161 gcc_unreachable ();
11162 }
11163 if (!TARGET_IEEE_FP)
11164 {
11165 *second_code = UNKNOWN;
11166 *bypass_code = UNKNOWN;
11167 }
11168 }
11169
11170 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11171 All following functions use the number of instructions as the cost metric.
11172 In the future this should be tweaked to compute bytes for optimize_size and
11173 to take into account the performance of various instructions on various CPUs. */
11174 static int
11175 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11176 {
11177 if (!TARGET_IEEE_FP)
11178 return 4;
11179 /* The cost of code output by ix86_expand_fp_compare. */
11180 switch (code)
11181 {
11182 case UNLE:
11183 case UNLT:
11184 case LTGT:
11185 case GT:
11186 case GE:
11187 case UNORDERED:
11188 case ORDERED:
11189 case UNEQ:
11190 return 4;
11191 break;
11192 case LT:
11193 case NE:
11194 case EQ:
11195 case UNGE:
11196 return 5;
11197 break;
11198 case LE:
11199 case UNGT:
11200 return 6;
11201 break;
11202 default:
11203 gcc_unreachable ();
11204 }
11205 }
11206
11207 /* Return cost of comparison done using fcomi operation.
11208 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11209 static int
11210 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11211 {
11212 enum rtx_code bypass_code, first_code, second_code;
11213 /* Return an arbitrarily high cost when the instruction is not supported - this
11214 prevents gcc from using it. */
11215 if (!TARGET_CMOVE)
11216 return 1024;
11217 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11218 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11219 }
11220
11221 /* Return cost of comparison done using sahf operation.
11222 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11223 static int
11224 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11225 {
11226 enum rtx_code bypass_code, first_code, second_code;
11227 /* Return an arbitrarily high cost when the instruction is not preferred - this
11228 prevents gcc from using it. */
11229 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11230 return 1024;
11231 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11232 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11233 }
11234
11235 /* Compute cost of the comparison done using any method.
11236 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11237 static int
11238 ix86_fp_comparison_cost (enum rtx_code code)
11239 {
11240 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11241 int min;
11242
11243 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11244 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11245
11246 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11247 if (min > sahf_cost)
11248 min = sahf_cost;
11249 if (min > fcomi_cost)
11250 min = fcomi_cost;
11251 return min;
11252 }
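
/* A small worked example of the cost selection above, assuming
   TARGET_IEEE_FP and TARGET_CMOVE: for EQ the codes split into UNEQ plus
   an UNORDERED bypass, so the fcomi cost is 2 + 1 = 3, the sahf cost is
   3 + 1 = 4, and the arithmetic (fnstsw-based) cost is 5; the minimum of
   3 is returned and ix86_use_fcomi_compare will prefer fcomi.  */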
11253
11254 /* Return true if we should use an FCOMI instruction for this
11255 fp comparison. */
11256
11257 int
11258 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11259 {
11260 enum rtx_code swapped_code = swap_condition (code);
11261
11262 return ((ix86_fp_comparison_cost (code)
11263 == ix86_fp_comparison_fcomi_cost (code))
11264 || (ix86_fp_comparison_cost (swapped_code)
11265 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11266 }
11267
11268 /* Swap, force into registers, or otherwise massage the two operands
11269 to a fp comparison. The operands are updated in place; the new
11270 comparison code is returned. */
11271
11272 static enum rtx_code
11273 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11274 {
11275 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11276 rtx op0 = *pop0, op1 = *pop1;
11277 enum machine_mode op_mode = GET_MODE (op0);
11278 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11279
11280 /* All of the unordered compare instructions only work on registers.
11281 The same is true of the fcomi compare instructions. The XFmode
11282 compare instructions require registers except when comparing
11283 against zero or when converting operand 1 from fixed point to
11284 floating point. */
11285
11286 if (!is_sse
11287 && (fpcmp_mode == CCFPUmode
11288 || (op_mode == XFmode
11289 && ! (standard_80387_constant_p (op0) == 1
11290 || standard_80387_constant_p (op1) == 1)
11291 && GET_CODE (op1) != FLOAT)
11292 || ix86_use_fcomi_compare (code)))
11293 {
11294 op0 = force_reg (op_mode, op0);
11295 op1 = force_reg (op_mode, op1);
11296 }
11297 else
11298 {
11299 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11300 things around if they appear profitable, otherwise force op0
11301 into a register. */
11302
11303 if (standard_80387_constant_p (op0) == 0
11304 || (MEM_P (op0)
11305 && ! (standard_80387_constant_p (op1) == 0
11306 || MEM_P (op1))))
11307 {
11308 rtx tmp;
11309 tmp = op0, op0 = op1, op1 = tmp;
11310 code = swap_condition (code);
11311 }
11312
11313 if (!REG_P (op0))
11314 op0 = force_reg (op_mode, op0);
11315
11316 if (CONSTANT_P (op1))
11317 {
11318 int tmp = standard_80387_constant_p (op1);
11319 if (tmp == 0)
11320 op1 = validize_mem (force_const_mem (op_mode, op1));
11321 else if (tmp == 1)
11322 {
11323 if (TARGET_CMOVE)
11324 op1 = force_reg (op_mode, op1);
11325 }
11326 else
11327 op1 = force_reg (op_mode, op1);
11328 }
11329 }
11330
11331 /* Try to rearrange the comparison to make it cheaper. */
11332 if (ix86_fp_comparison_cost (code)
11333 > ix86_fp_comparison_cost (swap_condition (code))
11334 && (REG_P (op1) || !no_new_pseudos))
11335 {
11336 rtx tmp;
11337 tmp = op0, op0 = op1, op1 = tmp;
11338 code = swap_condition (code);
11339 if (!REG_P (op0))
11340 op0 = force_reg (op_mode, op0);
11341 }
11342
11343 *pop0 = op0;
11344 *pop1 = op1;
11345 return code;
11346 }
11347
11348 /* Convert comparison codes we use to represent FP comparison to integer
11349 code that will result in proper branch. Return UNKNOWN if no such code
11350 is available. */
11351
11352 enum rtx_code
11353 ix86_fp_compare_code_to_integer (enum rtx_code code)
11354 {
11355 switch (code)
11356 {
11357 case GT:
11358 return GTU;
11359 case GE:
11360 return GEU;
11361 case ORDERED:
11362 case UNORDERED:
11363 return code;
11364 break;
11365 case UNEQ:
11366 return EQ;
11367 break;
11368 case UNLT:
11369 return LTU;
11370 break;
11371 case UNLE:
11372 return LEU;
11373 break;
11374 case LTGT:
11375 return NE;
11376 break;
11377 default:
11378 return UNKNOWN;
11379 }
11380 }
11381
11382 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11383
11384 static rtx
11385 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11386 rtx *second_test, rtx *bypass_test)
11387 {
11388 enum machine_mode fpcmp_mode, intcmp_mode;
11389 rtx tmp, tmp2;
11390 int cost = ix86_fp_comparison_cost (code);
11391 enum rtx_code bypass_code, first_code, second_code;
11392
11393 fpcmp_mode = ix86_fp_compare_mode (code);
11394 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11395
11396 if (second_test)
11397 *second_test = NULL_RTX;
11398 if (bypass_test)
11399 *bypass_test = NULL_RTX;
11400
11401 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11402
11403 /* Do fcomi/sahf based test when profitable. */
11404 if ((TARGET_CMOVE || TARGET_SAHF)
11405 && (bypass_code == UNKNOWN || bypass_test)
11406 && (second_code == UNKNOWN || second_test)
11407 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11408 {
11409 if (TARGET_CMOVE)
11410 {
11411 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11412 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11413 tmp);
11414 emit_insn (tmp);
11415 }
11416 else
11417 {
11418 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11419 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11420 if (!scratch)
11421 scratch = gen_reg_rtx (HImode);
11422 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11423 emit_insn (gen_x86_sahf_1 (scratch));
11424 }
11425
11426 /* The FP codes work out to act like unsigned. */
11427 intcmp_mode = fpcmp_mode;
11428 code = first_code;
11429 if (bypass_code != UNKNOWN)
11430 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11431 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11432 const0_rtx);
11433 if (second_code != UNKNOWN)
11434 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11435 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11436 const0_rtx);
11437 }
11438 else
11439 {
11440 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11441 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11442 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11443 if (!scratch)
11444 scratch = gen_reg_rtx (HImode);
11445 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11446
11447 /* In the unordered case, we have to check C2 for NaNs, which
11448 doesn't happen to work out to anything nice combination-wise.
11449 So do some bit twiddling on the value we've got in AH to come
11450 up with an appropriate set of condition codes. */
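
/* The constants used below follow from the layout of the x87 status word
   after fnstsw %ax: in AH, bit 0 (0x01) is C0, bit 2 (0x04) is C2 and
   bit 6 (0x40) is C3, so 0x45 masks all three.  For example, for EQ
   under TARGET_IEEE_FP the emitted sequence is roughly

     and $0x45, %ah;  cmp $0x40, %ah

   which accepts only the "equal" outcome (C3 set, C0 and C2 clear) and
   rejects unordered results.  (Illustrative note.)  */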
11451
11452 intcmp_mode = CCNOmode;
11453 switch (code)
11454 {
11455 case GT:
11456 case UNGT:
11457 if (code == GT || !TARGET_IEEE_FP)
11458 {
11459 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11460 code = EQ;
11461 }
11462 else
11463 {
11464 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11465 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11466 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11467 intcmp_mode = CCmode;
11468 code = GEU;
11469 }
11470 break;
11471 case LT:
11472 case UNLT:
11473 if (code == LT && TARGET_IEEE_FP)
11474 {
11475 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11476 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11477 intcmp_mode = CCmode;
11478 code = EQ;
11479 }
11480 else
11481 {
11482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11483 code = NE;
11484 }
11485 break;
11486 case GE:
11487 case UNGE:
11488 if (code == GE || !TARGET_IEEE_FP)
11489 {
11490 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11491 code = EQ;
11492 }
11493 else
11494 {
11495 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11496 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11497 GEN_INT (0x01)));
11498 code = NE;
11499 }
11500 break;
11501 case LE:
11502 case UNLE:
11503 if (code == LE && TARGET_IEEE_FP)
11504 {
11505 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11506 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11507 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11508 intcmp_mode = CCmode;
11509 code = LTU;
11510 }
11511 else
11512 {
11513 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11514 code = NE;
11515 }
11516 break;
11517 case EQ:
11518 case UNEQ:
11519 if (code == EQ && TARGET_IEEE_FP)
11520 {
11521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11522 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11523 intcmp_mode = CCmode;
11524 code = EQ;
11525 }
11526 else
11527 {
11528 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11529 code = NE;
11530 break;
11531 }
11532 break;
11533 case NE:
11534 case LTGT:
11535 if (code == NE && TARGET_IEEE_FP)
11536 {
11537 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11538 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11539 GEN_INT (0x40)));
11540 code = NE;
11541 }
11542 else
11543 {
11544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11545 code = EQ;
11546 }
11547 break;
11548
11549 case UNORDERED:
11550 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11551 code = NE;
11552 break;
11553 case ORDERED:
11554 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11555 code = EQ;
11556 break;
11557
11558 default:
11559 gcc_unreachable ();
11560 }
11561 }
11562
11563 /* Return the test that should be put into the flags user, i.e.
11564 the bcc, scc, or cmov instruction. */
11565 return gen_rtx_fmt_ee (code, VOIDmode,
11566 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11567 const0_rtx);
11568 }
11569
11570 rtx
11571 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11572 {
11573 rtx op0, op1, ret;
11574 op0 = ix86_compare_op0;
11575 op1 = ix86_compare_op1;
11576
11577 if (second_test)
11578 *second_test = NULL_RTX;
11579 if (bypass_test)
11580 *bypass_test = NULL_RTX;
11581
11582 if (ix86_compare_emitted)
11583 {
11584 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11585 ix86_compare_emitted = NULL_RTX;
11586 }
11587 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11588 {
11589 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11590 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11591 second_test, bypass_test);
11592 }
11593 else
11594 ret = ix86_expand_int_compare (code, op0, op1);
11595
11596 return ret;
11597 }
11598
11599 /* Return true if CODE will result in a nontrivial jump sequence. */
11600 bool
11601 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11602 {
11603 enum rtx_code bypass_code, first_code, second_code;
11604 if (!TARGET_CMOVE)
11605 return true;
11606 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11607 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11608 }
11609
11610 void
11611 ix86_expand_branch (enum rtx_code code, rtx label)
11612 {
11613 rtx tmp;
11614
11615 /* If we have emitted a compare insn, go straight to simple.
11616 ix86_expand_compare won't emit anything if ix86_compare_emitted
11617 is non-NULL. */
11618 if (ix86_compare_emitted)
11619 goto simple;
11620
11621 switch (GET_MODE (ix86_compare_op0))
11622 {
11623 case QImode:
11624 case HImode:
11625 case SImode:
11626 simple:
11627 tmp = ix86_expand_compare (code, NULL, NULL);
11628 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11629 gen_rtx_LABEL_REF (VOIDmode, label),
11630 pc_rtx);
11631 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11632 return;
11633
11634 case SFmode:
11635 case DFmode:
11636 case XFmode:
11637 {
11638 rtvec vec;
11639 int use_fcomi;
11640 enum rtx_code bypass_code, first_code, second_code;
11641
11642 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11643 &ix86_compare_op1);
11644
11645 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11646
11647 /* Check whether we will use the natural sequence with one jump. If
11648 so, we can expand the jump early. Otherwise delay expansion by
11649 creating a compound insn so as not to confuse the optimizers. */
11650 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11651 && TARGET_CMOVE)
11652 {
11653 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11654 gen_rtx_LABEL_REF (VOIDmode, label),
11655 pc_rtx, NULL_RTX, NULL_RTX);
11656 }
11657 else
11658 {
11659 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11660 ix86_compare_op0, ix86_compare_op1);
11661 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11662 gen_rtx_LABEL_REF (VOIDmode, label),
11663 pc_rtx);
11664 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11665
11666 use_fcomi = ix86_use_fcomi_compare (code);
11667 vec = rtvec_alloc (3 + !use_fcomi);
11668 RTVEC_ELT (vec, 0) = tmp;
11669 RTVEC_ELT (vec, 1)
11670 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11671 RTVEC_ELT (vec, 2)
11672 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11673 if (! use_fcomi)
11674 RTVEC_ELT (vec, 3)
11675 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11676
11677 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11678 }
11679 return;
11680 }
11681
11682 case DImode:
11683 if (TARGET_64BIT)
11684 goto simple;
11685 case TImode:
11686 /* Expand DImode branch into multiple compare+branch. */
11687 {
11688 rtx lo[2], hi[2], label2;
11689 enum rtx_code code1, code2, code3;
11690 enum machine_mode submode;
11691
11692 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11693 {
11694 tmp = ix86_compare_op0;
11695 ix86_compare_op0 = ix86_compare_op1;
11696 ix86_compare_op1 = tmp;
11697 code = swap_condition (code);
11698 }
11699 if (GET_MODE (ix86_compare_op0) == DImode)
11700 {
11701 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11702 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11703 submode = SImode;
11704 }
11705 else
11706 {
11707 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11708 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11709 submode = DImode;
11710 }
11711
11712 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11713 avoid two branches. This costs one extra insn, so disable when
11714 optimizing for size. */
11715
11716 if ((code == EQ || code == NE)
11717 && (!optimize_size
11718 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11719 {
11720 rtx xor0, xor1;
11721
11722 xor1 = hi[0];
11723 if (hi[1] != const0_rtx)
11724 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11725 NULL_RTX, 0, OPTAB_WIDEN);
11726
11727 xor0 = lo[0];
11728 if (lo[1] != const0_rtx)
11729 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11730 NULL_RTX, 0, OPTAB_WIDEN);
11731
11732 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11733 NULL_RTX, 0, OPTAB_WIDEN);
11734
11735 ix86_compare_op0 = tmp;
11736 ix86_compare_op1 = const0_rtx;
11737 ix86_expand_branch (code, label);
11738 return;
11739 }
11740
11741 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11742 op1 is a constant, and the low word is zero, then we can just
11743 examine the high word. */
11744
11745 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11746 switch (code)
11747 {
11748 case LT: case LTU: case GE: case GEU:
11749 ix86_compare_op0 = hi[0];
11750 ix86_compare_op1 = hi[1];
11751 ix86_expand_branch (code, label);
11752 return;
11753 default:
11754 break;
11755 }
11756
11757 /* Otherwise, we need two or three jumps. */
11758
11759 label2 = gen_label_rtx ();
11760
11761 code1 = code;
11762 code2 = swap_condition (code);
11763 code3 = unsigned_condition (code);
11764
11765 switch (code)
11766 {
11767 case LT: case GT: case LTU: case GTU:
11768 break;
11769
11770 case LE: code1 = LT; code2 = GT; break;
11771 case GE: code1 = GT; code2 = LT; break;
11772 case LEU: code1 = LTU; code2 = GTU; break;
11773 case GEU: code1 = GTU; code2 = LTU; break;
11774
11775 case EQ: code1 = UNKNOWN; code2 = NE; break;
11776 case NE: code2 = UNKNOWN; break;
11777
11778 default:
11779 gcc_unreachable ();
11780 }
11781
11782 /*
11783 * a < b =>
11784 * if (hi(a) < hi(b)) goto true;
11785 * if (hi(a) > hi(b)) goto false;
11786 * if (lo(a) < lo(b)) goto true;
11787 * false:
11788 */
11789
11790 ix86_compare_op0 = hi[0];
11791 ix86_compare_op1 = hi[1];
11792
11793 if (code1 != UNKNOWN)
11794 ix86_expand_branch (code1, label);
11795 if (code2 != UNKNOWN)
11796 ix86_expand_branch (code2, label2);
11797
11798 ix86_compare_op0 = lo[0];
11799 ix86_compare_op1 = lo[1];
11800 ix86_expand_branch (code3, label);
11801
11802 if (code2 != UNKNOWN)
11803 emit_label (label2);
11804 return;
11805 }
11806
11807 default:
11808 gcc_unreachable ();
11809 }
11810 }
11811
11812 /* Split branch based on floating point condition. */
11813 void
11814 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11815 rtx target1, rtx target2, rtx tmp, rtx pushed)
11816 {
11817 rtx second, bypass;
11818 rtx label = NULL_RTX;
11819 rtx condition;
11820 int bypass_probability = -1, second_probability = -1, probability = -1;
11821 rtx i;
11822
11823 if (target2 != pc_rtx)
11824 {
11825 rtx tmp = target2;
11826 code = reverse_condition_maybe_unordered (code);
11827 target2 = target1;
11828 target1 = tmp;
11829 }
11830
11831 condition = ix86_expand_fp_compare (code, op1, op2,
11832 tmp, &second, &bypass);
11833
11834 /* Remove pushed operand from stack. */
11835 if (pushed)
11836 ix86_free_from_memory (GET_MODE (pushed));
11837
11838 if (split_branch_probability >= 0)
11839 {
11840 /* Distribute the probabilities across the jumps.
11841 Assume that BYPASS and SECOND always test
11842 for UNORDERED. */
11843 probability = split_branch_probability;
11844
11845 /* A value of 1 is low enough that the probability does not need
11846 to be updated. Later we may run some experiments and see
11847 whether unordered values are more frequent in practice. */
11848 if (bypass)
11849 bypass_probability = 1;
11850 if (second)
11851 second_probability = 1;
11852 }
11853 if (bypass != NULL_RTX)
11854 {
11855 label = gen_label_rtx ();
11856 i = emit_jump_insn (gen_rtx_SET
11857 (VOIDmode, pc_rtx,
11858 gen_rtx_IF_THEN_ELSE (VOIDmode,
11859 bypass,
11860 gen_rtx_LABEL_REF (VOIDmode,
11861 label),
11862 pc_rtx)));
11863 if (bypass_probability >= 0)
11864 REG_NOTES (i)
11865 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11866 GEN_INT (bypass_probability),
11867 REG_NOTES (i));
11868 }
11869 i = emit_jump_insn (gen_rtx_SET
11870 (VOIDmode, pc_rtx,
11871 gen_rtx_IF_THEN_ELSE (VOIDmode,
11872 condition, target1, target2)));
11873 if (probability >= 0)
11874 REG_NOTES (i)
11875 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11876 GEN_INT (probability),
11877 REG_NOTES (i));
11878 if (second != NULL_RTX)
11879 {
11880 i = emit_jump_insn (gen_rtx_SET
11881 (VOIDmode, pc_rtx,
11882 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11883 target2)));
11884 if (second_probability >= 0)
11885 REG_NOTES (i)
11886 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11887 GEN_INT (second_probability),
11888 REG_NOTES (i));
11889 }
11890 if (label != NULL_RTX)
11891 emit_label (label);
11892 }
11893
11894 int
11895 ix86_expand_setcc (enum rtx_code code, rtx dest)
11896 {
11897 rtx ret, tmp, tmpreg, equiv;
11898 rtx second_test, bypass_test;
11899
11900 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11901 return 0; /* FAIL */
11902
11903 gcc_assert (GET_MODE (dest) == QImode);
11904
11905 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11906 PUT_MODE (ret, QImode);
11907
11908 tmp = dest;
11909 tmpreg = dest;
11910
11911 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11912 if (bypass_test || second_test)
11913 {
11914 rtx test = second_test;
11915 int bypass = 0;
11916 rtx tmp2 = gen_reg_rtx (QImode);
11917 if (bypass_test)
11918 {
11919 gcc_assert (!second_test);
11920 test = bypass_test;
11921 bypass = 1;
11922 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11923 }
11924 PUT_MODE (test, QImode);
11925 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11926
11927 if (bypass)
11928 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11929 else
11930 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11931 }
11932
11933 /* Attach a REG_EQUAL note describing the comparison result. */
11934 if (ix86_compare_op0 && ix86_compare_op1)
11935 {
11936 equiv = simplify_gen_relational (code, QImode,
11937 GET_MODE (ix86_compare_op0),
11938 ix86_compare_op0, ix86_compare_op1);
11939 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11940 }
11941
11942 return 1; /* DONE */
11943 }
11944
11945 /* Expand comparison setting or clearing carry flag. Return true when
11946 successful and set pop for the operation. */
11947 static bool
11948 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11949 {
11950 enum machine_mode mode =
11951 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11952
11953 /* Do not handle DImode compares that go through the special path.
11954 Also we can't deal with FP compares yet; this is possible to add. */
11955 if (mode == (TARGET_64BIT ? TImode : DImode))
11956 return false;
11957
11958 if (SCALAR_FLOAT_MODE_P (mode))
11959 {
11960 rtx second_test = NULL, bypass_test = NULL;
11961 rtx compare_op, compare_seq;
11962
11963 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11964
11965 /* Shortcut: the following common codes never translate
11966 into carry flag compares. */
11967 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11968 || code == ORDERED || code == UNORDERED)
11969 return false;
11970
11971 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
11972 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11973 && !TARGET_IEEE_FP)
11974 {
11975 rtx tmp = op0;
11976 op0 = op1;
11977 op1 = tmp;
11978 code = swap_condition (code);
11979 }
11980
11981 /* Try to expand the comparison and verify that we end up with a carry flag
11982 based comparison. This fails to be true only when we decide to expand the
11983 comparison using arithmetic, which is not a common scenario. */
11984 start_sequence ();
11985 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11986 &second_test, &bypass_test);
11987 compare_seq = get_insns ();
11988 end_sequence ();
11989
11990 if (second_test || bypass_test)
11991 return false;
11992 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11993 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11994 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11995 else
11996 code = GET_CODE (compare_op);
11997 if (code != LTU && code != GEU)
11998 return false;
11999 emit_insn (compare_seq);
12000 *pop = compare_op;
12001 return true;
12002 }
12003 if (!INTEGRAL_MODE_P (mode))
12004 return false;
12005 switch (code)
12006 {
12007 case LTU:
12008 case GEU:
12009 break;
12010
12011 /* Convert a==0 into (unsigned)a<1. */
12012 case EQ:
12013 case NE:
12014 if (op1 != const0_rtx)
12015 return false;
12016 op1 = const1_rtx;
12017 code = (code == EQ ? LTU : GEU);
12018 break;
12019
12020 /* Convert a>b into b<a or a>=b-1. */
12021 case GTU:
12022 case LEU:
12023 if (CONST_INT_P (op1))
12024 {
12025 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12026 /* Bail out on overflow. We could still swap the operands, but that
12027 would force loading the constant into a register. */
12028 if (op1 == const0_rtx
12029 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12030 return false;
12031 code = (code == GTU ? GEU : LTU);
12032 }
12033 else
12034 {
12035 rtx tmp = op1;
12036 op1 = op0;
12037 op0 = tmp;
12038 code = (code == GTU ? LTU : GEU);
12039 }
12040 break;
12041
12042 /* Convert a>=0 into (unsigned)a<0x80000000. */
12043 case LT:
12044 case GE:
12045 if (mode == DImode || op1 != const0_rtx)
12046 return false;
12047 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12048 code = (code == LT ? GEU : LTU);
12049 break;
12050 case LE:
12051 case GT:
12052 if (mode == DImode || op1 != constm1_rtx)
12053 return false;
12054 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12055 code = (code == LE ? GEU : LTU);
12056 break;
12057
12058 default:
12059 return false;
12060 }
12061 /* Swapping operands may cause a constant to appear as the first operand. */
12062 if (!nonimmediate_operand (op0, VOIDmode))
12063 {
12064 if (no_new_pseudos)
12065 return false;
12066 op0 = force_reg (mode, op0);
12067 }
12068 ix86_compare_op0 = op0;
12069 ix86_compare_op1 = op1;
12070 *pop = ix86_expand_compare (code, NULL, NULL);
12071 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12072 return true;
12073 }
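
/* Illustrative example of the rewrites above: an unsigned "a <= 42"
   (LEU against a constant) becomes "a < 43" (LTU), and "a == 0" becomes
   the unsigned "a < 1"; either can then be computed with a single cmp
   whose borrow lands in the carry flag, which the callers below turn
   into sbb-based branchless sequences.  */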
12074
12075 int
12076 ix86_expand_int_movcc (rtx operands[])
12077 {
12078 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12079 rtx compare_seq, compare_op;
12080 rtx second_test, bypass_test;
12081 enum machine_mode mode = GET_MODE (operands[0]);
12082 bool sign_bit_compare_p = false;
12083
12084 start_sequence ();
12085 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12086 compare_seq = get_insns ();
12087 end_sequence ();
12088
12089 compare_code = GET_CODE (compare_op);
12090
12091 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12092 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12093 sign_bit_compare_p = true;
12094
12095 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12096 HImode insns, we'd be swallowed in word prefix ops. */
12097
12098 if ((mode != HImode || TARGET_FAST_PREFIX)
12099 && (mode != (TARGET_64BIT ? TImode : DImode))
12100 && CONST_INT_P (operands[2])
12101 && CONST_INT_P (operands[3]))
12102 {
12103 rtx out = operands[0];
12104 HOST_WIDE_INT ct = INTVAL (operands[2]);
12105 HOST_WIDE_INT cf = INTVAL (operands[3]);
12106 HOST_WIDE_INT diff;
12107
12108 diff = ct - cf;
12109 /* Sign bit compares are better done using shifts than by using
12110 sbb. */
12111 if (sign_bit_compare_p
12112 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12113 ix86_compare_op1, &compare_op))
12114 {
12115 /* Detect overlap between destination and compare sources. */
12116 rtx tmp = out;
12117
12118 if (!sign_bit_compare_p)
12119 {
12120 bool fpcmp = false;
12121
12122 compare_code = GET_CODE (compare_op);
12123
12124 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12125 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12126 {
12127 fpcmp = true;
12128 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12129 }
12130
12131 /* To simplify the rest of the code, restrict to the GEU case. */
12132 if (compare_code == LTU)
12133 {
12134 HOST_WIDE_INT tmp = ct;
12135 ct = cf;
12136 cf = tmp;
12137 compare_code = reverse_condition (compare_code);
12138 code = reverse_condition (code);
12139 }
12140 else
12141 {
12142 if (fpcmp)
12143 PUT_CODE (compare_op,
12144 reverse_condition_maybe_unordered
12145 (GET_CODE (compare_op)));
12146 else
12147 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12148 }
12149 diff = ct - cf;
12150
12151 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12152 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12153 tmp = gen_reg_rtx (mode);
12154
12155 if (mode == DImode)
12156 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12157 else
12158 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12159 }
12160 else
12161 {
12162 if (code == GT || code == GE)
12163 code = reverse_condition (code);
12164 else
12165 {
12166 HOST_WIDE_INT tmp = ct;
12167 ct = cf;
12168 cf = tmp;
12169 diff = ct - cf;
12170 }
12171 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12172 ix86_compare_op1, VOIDmode, 0, -1);
12173 }
12174
12175 if (diff == 1)
12176 {
12177 /*
12178 * cmpl op0,op1
12179 * sbbl dest,dest
12180 * [addl dest, ct]
12181 *
12182 * Size 5 - 8.
12183 */
12184 if (ct)
12185 tmp = expand_simple_binop (mode, PLUS,
12186 tmp, GEN_INT (ct),
12187 copy_rtx (tmp), 1, OPTAB_DIRECT);
12188 }
12189 else if (cf == -1)
12190 {
12191 /*
12192 * cmpl op0,op1
12193 * sbbl dest,dest
12194 * orl $ct, dest
12195 *
12196 * Size 8.
12197 */
12198 tmp = expand_simple_binop (mode, IOR,
12199 tmp, GEN_INT (ct),
12200 copy_rtx (tmp), 1, OPTAB_DIRECT);
12201 }
12202 else if (diff == -1 && ct)
12203 {
12204 /*
12205 * cmpl op0,op1
12206 * sbbl dest,dest
12207 * notl dest
12208 * [addl dest, cf]
12209 *
12210 * Size 8 - 11.
12211 */
12212 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12213 if (cf)
12214 tmp = expand_simple_binop (mode, PLUS,
12215 copy_rtx (tmp), GEN_INT (cf),
12216 copy_rtx (tmp), 1, OPTAB_DIRECT);
12217 }
12218 else
12219 {
12220 /*
12221 * cmpl op0,op1
12222 * sbbl dest,dest
12223 * [notl dest]
12224 * andl cf - ct, dest
12225 * [addl dest, ct]
12226 *
12227 * Size 8 - 11.
12228 */
12229
12230 if (cf == 0)
12231 {
12232 cf = ct;
12233 ct = 0;
12234 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12235 }
12236
12237 tmp = expand_simple_binop (mode, AND,
12238 copy_rtx (tmp),
12239 gen_int_mode (cf - ct, mode),
12240 copy_rtx (tmp), 1, OPTAB_DIRECT);
12241 if (ct)
12242 tmp = expand_simple_binop (mode, PLUS,
12243 copy_rtx (tmp), GEN_INT (ct),
12244 copy_rtx (tmp), 1, OPTAB_DIRECT);
12245 }
12246
12247 if (!rtx_equal_p (tmp, out))
12248 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12249
12250 return 1; /* DONE */
12251 }
12252
12253 if (diff < 0)
12254 {
12255 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12256
12257 HOST_WIDE_INT tmp;
12258 tmp = ct, ct = cf, cf = tmp;
12259 diff = -diff;
12260
12261 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12262 {
12263 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12264
12265 /* We may be reversing an unordered compare to a normal compare, which
12266 is not valid in general (we may convert a non-trapping condition
12267 to a trapping one); however, on i386 we currently emit all
12268 comparisons unordered. */
12269 compare_code = reverse_condition_maybe_unordered (compare_code);
12270 code = reverse_condition_maybe_unordered (code);
12271 }
12272 else
12273 {
12274 compare_code = reverse_condition (compare_code);
12275 code = reverse_condition (code);
12276 }
12277 }
12278
12279 compare_code = UNKNOWN;
12280 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12281 && CONST_INT_P (ix86_compare_op1))
12282 {
12283 if (ix86_compare_op1 == const0_rtx
12284 && (code == LT || code == GE))
12285 compare_code = code;
12286 else if (ix86_compare_op1 == constm1_rtx)
12287 {
12288 if (code == LE)
12289 compare_code = LT;
12290 else if (code == GT)
12291 compare_code = GE;
12292 }
12293 }
12294
12295 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12296 if (compare_code != UNKNOWN
12297 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12298 && (cf == -1 || ct == -1))
12299 {
12300 /* If the lea code below could be used, only optimize
12301 if it results in a 2-insn sequence. */
12302
12303 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12304 || diff == 3 || diff == 5 || diff == 9)
12305 || (compare_code == LT && ct == -1)
12306 || (compare_code == GE && cf == -1))
12307 {
12308 /*
12309 * notl op1 (if necessary)
12310 * sarl $31, op1
12311 * orl cf, op1
12312 */
12313 if (ct != -1)
12314 {
12315 cf = ct;
12316 ct = -1;
12317 code = reverse_condition (code);
12318 }
12319
12320 out = emit_store_flag (out, code, ix86_compare_op0,
12321 ix86_compare_op1, VOIDmode, 0, -1);
12322
12323 out = expand_simple_binop (mode, IOR,
12324 out, GEN_INT (cf),
12325 out, 1, OPTAB_DIRECT);
12326 if (out != operands[0])
12327 emit_move_insn (operands[0], out);
12328
12329 return 1; /* DONE */
12330 }
12331 }
12332
12333
12334 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12335 || diff == 3 || diff == 5 || diff == 9)
12336 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12337 && (mode != DImode
12338 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12339 {
12340 /*
12341 * xorl dest,dest
12342 * cmpl op1,op2
12343 * setcc dest
12344 * lea cf(dest*(ct-cf)),dest
12345 *
12346 * Size 14.
12347 *
12348 * This also catches the degenerate setcc-only case.
12349 */
12350
12351 rtx tmp;
12352 int nops;
12353
12354 out = emit_store_flag (out, code, ix86_compare_op0,
12355 ix86_compare_op1, VOIDmode, 0, 1);
12356
12357 nops = 0;
12358 /* On x86_64 the lea instruction operates on Pmode, so we need
12359 the arithmetic done in the proper mode to match. */
12360 if (diff == 1)
12361 tmp = copy_rtx (out);
12362 else
12363 {
12364 rtx out1;
12365 out1 = copy_rtx (out);
12366 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12367 nops++;
12368 if (diff & 1)
12369 {
12370 tmp = gen_rtx_PLUS (mode, tmp, out1);
12371 nops++;
12372 }
12373 }
12374 if (cf != 0)
12375 {
12376 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12377 nops++;
12378 }
12379 if (!rtx_equal_p (tmp, out))
12380 {
12381 if (nops == 1)
12382 out = force_operand (tmp, copy_rtx (out));
12383 else
12384 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12385 }
12386 if (!rtx_equal_p (out, operands[0]))
12387 emit_move_insn (operands[0], copy_rtx (out));
12388
12389 return 1; /* DONE */
12390 }
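
/* Worked example for the lea path above (illustrative only): for
   dest = (a < b) ? 7 : 3 we have ct = 7, cf = 3 and diff = 4, so the
   setcc result (0 or 1) is scaled and offset in one step, roughly

     xorl %eax, %eax;  cmpl ...;  setl %al;  leal 3(,%eax,4), %eax

   which yields 3 when the condition is false and 7 when it is true.  */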
12391
12392 /*
12393 * General case: Jumpful:
12394 * xorl dest,dest cmpl op1, op2
12395 * cmpl op1, op2 movl ct, dest
12396 * setcc dest jcc 1f
12397 * decl dest movl cf, dest
12398 * andl (cf-ct),dest 1:
12399 * addl ct,dest
12400 *
12401 * Size 20. Size 14.
12402 *
12403 * This is reasonably steep, but branch mispredict costs are
12404 * high on modern cpus, so consider failing only if optimizing
12405 * for space.
12406 */
12407
12408 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12409 && BRANCH_COST >= 2)
12410 {
12411 if (cf == 0)
12412 {
12413 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12414
12415 cf = ct;
12416 ct = 0;
12417
12418 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12419 {
12420 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12421
12422 /* We may be reversing an unordered compare to a normal compare,
12423 which is not valid in general (we may convert a non-trapping
12424 condition to a trapping one); however, on i386 we currently
12425 emit all comparisons unordered. */
12426 code = reverse_condition_maybe_unordered (code);
12427 }
12428 else
12429 {
12430 code = reverse_condition (code);
12431 if (compare_code != UNKNOWN)
12432 compare_code = reverse_condition (compare_code);
12433 }
12434 }
12435
12436 if (compare_code != UNKNOWN)
12437 {
12438 /* notl op1 (if needed)
12439 sarl $31, op1
12440 andl (cf-ct), op1
12441 addl ct, op1
12442
12443 For x < 0 (resp. x <= -1) there will be no notl,
12444 so if possible swap the constants to get rid of the
12445 complement.
12446 True/false will be -1/0 while code below (store flag
12447 followed by decrement) is 0/-1, so the constants need
12448 to be exchanged once more. */
12449
12450 if (compare_code == GE || !cf)
12451 {
12452 code = reverse_condition (code);
12453 compare_code = LT;
12454 }
12455 else
12456 {
12457 HOST_WIDE_INT tmp = cf;
12458 cf = ct;
12459 ct = tmp;
12460 }
12461
12462 out = emit_store_flag (out, code, ix86_compare_op0,
12463 ix86_compare_op1, VOIDmode, 0, -1);
12464 }
12465 else
12466 {
12467 out = emit_store_flag (out, code, ix86_compare_op0,
12468 ix86_compare_op1, VOIDmode, 0, 1);
12469
12470 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12471 copy_rtx (out), 1, OPTAB_DIRECT);
12472 }
12473
12474 out = expand_simple_binop (mode, AND, copy_rtx (out),
12475 gen_int_mode (cf - ct, mode),
12476 copy_rtx (out), 1, OPTAB_DIRECT);
12477 if (ct)
12478 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12479 copy_rtx (out), 1, OPTAB_DIRECT);
12480 if (!rtx_equal_p (out, operands[0]))
12481 emit_move_insn (operands[0], copy_rtx (out));
12482
12483 return 1; /* DONE */
12484 }
12485 }
12486
12487 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12488 {
12489 /* Try a few things more with specific constants and a variable. */
12490
12491 optab op;
12492 rtx var, orig_out, out, tmp;
12493
12494 if (BRANCH_COST <= 2)
12495 return 0; /* FAIL */
12496
12497 /* If one of the two operands is an interesting constant, load a
12498 constant with the above and mask it in with a logical operation. */
12499
12500 if (CONST_INT_P (operands[2]))
12501 {
12502 var = operands[3];
12503 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12504 operands[3] = constm1_rtx, op = and_optab;
12505 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12506 operands[3] = const0_rtx, op = ior_optab;
12507 else
12508 return 0; /* FAIL */
12509 }
12510 else if (CONST_INT_P (operands[3]))
12511 {
12512 var = operands[2];
12513 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12514 operands[2] = constm1_rtx, op = and_optab;
12515 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12516 operands[2] = const0_rtx, op = ior_optab;
12517 else
12518 return 0; /* FAIL */
12519 }
12520 else
12521 return 0; /* FAIL */
12522
12523 orig_out = operands[0];
12524 tmp = gen_reg_rtx (mode);
12525 operands[0] = tmp;
12526
12527 /* Recurse to get the constant loaded. */
12528 if (ix86_expand_int_movcc (operands) == 0)
12529 return 0; /* FAIL */
12530
12531 /* Mask in the interesting variable. */
12532 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12533 OPTAB_WIDEN);
12534 if (!rtx_equal_p (out, orig_out))
12535 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12536
12537 return 1; /* DONE */
12538 }
12539
12540 /*
12541 * For comparison with above,
12542 *
12543 * movl cf,dest
12544 * movl ct,tmp
12545 * cmpl op1,op2
12546 * cmovcc tmp,dest
12547 *
12548 * Size 15.
12549 */
12550
12551 if (! nonimmediate_operand (operands[2], mode))
12552 operands[2] = force_reg (mode, operands[2]);
12553 if (! nonimmediate_operand (operands[3], mode))
12554 operands[3] = force_reg (mode, operands[3]);
12555
12556 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12557 {
12558 rtx tmp = gen_reg_rtx (mode);
12559 emit_move_insn (tmp, operands[3]);
12560 operands[3] = tmp;
12561 }
12562 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12563 {
12564 rtx tmp = gen_reg_rtx (mode);
12565 emit_move_insn (tmp, operands[2]);
12566 operands[2] = tmp;
12567 }
12568
12569 if (! register_operand (operands[2], VOIDmode)
12570 && (mode == QImode
12571 || ! register_operand (operands[3], VOIDmode)))
12572 operands[2] = force_reg (mode, operands[2]);
12573
12574 if (mode == QImode
12575 && ! register_operand (operands[3], VOIDmode))
12576 operands[3] = force_reg (mode, operands[3]);
12577
12578 emit_insn (compare_seq);
12579 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12580 gen_rtx_IF_THEN_ELSE (mode,
12581 compare_op, operands[2],
12582 operands[3])));
12583 if (bypass_test)
12584 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12585 gen_rtx_IF_THEN_ELSE (mode,
12586 bypass_test,
12587 copy_rtx (operands[3]),
12588 copy_rtx (operands[0]))));
12589 if (second_test)
12590 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12591 gen_rtx_IF_THEN_ELSE (mode,
12592 second_test,
12593 copy_rtx (operands[2]),
12594 copy_rtx (operands[0]))));
12595
12596 return 1; /* DONE */
12597 }
12598
12599 /* Swap, force into registers, or otherwise massage the two operands
12600 to an sse comparison with a mask result. Thus we differ a bit from
12601 ix86_prepare_fp_compare_args which expects to produce a flags result.
12602
12603 The DEST operand exists to help determine whether to commute commutative
12604 operators. The POP0/POP1 operands are updated in place. The new
12605 comparison code is returned, or UNKNOWN if not implementable. */
12606
12607 static enum rtx_code
12608 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12609 rtx *pop0, rtx *pop1)
12610 {
12611 rtx tmp;
12612
12613 switch (code)
12614 {
12615 case LTGT:
12616 case UNEQ:
12617 /* We have no LTGT as an operator. We could implement it with
12618 NE & ORDERED, but this requires an extra temporary. It's
12619 not clear that it's worth it. */
12620 return UNKNOWN;
12621
12622 case LT:
12623 case LE:
12624 case UNGT:
12625 case UNGE:
12626 /* These are supported directly. */
12627 break;
12628
12629 case EQ:
12630 case NE:
12631 case UNORDERED:
12632 case ORDERED:
12633 /* For commutative operators, try to canonicalize the destination
12634 operand to be first in the comparison - this helps reload to
12635 avoid extra moves. */
12636 if (!dest || !rtx_equal_p (dest, *pop1))
12637 break;
12638 /* FALLTHRU */
12639
12640 case GE:
12641 case GT:
12642 case UNLE:
12643 case UNLT:
12644 /* These are not supported directly. Swap the comparison operands
12645 to transform into something that is supported. */
12646 tmp = *pop0;
12647 *pop0 = *pop1;
12648 *pop1 = tmp;
12649 code = swap_condition (code);
12650 break;
12651
12652 default:
12653 gcc_unreachable ();
12654 }
12655
12656 return code;
12657 }
12658
12659 /* Detect conditional moves that exactly match min/max operational
12660 semantics. Note that this is IEEE safe, as long as we don't
12661 interchange the operands.
12662
12663 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12664 and TRUE if the operation is successful and instructions are emitted. */
12665
12666 static bool
12667 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12668 rtx cmp_op1, rtx if_true, rtx if_false)
12669 {
12670 enum machine_mode mode;
12671 bool is_min;
12672 rtx tmp;
12673
12674 if (code == LT)
12675 ;
12676 else if (code == UNGE)
12677 {
12678 tmp = if_true;
12679 if_true = if_false;
12680 if_false = tmp;
12681 }
12682 else
12683 return false;
12684
12685 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12686 is_min = true;
12687 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12688 is_min = false;
12689 else
12690 return false;
12691
12692 mode = GET_MODE (dest);
12693
12694 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12695 but MODE may be a vector mode and thus not appropriate. */
12696 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12697 {
12698 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12699 rtvec v;
12700
12701 if_true = force_reg (mode, if_true);
12702 v = gen_rtvec (2, if_true, if_false);
12703 tmp = gen_rtx_UNSPEC (mode, v, u);
12704 }
12705 else
12706 {
12707 code = is_min ? SMIN : SMAX;
12708 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12709 }
12710
12711 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12712 return true;
12713 }
12714
12715 /* Expand an sse vector comparison. Return the register with the result. */
12716
12717 static rtx
12718 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12719 rtx op_true, rtx op_false)
12720 {
12721 enum machine_mode mode = GET_MODE (dest);
12722 rtx x;
12723
12724 cmp_op0 = force_reg (mode, cmp_op0);
12725 if (!nonimmediate_operand (cmp_op1, mode))
12726 cmp_op1 = force_reg (mode, cmp_op1);
12727
12728 if (optimize
12729 || reg_overlap_mentioned_p (dest, op_true)
12730 || reg_overlap_mentioned_p (dest, op_false))
12731 dest = gen_reg_rtx (mode);
12732
12733 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12734 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12735
12736 return dest;
12737 }
12738
12739 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12740 operations. This is used for both scalar and vector conditional moves. */
12741
12742 static void
12743 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12744 {
12745 enum machine_mode mode = GET_MODE (dest);
12746 rtx t2, t3, x;
12747
12748 if (op_false == CONST0_RTX (mode))
12749 {
12750 op_true = force_reg (mode, op_true);
12751 x = gen_rtx_AND (mode, cmp, op_true);
12752 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12753 }
12754 else if (op_true == CONST0_RTX (mode))
12755 {
12756 op_false = force_reg (mode, op_false);
12757 x = gen_rtx_NOT (mode, cmp);
12758 x = gen_rtx_AND (mode, x, op_false);
12759 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12760 }
12761 else
12762 {
12763 op_true = force_reg (mode, op_true);
12764 op_false = force_reg (mode, op_false);
12765
12766 t2 = gen_reg_rtx (mode);
12767 if (optimize)
12768 t3 = gen_reg_rtx (mode);
12769 else
12770 t3 = dest;
12771
12772 x = gen_rtx_AND (mode, op_true, cmp);
12773 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12774
12775 x = gen_rtx_NOT (mode, cmp);
12776 x = gen_rtx_AND (mode, x, op_false);
12777 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12778
12779 x = gen_rtx_IOR (mode, t3, t2);
12780 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12781 }
12782 }
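
/* The general case above is the classic branchless select on a mask:

     dest = (cmp & op_true) | (~cmp & op_false)

   where cmp is an all-ones/all-zeros element mask produced by
   ix86_expand_sse_cmp; the two special cases simply drop the term whose
   operand is known to be zero.  */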
12783
12784 /* Expand a floating-point conditional move. Return true if successful. */
12785
12786 int
12787 ix86_expand_fp_movcc (rtx operands[])
12788 {
12789 enum machine_mode mode = GET_MODE (operands[0]);
12790 enum rtx_code code = GET_CODE (operands[1]);
12791 rtx tmp, compare_op, second_test, bypass_test;
12792
12793 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12794 {
12795 enum machine_mode cmode;
12796
12797 /* Since we have no cmove for sse registers, don't force bad register
12798 allocation just to gain access to it. Deny movcc when the
12799 comparison mode doesn't match the move mode. */
12800 cmode = GET_MODE (ix86_compare_op0);
12801 if (cmode == VOIDmode)
12802 cmode = GET_MODE (ix86_compare_op1);
12803 if (cmode != mode)
12804 return 0;
12805
12806 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12807 &ix86_compare_op0,
12808 &ix86_compare_op1);
12809 if (code == UNKNOWN)
12810 return 0;
12811
12812 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12813 ix86_compare_op1, operands[2],
12814 operands[3]))
12815 return 1;
12816
12817 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12818 ix86_compare_op1, operands[2], operands[3]);
12819 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12820 return 1;
12821 }
12822
12823 /* The floating point conditional move instructions don't directly
12824 support conditions resulting from a signed integer comparison. */
12825
12826 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12827
12828 /* The floating point conditional move instructions don't directly
12829 support signed integer comparisons. */
12830
12831 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12832 {
12833 gcc_assert (!second_test && !bypass_test);
12834 tmp = gen_reg_rtx (QImode);
12835 ix86_expand_setcc (code, tmp);
12836 code = NE;
12837 ix86_compare_op0 = tmp;
12838 ix86_compare_op1 = const0_rtx;
12839 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12840 }
12841 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12842 {
12843 tmp = gen_reg_rtx (mode);
12844 emit_move_insn (tmp, operands[3]);
12845 operands[3] = tmp;
12846 }
12847 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12848 {
12849 tmp = gen_reg_rtx (mode);
12850 emit_move_insn (tmp, operands[2]);
12851 operands[2] = tmp;
12852 }
12853
12854 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12855 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12856 operands[2], operands[3])));
12857 if (bypass_test)
12858 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12859 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12860 operands[3], operands[0])));
12861 if (second_test)
12862 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12863 gen_rtx_IF_THEN_ELSE (mode, second_test,
12864 operands[2], operands[0])));
12865
12866 return 1;
12867 }
12868
12869 /* Expand a floating-point vector conditional move; a vcond operation
12870 rather than a movcc operation. */
12871
12872 bool
12873 ix86_expand_fp_vcond (rtx operands[])
12874 {
12875 enum rtx_code code = GET_CODE (operands[3]);
12876 rtx cmp;
12877
12878 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12879 &operands[4], &operands[5]);
12880 if (code == UNKNOWN)
12881 return false;
12882
12883 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12884 operands[5], operands[1], operands[2]))
12885 return true;
12886
12887 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12888 operands[1], operands[2]);
12889 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12890 return true;
12891 }
12892
12893 /* Expand a signed/unsigned integral vector conditional move. */
12894
12895 bool
12896 ix86_expand_int_vcond (rtx operands[])
12897 {
12898 enum machine_mode mode = GET_MODE (operands[0]);
12899 enum rtx_code code = GET_CODE (operands[3]);
12900 bool negate = false;
12901 rtx x, cop0, cop1;
12902
12903 cop0 = operands[4];
12904 cop1 = operands[5];
12905
12906 /* Canonicalize the comparison to EQ, GT, GTU. */
12907 switch (code)
12908 {
12909 case EQ:
12910 case GT:
12911 case GTU:
12912 break;
12913
12914 case NE:
12915 case LE:
12916 case LEU:
12917 code = reverse_condition (code);
12918 negate = true;
12919 break;
12920
12921 case GE:
12922 case GEU:
12923 code = reverse_condition (code);
12924 negate = true;
12925 /* FALLTHRU */
12926
12927 case LT:
12928 case LTU:
12929 code = swap_condition (code);
12930 x = cop0, cop0 = cop1, cop1 = x;
12931 break;
12932
12933 default:
12934 gcc_unreachable ();
12935 }
12936
12937 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12938 if (mode == V2DImode)
12939 {
12940 switch (code)
12941 {
12942 case EQ:
12943 /* SSE4.1 supports EQ. */
12944 if (!TARGET_SSE4_1)
12945 return false;
12946 break;
12947
12948 case GT:
12949 case GTU:
12950 /* SSE4.2 supports GT/GTU. */
12951 if (!TARGET_SSE4_2)
12952 return false;
12953 break;
12954
12955 default:
12956 gcc_unreachable ();
12957 }
12958 }
12959
12960 /* Unsigned parallel compare is not supported by the hardware. Play some
12961 tricks to turn this into a signed comparison against 0. */
12962 if (code == GTU)
12963 {
12964 cop0 = force_reg (mode, cop0);
12965
12966 switch (mode)
12967 {
12968 case V4SImode:
12969 case V2DImode:
12970 {
12971 rtx t1, t2, mask;
12972
12973 /* Perform a parallel modulo subtraction. */
12974 t1 = gen_reg_rtx (mode);
12975 emit_insn ((mode == V4SImode
12976 ? gen_subv4si3
12977 : gen_subv2di3) (t1, cop0, cop1));
12978
12979 /* Extract the original sign bit of op0. */
12980 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12981 true, false);
12982 t2 = gen_reg_rtx (mode);
12983 emit_insn ((mode == V4SImode
12984 ? gen_andv4si3
12985 : gen_andv2di3) (t2, cop0, mask));
12986
12987 /* XOR it back into the result of the subtraction. This results
12988 in the sign bit set iff we saw unsigned underflow. */
12989 x = gen_reg_rtx (mode);
12990 emit_insn ((mode == V4SImode
12991 ? gen_xorv4si3
12992 : gen_xorv2di3) (x, t1, t2));
12993
12994 code = GT;
12995 }
12996 break;
12997
12998 case V16QImode:
12999 case V8HImode:
13000 /* Perform a parallel unsigned saturating subtraction. */
13001 x = gen_reg_rtx (mode);
13002 emit_insn (gen_rtx_SET (VOIDmode, x,
13003 gen_rtx_US_MINUS (mode, cop0, cop1)));
13004
13005 code = EQ;
13006 negate = !negate;
13007 break;
13008
13009 default:
13010 gcc_unreachable ();
13011 }
13012
13013 cop0 = x;
13014 cop1 = CONST0_RTX (mode);
13015 }
13016
13017 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13018 operands[1+negate], operands[2-negate]);
13019
13020 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13021 operands[2-negate]);
13022 return true;
13023 }
13024
13025 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13026 true if we should do zero extension, else sign extension. HIGH_P is
13027 true if we want the N/2 high elements, else the low elements. */
13028
13029 void
13030 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13031 {
13032 enum machine_mode imode = GET_MODE (operands[1]);
13033 rtx (*unpack)(rtx, rtx, rtx);
13034 rtx se, dest;
13035
13036 switch (imode)
13037 {
13038 case V16QImode:
13039 if (high_p)
13040 unpack = gen_vec_interleave_highv16qi;
13041 else
13042 unpack = gen_vec_interleave_lowv16qi;
13043 break;
13044 case V8HImode:
13045 if (high_p)
13046 unpack = gen_vec_interleave_highv8hi;
13047 else
13048 unpack = gen_vec_interleave_lowv8hi;
13049 break;
13050 case V4SImode:
13051 if (high_p)
13052 unpack = gen_vec_interleave_highv4si;
13053 else
13054 unpack = gen_vec_interleave_lowv4si;
13055 break;
13056 default:
13057 gcc_unreachable ();
13058 }
13059
13060 dest = gen_lowpart (imode, operands[0]);
13061
13062 if (unsigned_p)
13063 se = force_reg (imode, CONST0_RTX (imode));
13064 else
13065 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13066 operands[1], pc_rtx, pc_rtx);
13067
13068 emit_insn (unpack (dest, operands[1], se));
13069 }
13070
13071 /* This function performs the same task as ix86_expand_sse_unpack,
13072 but with SSE4.1 instructions. */
13073
13074 void
13075 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13076 {
13077 enum machine_mode imode = GET_MODE (operands[1]);
13078 rtx (*unpack)(rtx, rtx);
13079 rtx src, dest;
13080
13081 switch (imode)
13082 {
13083 case V16QImode:
13084 if (unsigned_p)
13085 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13086 else
13087 unpack = gen_sse4_1_extendv8qiv8hi2;
13088 break;
13089 case V8HImode:
13090 if (unsigned_p)
13091 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13092 else
13093 unpack = gen_sse4_1_extendv4hiv4si2;
13094 break;
13095 case V4SImode:
13096 if (unsigned_p)
13097 unpack = gen_sse4_1_zero_extendv2siv2di2;
13098 else
13099 unpack = gen_sse4_1_extendv2siv2di2;
13100 break;
13101 default:
13102 gcc_unreachable ();
13103 }
13104
13105 dest = operands[0];
13106 if (high_p)
13107 {
13108 /* Shift higher 8 bytes to lower 8 bytes. */
13109 src = gen_reg_rtx (imode);
13110 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13111 gen_lowpart (TImode, operands[1]),
13112 GEN_INT (64)));
13113 }
13114 else
13115 src = operands[1];
13116
13117 emit_insn (unpack (dest, src));
13118 }
13119
13120 /* Expand conditional increment or decrement using adc/sbb instructions.
13121 The default case using setcc followed by the conditional move can be
13122 done by generic code. */
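/* For illustration (not the literal expansion): with unsigned operands,
   "if (a < b) x++;" can become "cmpl %ebx, %eax; adcl $0, %ecx", reusing
   the carry flag set by the compare instead of a setcc/cmov sequence.
   The register names here are only an example.  */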
13123 int
13124 ix86_expand_int_addcc (rtx operands[])
13125 {
13126 enum rtx_code code = GET_CODE (operands[1]);
13127 rtx compare_op;
13128 rtx val = const0_rtx;
13129 bool fpcmp = false;
13130 enum machine_mode mode = GET_MODE (operands[0]);
13131
13132 if (operands[3] != const1_rtx
13133 && operands[3] != constm1_rtx)
13134 return 0;
13135 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13136 ix86_compare_op1, &compare_op))
13137 return 0;
13138 code = GET_CODE (compare_op);
13139
13140 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13141 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13142 {
13143 fpcmp = true;
13144 code = ix86_fp_compare_code_to_integer (code);
13145 }
13146
13147 if (code != LTU)
13148 {
13149 val = constm1_rtx;
13150 if (fpcmp)
13151 PUT_CODE (compare_op,
13152 reverse_condition_maybe_unordered
13153 (GET_CODE (compare_op)));
13154 else
13155 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13156 }
13157 PUT_MODE (compare_op, mode);
13158
13159 /* Construct either adc or sbb insn. */
13160 if ((code == LTU) == (operands[3] == constm1_rtx))
13161 {
13162 switch (GET_MODE (operands[0]))
13163 {
13164 case QImode:
13165 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13166 break;
13167 case HImode:
13168 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13169 break;
13170 case SImode:
13171 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13172 break;
13173 case DImode:
13174 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13175 break;
13176 default:
13177 gcc_unreachable ();
13178 }
13179 }
13180 else
13181 {
13182 switch (GET_MODE (operands[0]))
13183 {
13184 case QImode:
13185 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13186 break;
13187 case HImode:
13188 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13189 break;
13190 case SImode:
13191 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13192 break;
13193 case DImode:
13194 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13195 break;
13196 default:
13197 gcc_unreachable ();
13198 }
13199 }
13200 return 1; /* DONE */
13201 }
13202
13203
13204 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13205 works for floating-point parameters and non-offsettable memories.
13206 For pushes, it returns just stack offsets; the values will be saved
13207 in the right order. At most three parts are generated. */
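/* For example (a sketch of the sizes involved): on a 32-bit target a
   DFmode value is returned as two SImode parts and an XFmode value as
   three, while on a 64-bit target an XFmode or TFmode value becomes a
   DImode part plus one more part (SImode or DImode respectively).  */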
13208
13209 static int
13210 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13211 {
13212 int size;
13213
13214 if (!TARGET_64BIT)
13215 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13216 else
13217 size = (GET_MODE_SIZE (mode) + 4) / 8;
13218
13219 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13220 gcc_assert (size >= 2 && size <= 3);
13221
13222 /* Optimize constant pool references to immediates. This is used by fp
13223 moves, which force all constants to memory to allow combining. */
13224 if (MEM_P (operand) && MEM_READONLY_P (operand))
13225 {
13226 rtx tmp = maybe_get_pool_constant (operand);
13227 if (tmp)
13228 operand = tmp;
13229 }
13230
13231 if (MEM_P (operand) && !offsettable_memref_p (operand))
13232 {
13233 /* The only non-offsettable memories we handle are pushes. */
13234 int ok = push_operand (operand, VOIDmode);
13235
13236 gcc_assert (ok);
13237
13238 operand = copy_rtx (operand);
13239 PUT_MODE (operand, Pmode);
13240 parts[0] = parts[1] = parts[2] = operand;
13241 return size;
13242 }
13243
13244 if (GET_CODE (operand) == CONST_VECTOR)
13245 {
13246 enum machine_mode imode = int_mode_for_mode (mode);
13247 /* Caution: if we looked through a constant pool memory above,
13248 the operand may actually have a different mode now. That's
13249 ok, since we want to pun this all the way back to an integer. */
13250 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13251 gcc_assert (operand != NULL);
13252 mode = imode;
13253 }
13254
13255 if (!TARGET_64BIT)
13256 {
13257 if (mode == DImode)
13258 split_di (&operand, 1, &parts[0], &parts[1]);
13259 else
13260 {
13261 if (REG_P (operand))
13262 {
13263 gcc_assert (reload_completed);
13264 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13265 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13266 if (size == 3)
13267 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13268 }
13269 else if (offsettable_memref_p (operand))
13270 {
13271 operand = adjust_address (operand, SImode, 0);
13272 parts[0] = operand;
13273 parts[1] = adjust_address (operand, SImode, 4);
13274 if (size == 3)
13275 parts[2] = adjust_address (operand, SImode, 8);
13276 }
13277 else if (GET_CODE (operand) == CONST_DOUBLE)
13278 {
13279 REAL_VALUE_TYPE r;
13280 long l[4];
13281
13282 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13283 switch (mode)
13284 {
13285 case XFmode:
13286 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13287 parts[2] = gen_int_mode (l[2], SImode);
13288 break;
13289 case DFmode:
13290 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13291 break;
13292 default:
13293 gcc_unreachable ();
13294 }
13295 parts[1] = gen_int_mode (l[1], SImode);
13296 parts[0] = gen_int_mode (l[0], SImode);
13297 }
13298 else
13299 gcc_unreachable ();
13300 }
13301 }
13302 else
13303 {
13304 if (mode == TImode)
13305 split_ti (&operand, 1, &parts[0], &parts[1]);
13306 if (mode == XFmode || mode == TFmode)
13307 {
13308 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13309 if (REG_P (operand))
13310 {
13311 gcc_assert (reload_completed);
13312 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13313 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13314 }
13315 else if (offsettable_memref_p (operand))
13316 {
13317 operand = adjust_address (operand, DImode, 0);
13318 parts[0] = operand;
13319 parts[1] = adjust_address (operand, upper_mode, 8);
13320 }
13321 else if (GET_CODE (operand) == CONST_DOUBLE)
13322 {
13323 REAL_VALUE_TYPE r;
13324 long l[4];
13325
13326 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13327 real_to_target (l, &r, mode);
13328
13329 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13330 if (HOST_BITS_PER_WIDE_INT >= 64)
13331 parts[0]
13332 = gen_int_mode
13333 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13334 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13335 DImode);
13336 else
13337 parts[0] = immed_double_const (l[0], l[1], DImode);
13338
13339 if (upper_mode == SImode)
13340 parts[1] = gen_int_mode (l[2], SImode);
13341 else if (HOST_BITS_PER_WIDE_INT >= 64)
13342 parts[1]
13343 = gen_int_mode
13344 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13345 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13346 DImode);
13347 else
13348 parts[1] = immed_double_const (l[2], l[3], DImode);
13349 }
13350 else
13351 gcc_unreachable ();
13352 }
13353 }
13354
13355 return size;
13356 }
13357
13358 /* Emit insns to perform a move or push of DI, DF, and XF values.
13359 All required insns are emitted here; nothing is left for the caller.
13360 Operands 2-4 receive the destination parts in the correct order;
13361 operands 5-7 receive the source parts. */
13362
13363 void
13364 ix86_split_long_move (rtx operands[])
13365 {
13366 rtx part[2][3];
13367 int nparts;
13368 int push = 0;
13369 int collisions = 0;
13370 enum machine_mode mode = GET_MODE (operands[0]);
13371
13372 /* The DFmode expanders may ask us to move a double.
13373 For a 64bit target this is a single move. By hiding the fact
13374 here we simplify the i386.md splitters. */
13375 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13376 {
13377 /* Optimize constant pool references to immediates. This is used by
13378 fp moves, which force all constants to memory to allow combining. */
13379
13380 if (MEM_P (operands[1])
13381 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13382 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13383 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13384 if (push_operand (operands[0], VOIDmode))
13385 {
13386 operands[0] = copy_rtx (operands[0]);
13387 PUT_MODE (operands[0], Pmode);
13388 }
13389 else
13390 operands[0] = gen_lowpart (DImode, operands[0]);
13391 operands[1] = gen_lowpart (DImode, operands[1]);
13392 emit_move_insn (operands[0], operands[1]);
13393 return;
13394 }
13395
13396 /* The only non-offsettable memory we handle is push. */
13397 if (push_operand (operands[0], VOIDmode))
13398 push = 1;
13399 else
13400 gcc_assert (!MEM_P (operands[0])
13401 || offsettable_memref_p (operands[0]));
13402
13403 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13404 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13405
13406 /* When emitting push, take care for source operands on the stack. */
13407 if (push && MEM_P (operands[1])
13408 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13409 {
13410 if (nparts == 3)
13411 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13412 XEXP (part[1][2], 0));
13413 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13414 XEXP (part[1][1], 0));
13415 }
13416
13417 /* We need to do the copy in the right order in case an address register
13418 of the source overlaps the destination. */
13419 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13420 {
13421 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13422 collisions++;
13423 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13424 collisions++;
13425 if (nparts == 3
13426 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13427 collisions++;
13428
13429 /* Collision in the middle part can be handled by reordering. */
13430 if (collisions == 1 && nparts == 3
13431 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13432 {
13433 rtx tmp;
13434 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13435 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13436 }
13437
13438 /* If there are more collisions, we can't handle it by reordering.
13439 Do an lea to the last part and use only one colliding move. */
13440 else if (collisions > 1)
13441 {
13442 rtx base;
13443
13444 collisions = 1;
13445
13446 base = part[0][nparts - 1];
13447
13448 /* Handle the case when the last part isn't valid for lea.
13449 Happens in 64-bit mode storing the 12-byte XFmode. */
13450 if (GET_MODE (base) != Pmode)
13451 base = gen_rtx_REG (Pmode, REGNO (base));
13452
13453 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13454 part[1][0] = replace_equiv_address (part[1][0], base);
13455 part[1][1] = replace_equiv_address (part[1][1],
13456 plus_constant (base, UNITS_PER_WORD));
13457 if (nparts == 3)
13458 part[1][2] = replace_equiv_address (part[1][2],
13459 plus_constant (base, 8));
13460 }
13461 }
13462
13463 if (push)
13464 {
13465 if (!TARGET_64BIT)
13466 {
13467 if (nparts == 3)
13468 {
13469 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13470 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13471 emit_move_insn (part[0][2], part[1][2]);
13472 }
13473 }
13474 else
13475 {
13476 /* In 64bit mode we don't have a 32bit push available. In case this is
13477 a register, that is OK - we will just use the larger counterpart. We also
13478 retype memory - this comes from an attempt to avoid the REX prefix on
13479 moving the second half of a TFmode value. */
13480 if (GET_MODE (part[1][1]) == SImode)
13481 {
13482 switch (GET_CODE (part[1][1]))
13483 {
13484 case MEM:
13485 part[1][1] = adjust_address (part[1][1], DImode, 0);
13486 break;
13487
13488 case REG:
13489 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13490 break;
13491
13492 default:
13493 gcc_unreachable ();
13494 }
13495
13496 if (GET_MODE (part[1][0]) == SImode)
13497 part[1][0] = part[1][1];
13498 }
13499 }
13500 emit_move_insn (part[0][1], part[1][1]);
13501 emit_move_insn (part[0][0], part[1][0]);
13502 return;
13503 }
13504
13505 /* Choose correct order to not overwrite the source before it is copied. */
13506 if ((REG_P (part[0][0])
13507 && REG_P (part[1][1])
13508 && (REGNO (part[0][0]) == REGNO (part[1][1])
13509 || (nparts == 3
13510 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13511 || (collisions > 0
13512 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13513 {
13514 if (nparts == 3)
13515 {
13516 operands[2] = part[0][2];
13517 operands[3] = part[0][1];
13518 operands[4] = part[0][0];
13519 operands[5] = part[1][2];
13520 operands[6] = part[1][1];
13521 operands[7] = part[1][0];
13522 }
13523 else
13524 {
13525 operands[2] = part[0][1];
13526 operands[3] = part[0][0];
13527 operands[5] = part[1][1];
13528 operands[6] = part[1][0];
13529 }
13530 }
13531 else
13532 {
13533 if (nparts == 3)
13534 {
13535 operands[2] = part[0][0];
13536 operands[3] = part[0][1];
13537 operands[4] = part[0][2];
13538 operands[5] = part[1][0];
13539 operands[6] = part[1][1];
13540 operands[7] = part[1][2];
13541 }
13542 else
13543 {
13544 operands[2] = part[0][0];
13545 operands[3] = part[0][1];
13546 operands[5] = part[1][0];
13547 operands[6] = part[1][1];
13548 }
13549 }
13550
13551 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13552 if (optimize_size)
13553 {
13554 if (CONST_INT_P (operands[5])
13555 && operands[5] != const0_rtx
13556 && REG_P (operands[2]))
13557 {
13558 if (CONST_INT_P (operands[6])
13559 && INTVAL (operands[6]) == INTVAL (operands[5]))
13560 operands[6] = operands[2];
13561
13562 if (nparts == 3
13563 && CONST_INT_P (operands[7])
13564 && INTVAL (operands[7]) == INTVAL (operands[5]))
13565 operands[7] = operands[2];
13566 }
13567
13568 if (nparts == 3
13569 && CONST_INT_P (operands[6])
13570 && operands[6] != const0_rtx
13571 && REG_P (operands[3])
13572 && CONST_INT_P (operands[7])
13573 && INTVAL (operands[7]) == INTVAL (operands[6]))
13574 operands[7] = operands[3];
13575 }
13576
13577 emit_move_insn (operands[2], operands[5]);
13578 emit_move_insn (operands[3], operands[6]);
13579 if (nparts == 3)
13580 emit_move_insn (operands[4], operands[7]);
13581
13582 return;
13583 }
13584
13585 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13586 left shift by a constant, either using a single shift or
13587 a sequence of add instructions. */
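/* For instance, when not optimizing for size and additions are cheap
   enough (count * ix86_cost->add <= ix86_cost->shift_const), a left shift
   of a 32-bit half by 2 may come out as two "addl %eax, %eax" instructions
   instead of "sall $2, %eax"; the register name is only illustrative.  */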
13588
13589 static void
13590 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13591 {
13592 if (count == 1)
13593 {
13594 emit_insn ((mode == DImode
13595 ? gen_addsi3
13596 : gen_adddi3) (operand, operand, operand));
13597 }
13598 else if (!optimize_size
13599 && count * ix86_cost->add <= ix86_cost->shift_const)
13600 {
13601 int i;
13602 for (i=0; i<count; i++)
13603 {
13604 emit_insn ((mode == DImode
13605 ? gen_addsi3
13606 : gen_adddi3) (operand, operand, operand));
13607 }
13608 }
13609 else
13610 emit_insn ((mode == DImode
13611 ? gen_ashlsi3
13612 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13613 }
13614
13615 void
13616 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13617 {
13618 rtx low[2], high[2];
13619 int count;
13620 const int single_width = mode == DImode ? 32 : 64;
13621
13622 if (CONST_INT_P (operands[2]))
13623 {
13624 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13625 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13626
13627 if (count >= single_width)
13628 {
13629 emit_move_insn (high[0], low[1]);
13630 emit_move_insn (low[0], const0_rtx);
13631
13632 if (count > single_width)
13633 ix86_expand_ashl_const (high[0], count - single_width, mode);
13634 }
13635 else
13636 {
13637 if (!rtx_equal_p (operands[0], operands[1]))
13638 emit_move_insn (operands[0], operands[1]);
13639 emit_insn ((mode == DImode
13640 ? gen_x86_shld_1
13641 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13642 ix86_expand_ashl_const (low[0], count, mode);
13643 }
13644 return;
13645 }
13646
13647 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13648
13649 if (operands[1] == const1_rtx)
13650 {
13651 /* Assuming we've chosen QImode-capable registers, then 1 << N
13652 can be done with two 32/64-bit shifts, no branches, no cmoves. */
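/* A rough sketch of the DImode sequence on a 32-bit target (register
   names are illustrative, count assumed in %cl):

        xorl  %eax, %eax        low  = 0
        xorl  %edx, %edx        high = 0
        testb $32, %cl          is the shift count >= 32?
        sete  %al               low  = (count & 32) == 0
        setne %dl               high = (count & 32) != 0
        sall  %cl, %eax         hardware masks the count mod 32, so
        sall  %cl, %edx         exactly one half ends up holding 1 << N.  */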
13653 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13654 {
13655 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13656
13657 ix86_expand_clear (low[0]);
13658 ix86_expand_clear (high[0]);
13659 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13660
13661 d = gen_lowpart (QImode, low[0]);
13662 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13663 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13664 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13665
13666 d = gen_lowpart (QImode, high[0]);
13667 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13668 s = gen_rtx_NE (QImode, flags, const0_rtx);
13669 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13670 }
13671
13672 /* Otherwise, we can get the same results by manually performing
13673 a bit extract operation on bit 5/6, and then performing the two
13674 shifts. The two methods of getting 0/1 into low/high are exactly
13675 the same size. Avoiding the shift in the bit extract case helps
13676 pentium4 a bit; no one else seems to care much either way. */
13677 else
13678 {
13679 rtx x;
13680
13681 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13682 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13683 else
13684 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13685 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13686
13687 emit_insn ((mode == DImode
13688 ? gen_lshrsi3
13689 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13690 emit_insn ((mode == DImode
13691 ? gen_andsi3
13692 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13693 emit_move_insn (low[0], high[0]);
13694 emit_insn ((mode == DImode
13695 ? gen_xorsi3
13696 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13697 }
13698
13699 emit_insn ((mode == DImode
13700 ? gen_ashlsi3
13701 : gen_ashldi3) (low[0], low[0], operands[2]));
13702 emit_insn ((mode == DImode
13703 ? gen_ashlsi3
13704 : gen_ashldi3) (high[0], high[0], operands[2]));
13705 return;
13706 }
13707
13708 if (operands[1] == constm1_rtx)
13709 {
13710 /* For -1 << N, we can avoid the shld instruction, because we
13711 know that we're shifting 0...31/63 ones into a -1. */
13712 emit_move_insn (low[0], constm1_rtx);
13713 if (optimize_size)
13714 emit_move_insn (high[0], low[0]);
13715 else
13716 emit_move_insn (high[0], constm1_rtx);
13717 }
13718 else
13719 {
13720 if (!rtx_equal_p (operands[0], operands[1]))
13721 emit_move_insn (operands[0], operands[1]);
13722
13723 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13724 emit_insn ((mode == DImode
13725 ? gen_x86_shld_1
13726 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13727 }
13728
13729 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13730
13731 if (TARGET_CMOVE && scratch)
13732 {
13733 ix86_expand_clear (scratch);
13734 emit_insn ((mode == DImode
13735 ? gen_x86_shift_adj_1
13736 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13737 }
13738 else
13739 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13740 }
13741
13742 void
13743 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13744 {
13745 rtx low[2], high[2];
13746 int count;
13747 const int single_width = mode == DImode ? 32 : 64;
13748
13749 if (CONST_INT_P (operands[2]))
13750 {
13751 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13752 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13753
13754 if (count == single_width * 2 - 1)
13755 {
13756 emit_move_insn (high[0], high[1]);
13757 emit_insn ((mode == DImode
13758 ? gen_ashrsi3
13759 : gen_ashrdi3) (high[0], high[0],
13760 GEN_INT (single_width - 1)));
13761 emit_move_insn (low[0], high[0]);
13762
13763 }
13764 else if (count >= single_width)
13765 {
13766 emit_move_insn (low[0], high[1]);
13767 emit_move_insn (high[0], low[0]);
13768 emit_insn ((mode == DImode
13769 ? gen_ashrsi3
13770 : gen_ashrdi3) (high[0], high[0],
13771 GEN_INT (single_width - 1)));
13772 if (count > single_width)
13773 emit_insn ((mode == DImode
13774 ? gen_ashrsi3
13775 : gen_ashrdi3) (low[0], low[0],
13776 GEN_INT (count - single_width)));
13777 }
13778 else
13779 {
13780 if (!rtx_equal_p (operands[0], operands[1]))
13781 emit_move_insn (operands[0], operands[1]);
13782 emit_insn ((mode == DImode
13783 ? gen_x86_shrd_1
13784 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13785 emit_insn ((mode == DImode
13786 ? gen_ashrsi3
13787 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13788 }
13789 }
13790 else
13791 {
13792 if (!rtx_equal_p (operands[0], operands[1]))
13793 emit_move_insn (operands[0], operands[1]);
13794
13795 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13796
13797 emit_insn ((mode == DImode
13798 ? gen_x86_shrd_1
13799 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13800 emit_insn ((mode == DImode
13801 ? gen_ashrsi3
13802 : gen_ashrdi3) (high[0], high[0], operands[2]));
13803
13804 if (TARGET_CMOVE && scratch)
13805 {
13806 emit_move_insn (scratch, high[0]);
13807 emit_insn ((mode == DImode
13808 ? gen_ashrsi3
13809 : gen_ashrdi3) (scratch, scratch,
13810 GEN_INT (single_width - 1)));
13811 emit_insn ((mode == DImode
13812 ? gen_x86_shift_adj_1
13813 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13814 scratch));
13815 }
13816 else
13817 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13818 }
13819 }
13820
13821 void
13822 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13823 {
13824 rtx low[2], high[2];
13825 int count;
13826 const int single_width = mode == DImode ? 32 : 64;
13827
13828 if (CONST_INT_P (operands[2]))
13829 {
13830 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13831 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13832
13833 if (count >= single_width)
13834 {
13835 emit_move_insn (low[0], high[1]);
13836 ix86_expand_clear (high[0]);
13837
13838 if (count > single_width)
13839 emit_insn ((mode == DImode
13840 ? gen_lshrsi3
13841 : gen_lshrdi3) (low[0], low[0],
13842 GEN_INT (count - single_width)));
13843 }
13844 else
13845 {
13846 if (!rtx_equal_p (operands[0], operands[1]))
13847 emit_move_insn (operands[0], operands[1]);
13848 emit_insn ((mode == DImode
13849 ? gen_x86_shrd_1
13850 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13851 emit_insn ((mode == DImode
13852 ? gen_lshrsi3
13853 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13854 }
13855 }
13856 else
13857 {
13858 if (!rtx_equal_p (operands[0], operands[1]))
13859 emit_move_insn (operands[0], operands[1]);
13860
13861 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13862
13863 emit_insn ((mode == DImode
13864 ? gen_x86_shrd_1
13865 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13866 emit_insn ((mode == DImode
13867 ? gen_lshrsi3
13868 : gen_lshrdi3) (high[0], high[0], operands[2]));
13869
13870 /* Heh. By reversing the arguments, we can reuse this pattern. */
13871 if (TARGET_CMOVE && scratch)
13872 {
13873 ix86_expand_clear (scratch);
13874 emit_insn ((mode == DImode
13875 ? gen_x86_shift_adj_1
13876 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13877 scratch));
13878 }
13879 else
13880 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13881 }
13882 }
13883
13884 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13885 static void
13886 predict_jump (int prob)
13887 {
13888 rtx insn = get_last_insn ();
13889 gcc_assert (JUMP_P (insn));
13890 REG_NOTES (insn)
13891 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13892 GEN_INT (prob),
13893 REG_NOTES (insn));
13894 }
13895
13896 /* Helper function for the string operations below. Test whether VARIABLE
13897 is aligned to VALUE bytes. If so, jump to the returned label. */
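/* For example, ix86_expand_aligntest (destptr, 4, false) emits RTL roughly
   equivalent to "testl $4, destptr; je label", returning the label so that
   the caller can place the 4-byte fixup in front of it.  */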
13898 static rtx
13899 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13900 {
13901 rtx label = gen_label_rtx ();
13902 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13903 if (GET_MODE (variable) == DImode)
13904 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13905 else
13906 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13907 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13908 1, label);
13909 if (epilogue)
13910 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13911 else
13912 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13913 return label;
13914 }
13915
13916 /* Decrease COUNTREG by VALUE. */
13917 static void
13918 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13919 {
13920 if (GET_MODE (countreg) == DImode)
13921 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13922 else
13923 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13924 }
13925
13926 /* Zero extend possibly SImode EXP to Pmode register. */
13927 rtx
13928 ix86_zero_extend_to_Pmode (rtx exp)
13929 {
13930 rtx r;
13931 if (GET_MODE (exp) == VOIDmode)
13932 return force_reg (Pmode, exp);
13933 if (GET_MODE (exp) == Pmode)
13934 return copy_to_mode_reg (Pmode, exp);
13935 r = gen_reg_rtx (Pmode);
13936 emit_insn (gen_zero_extendsidi2 (r, exp));
13937 return r;
13938 }
13939
13940 /* Divide COUNTREG by SCALE. */
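/* For example, scale_counter (count, 4) folds the division when COUNT is a
   compile-time constant and otherwise emits a logical right shift of COUNT
   by 2; SCALE is assumed to be a power of two.  */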
13941 static rtx
13942 scale_counter (rtx countreg, int scale)
13943 {
13944 rtx sc;
13945 rtx piece_size_mask;
13946
13947 if (scale == 1)
13948 return countreg;
13949 if (CONST_INT_P (countreg))
13950 return GEN_INT (INTVAL (countreg) / scale);
13951 gcc_assert (REG_P (countreg));
13952
13953 piece_size_mask = GEN_INT (scale - 1);
13954 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13955 GEN_INT (exact_log2 (scale)),
13956 NULL, 1, OPTAB_DIRECT);
13957 return sc;
13958 }
13959
13960 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13961 DImode for constant loop counts. */
13962
13963 static enum machine_mode
13964 counter_mode (rtx count_exp)
13965 {
13966 if (GET_MODE (count_exp) != VOIDmode)
13967 return GET_MODE (count_exp);
13968 if (GET_CODE (count_exp) != CONST_INT)
13969 return Pmode;
13970 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13971 return DImode;
13972 return SImode;
13973 }
13974
13975 /* When SRCPTR is non-NULL, output a simple loop to move memory
13976 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13977 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
13978 the equivalent loop to set memory to VALUE (supposed to be in MODE).
13979
13980 The size is rounded down to a whole multiple of the chunk size moved at once.
13981 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
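/* A sketch of the generated structure for a copy with UNROLL == 1 (written
   as C for clarity only; the real output is RTL):

        size = count & -piece_size;
        iter = 0;
        do
          {
            *(mode *) (dest + iter) = *(mode *) (src + iter);
            iter += piece_size;
          }
        while (iter < size);
        dest += size;  src += size;

   with an extra "size == 0" guard emitted in front when the piece size is
   a single byte.  */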
13982
13983
13984 static void
13985 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13986 rtx destptr, rtx srcptr, rtx value,
13987 rtx count, enum machine_mode mode, int unroll,
13988 int expected_size)
13989 {
13990 rtx out_label, top_label, iter, tmp;
13991 enum machine_mode iter_mode = counter_mode (count);
13992 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13993 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13994 rtx size;
13995 rtx x_addr;
13996 rtx y_addr;
13997 int i;
13998
13999 top_label = gen_label_rtx ();
14000 out_label = gen_label_rtx ();
14001 iter = gen_reg_rtx (iter_mode);
14002
14003 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14004 NULL, 1, OPTAB_DIRECT);
14005 /* Those two should combine. */
14006 if (piece_size == const1_rtx)
14007 {
14008 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14009 true, out_label);
14010 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14011 }
14012 emit_move_insn (iter, const0_rtx);
14013
14014 emit_label (top_label);
14015
14016 tmp = convert_modes (Pmode, iter_mode, iter, true);
14017 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14018 destmem = change_address (destmem, mode, x_addr);
14019
14020 if (srcmem)
14021 {
14022 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14023 srcmem = change_address (srcmem, mode, y_addr);
14024
14025 /* When unrolling for chips that reorder memory reads and writes,
14026 we can save registers by using a single temporary.
14027 Also, using 4 temporaries is overkill in 32bit mode. */
14028 if (!TARGET_64BIT && 0)
14029 {
14030 for (i = 0; i < unroll; i++)
14031 {
14032 if (i)
14033 {
14034 destmem =
14035 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14036 srcmem =
14037 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14038 }
14039 emit_move_insn (destmem, srcmem);
14040 }
14041 }
14042 else
14043 {
14044 rtx tmpreg[4];
14045 gcc_assert (unroll <= 4);
14046 for (i = 0; i < unroll; i++)
14047 {
14048 tmpreg[i] = gen_reg_rtx (mode);
14049 if (i)
14050 {
14051 srcmem =
14052 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14053 }
14054 emit_move_insn (tmpreg[i], srcmem);
14055 }
14056 for (i = 0; i < unroll; i++)
14057 {
14058 if (i)
14059 {
14060 destmem =
14061 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14062 }
14063 emit_move_insn (destmem, tmpreg[i]);
14064 }
14065 }
14066 }
14067 else
14068 for (i = 0; i < unroll; i++)
14069 {
14070 if (i)
14071 destmem =
14072 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14073 emit_move_insn (destmem, value);
14074 }
14075
14076 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14077 true, OPTAB_LIB_WIDEN);
14078 if (tmp != iter)
14079 emit_move_insn (iter, tmp);
14080
14081 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14082 true, top_label);
14083 if (expected_size != -1)
14084 {
14085 expected_size /= GET_MODE_SIZE (mode) * unroll;
14086 if (expected_size == 0)
14087 predict_jump (0);
14088 else if (expected_size > REG_BR_PROB_BASE)
14089 predict_jump (REG_BR_PROB_BASE - 1);
14090 else
14091 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14092 }
14093 else
14094 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14095 iter = ix86_zero_extend_to_Pmode (iter);
14096 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14097 true, OPTAB_LIB_WIDEN);
14098 if (tmp != destptr)
14099 emit_move_insn (destptr, tmp);
14100 if (srcptr)
14101 {
14102 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14103 true, OPTAB_LIB_WIDEN);
14104 if (tmp != srcptr)
14105 emit_move_insn (srcptr, tmp);
14106 }
14107 emit_label (out_label);
14108 }
14109
14110 /* Output a "rep; mov" instruction.
14111 Arguments have the same meaning as for the previous function. */
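/* For instance, with MODE == SImode and a byte count in a register this
   boils down to roughly "shrl $2, %ecx; rep movsl" plus the RTL that
   describes the final pointer values; the register name is only an
   example.  */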
14112 static void
14113 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14114 rtx destptr, rtx srcptr,
14115 rtx count,
14116 enum machine_mode mode)
14117 {
14118 rtx destexp;
14119 rtx srcexp;
14120 rtx countreg;
14121
14122 /* If the size is known to be a multiple of 4, it is shorter to use SImode rep movs. */
14123 if (mode == QImode && CONST_INT_P (count)
14124 && !(INTVAL (count) & 3))
14125 mode = SImode;
14126
14127 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14128 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14129 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14130 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14131 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14132 if (mode != QImode)
14133 {
14134 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14135 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14136 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14137 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14138 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14139 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14140 }
14141 else
14142 {
14143 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14144 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14145 }
14146 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14147 destexp, srcexp));
14148 }
14149
14150 /* Output a "rep; stos" instruction.
14151 Arguments have the same meaning as for the previous function. */
14152 static void
14153 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14154 rtx count,
14155 enum machine_mode mode)
14156 {
14157 rtx destexp;
14158 rtx countreg;
14159
14160 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14161 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14162 value = force_reg (mode, gen_lowpart (mode, value));
14163 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14164 if (mode != QImode)
14165 {
14166 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14167 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14168 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14169 }
14170 else
14171 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14172 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14173 }
14174
14175 static void
14176 emit_strmov (rtx destmem, rtx srcmem,
14177 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14178 {
14179 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14180 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14181 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14182 }
14183
14184 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14185 static void
14186 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14187 rtx destptr, rtx srcptr, rtx count, int max_size)
14188 {
14189 rtx src, dest;
14190 if (CONST_INT_P (count))
14191 {
14192 HOST_WIDE_INT countval = INTVAL (count);
14193 int offset = 0;
14194
14195 if ((countval & 0x10) && max_size > 16)
14196 {
14197 if (TARGET_64BIT)
14198 {
14199 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14200 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14201 }
14202 else
14203 gcc_unreachable ();
14204 offset += 16;
14205 }
14206 if ((countval & 0x08) && max_size > 8)
14207 {
14208 if (TARGET_64BIT)
14209 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14210 else
14211 {
14212 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14213 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14214 }
14215 offset += 8;
14216 }
14217 if ((countval & 0x04) && max_size > 4)
14218 {
14219 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14220 offset += 4;
14221 }
14222 if ((countval & 0x02) && max_size > 2)
14223 {
14224 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14225 offset += 2;
14226 }
14227 if ((countval & 0x01) && max_size > 1)
14228 {
14229 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14230 offset += 1;
14231 }
14232 return;
14233 }
14234 if (max_size > 8)
14235 {
14236 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14237 count, 1, OPTAB_DIRECT);
14238 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14239 count, QImode, 1, 4);
14240 return;
14241 }
14242
14243 /* With single-instruction stringops we can cheaply increase the dest and
14244 src pointers. Otherwise we save code size by maintaining an offset
14245 (zero is readily available from the preceding rep operation) and using
14246 x86 addressing modes. */
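/* E.g. with single-instruction stringops the 4-byte tail is a "movsl",
   which advances both pointers by itself; without them we emit a plain
   SImode move addressed via (pointer + offset) and bump OFFSET manually.  */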
14247 if (TARGET_SINGLE_STRINGOP)
14248 {
14249 if (max_size > 4)
14250 {
14251 rtx label = ix86_expand_aligntest (count, 4, true);
14252 src = change_address (srcmem, SImode, srcptr);
14253 dest = change_address (destmem, SImode, destptr);
14254 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14255 emit_label (label);
14256 LABEL_NUSES (label) = 1;
14257 }
14258 if (max_size > 2)
14259 {
14260 rtx label = ix86_expand_aligntest (count, 2, true);
14261 src = change_address (srcmem, HImode, srcptr);
14262 dest = change_address (destmem, HImode, destptr);
14263 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14264 emit_label (label);
14265 LABEL_NUSES (label) = 1;
14266 }
14267 if (max_size > 1)
14268 {
14269 rtx label = ix86_expand_aligntest (count, 1, true);
14270 src = change_address (srcmem, QImode, srcptr);
14271 dest = change_address (destmem, QImode, destptr);
14272 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14273 emit_label (label);
14274 LABEL_NUSES (label) = 1;
14275 }
14276 }
14277 else
14278 {
14279 rtx offset = force_reg (Pmode, const0_rtx);
14280 rtx tmp;
14281
14282 if (max_size > 4)
14283 {
14284 rtx label = ix86_expand_aligntest (count, 4, true);
14285 src = change_address (srcmem, SImode, srcptr);
14286 dest = change_address (destmem, SImode, destptr);
14287 emit_move_insn (dest, src);
14288 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14289 true, OPTAB_LIB_WIDEN);
14290 if (tmp != offset)
14291 emit_move_insn (offset, tmp);
14292 emit_label (label);
14293 LABEL_NUSES (label) = 1;
14294 }
14295 if (max_size > 2)
14296 {
14297 rtx label = ix86_expand_aligntest (count, 2, true);
14298 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14299 src = change_address (srcmem, HImode, tmp);
14300 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14301 dest = change_address (destmem, HImode, tmp);
14302 emit_move_insn (dest, src);
14303 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14304 true, OPTAB_LIB_WIDEN);
14305 if (tmp != offset)
14306 emit_move_insn (offset, tmp);
14307 emit_label (label);
14308 LABEL_NUSES (label) = 1;
14309 }
14310 if (max_size > 1)
14311 {
14312 rtx label = ix86_expand_aligntest (count, 1, true);
14313 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14314 src = change_address (srcmem, QImode, tmp);
14315 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14316 dest = change_address (destmem, QImode, tmp);
14317 emit_move_insn (dest, src);
14318 emit_label (label);
14319 LABEL_NUSES (label) = 1;
14320 }
14321 }
14322 }
14323
14324 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14325 static void
14326 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14327 rtx count, int max_size)
14328 {
14329 count =
14330 expand_simple_binop (counter_mode (count), AND, count,
14331 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14332 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14333 gen_lowpart (QImode, value), count, QImode,
14334 1, max_size / 2);
14335 }
14336
14337 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14338 static void
14339 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14340 {
14341 rtx dest;
14342
14343 if (CONST_INT_P (count))
14344 {
14345 HOST_WIDE_INT countval = INTVAL (count);
14346 int offset = 0;
14347
14348 if ((countval & 0x10) && max_size > 16)
14349 {
14350 if (TARGET_64BIT)
14351 {
14352 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14353 emit_insn (gen_strset (destptr, dest, value));
14354 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14355 emit_insn (gen_strset (destptr, dest, value));
14356 }
14357 else
14358 gcc_unreachable ();
14359 offset += 16;
14360 }
14361 if ((countval & 0x08) && max_size > 8)
14362 {
14363 if (TARGET_64BIT)
14364 {
14365 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14366 emit_insn (gen_strset (destptr, dest, value));
14367 }
14368 else
14369 {
14370 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14371 emit_insn (gen_strset (destptr, dest, value));
14372 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14373 emit_insn (gen_strset (destptr, dest, value));
14374 }
14375 offset += 8;
14376 }
14377 if ((countval & 0x04) && max_size > 4)
14378 {
14379 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14380 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14381 offset += 4;
14382 }
14383 if ((countval & 0x02) && max_size > 2)
14384 {
14385 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14386 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14387 offset += 2;
14388 }
14389 if ((countval & 0x01) && max_size > 1)
14390 {
14391 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14392 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14393 offset += 1;
14394 }
14395 return;
14396 }
14397 if (max_size > 32)
14398 {
14399 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14400 return;
14401 }
14402 if (max_size > 16)
14403 {
14404 rtx label = ix86_expand_aligntest (count, 16, true);
14405 if (TARGET_64BIT)
14406 {
14407 dest = change_address (destmem, DImode, destptr);
14408 emit_insn (gen_strset (destptr, dest, value));
14409 emit_insn (gen_strset (destptr, dest, value));
14410 }
14411 else
14412 {
14413 dest = change_address (destmem, SImode, destptr);
14414 emit_insn (gen_strset (destptr, dest, value));
14415 emit_insn (gen_strset (destptr, dest, value));
14416 emit_insn (gen_strset (destptr, dest, value));
14417 emit_insn (gen_strset (destptr, dest, value));
14418 }
14419 emit_label (label);
14420 LABEL_NUSES (label) = 1;
14421 }
14422 if (max_size > 8)
14423 {
14424 rtx label = ix86_expand_aligntest (count, 8, true);
14425 if (TARGET_64BIT)
14426 {
14427 dest = change_address (destmem, DImode, destptr);
14428 emit_insn (gen_strset (destptr, dest, value));
14429 }
14430 else
14431 {
14432 dest = change_address (destmem, SImode, destptr);
14433 emit_insn (gen_strset (destptr, dest, value));
14434 emit_insn (gen_strset (destptr, dest, value));
14435 }
14436 emit_label (label);
14437 LABEL_NUSES (label) = 1;
14438 }
14439 if (max_size > 4)
14440 {
14441 rtx label = ix86_expand_aligntest (count, 4, true);
14442 dest = change_address (destmem, SImode, destptr);
14443 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14444 emit_label (label);
14445 LABEL_NUSES (label) = 1;
14446 }
14447 if (max_size > 2)
14448 {
14449 rtx label = ix86_expand_aligntest (count, 2, true);
14450 dest = change_address (destmem, HImode, destptr);
14451 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14452 emit_label (label);
14453 LABEL_NUSES (label) = 1;
14454 }
14455 if (max_size > 1)
14456 {
14457 rtx label = ix86_expand_aligntest (count, 1, true);
14458 dest = change_address (destmem, QImode, destptr);
14459 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14460 emit_label (label);
14461 LABEL_NUSES (label) = 1;
14462 }
14463 }
14464
14465 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14466 to DESIRED_ALIGNMENT. */
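/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits up to
   three conditional copies (of 1, 2 and then 4 bytes), each guarded by an
   alignment test of DESTPTR, adjusting COUNT as it goes.  */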
14467 static void
14468 expand_movmem_prologue (rtx destmem, rtx srcmem,
14469 rtx destptr, rtx srcptr, rtx count,
14470 int align, int desired_alignment)
14471 {
14472 if (align <= 1 && desired_alignment > 1)
14473 {
14474 rtx label = ix86_expand_aligntest (destptr, 1, false);
14475 srcmem = change_address (srcmem, QImode, srcptr);
14476 destmem = change_address (destmem, QImode, destptr);
14477 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14478 ix86_adjust_counter (count, 1);
14479 emit_label (label);
14480 LABEL_NUSES (label) = 1;
14481 }
14482 if (align <= 2 && desired_alignment > 2)
14483 {
14484 rtx label = ix86_expand_aligntest (destptr, 2, false);
14485 srcmem = change_address (srcmem, HImode, srcptr);
14486 destmem = change_address (destmem, HImode, destptr);
14487 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14488 ix86_adjust_counter (count, 2);
14489 emit_label (label);
14490 LABEL_NUSES (label) = 1;
14491 }
14492 if (align <= 4 && desired_alignment > 4)
14493 {
14494 rtx label = ix86_expand_aligntest (destptr, 4, false);
14495 srcmem = change_address (srcmem, SImode, srcptr);
14496 destmem = change_address (destmem, SImode, destptr);
14497 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14498 ix86_adjust_counter (count, 4);
14499 emit_label (label);
14500 LABEL_NUSES (label) = 1;
14501 }
14502 gcc_assert (desired_alignment <= 8);
14503 }
14504
14505 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
14506 DESIRED_ALIGNMENT. */
14507 static void
14508 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14509 int align, int desired_alignment)
14510 {
14511 if (align <= 1 && desired_alignment > 1)
14512 {
14513 rtx label = ix86_expand_aligntest (destptr, 1, false);
14514 destmem = change_address (destmem, QImode, destptr);
14515 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14516 ix86_adjust_counter (count, 1);
14517 emit_label (label);
14518 LABEL_NUSES (label) = 1;
14519 }
14520 if (align <= 2 && desired_alignment > 2)
14521 {
14522 rtx label = ix86_expand_aligntest (destptr, 2, false);
14523 destmem = change_address (destmem, HImode, destptr);
14524 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14525 ix86_adjust_counter (count, 2);
14526 emit_label (label);
14527 LABEL_NUSES (label) = 1;
14528 }
14529 if (align <= 4 && desired_alignment > 4)
14530 {
14531 rtx label = ix86_expand_aligntest (destptr, 4, false);
14532 destmem = change_address (destmem, SImode, destptr);
14533 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14534 ix86_adjust_counter (count, 4);
14535 emit_label (label);
14536 LABEL_NUSES (label) = 1;
14537 }
14538 gcc_assert (desired_alignment <= 8);
14539 }
14540
14541 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
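/* As a concrete example of the logic below: unless an algorithm has been
   forced (stringop_alg != no_stringop), -Os picks rep_prefix_4_byte for a
   count known to be a multiple of 4 and rep_prefix_1_byte otherwise; the
   remaining cases consult the per-CPU size table in ix86_cost.  */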
14542 static enum stringop_alg
14543 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14544 int *dynamic_check)
14545 {
14546 const struct stringop_algs * algs;
14547
14548 *dynamic_check = -1;
14549 if (memset)
14550 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14551 else
14552 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14553 if (stringop_alg != no_stringop)
14554 return stringop_alg;
14555 /* rep; movq or rep; movl is the smallest variant. */
14556 else if (optimize_size)
14557 {
14558 if (!count || (count & 3))
14559 return rep_prefix_1_byte;
14560 else
14561 return rep_prefix_4_byte;
14562 }
14563 /* Very tiny blocks are best handled via the loop; REP is expensive
14564 to set up. */
14565 else if (expected_size != -1 && expected_size < 4)
14566 return loop_1_byte;
14567 else if (expected_size != -1)
14568 {
14569 unsigned int i;
14570 enum stringop_alg alg = libcall;
14571 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14572 {
14573 gcc_assert (algs->size[i].max);
14574 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14575 {
14576 if (algs->size[i].alg != libcall)
14577 alg = algs->size[i].alg;
14578 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14579 last non-libcall inline algorithm. */
14580 if (TARGET_INLINE_ALL_STRINGOPS)
14581 {
14582 /* When the current size is best copied by a libcall,
14583 but we are still forced to inline, run the heuristic below
14584 that will pick code for medium-sized blocks. */
14585 if (alg != libcall)
14586 return alg;
14587 break;
14588 }
14589 else
14590 return algs->size[i].alg;
14591 }
14592 }
14593 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14594 }
14595 /* When asked to inline the call anyway, try to pick a meaningful choice.
14596 We look for the maximal size of block that is faster to copy by hand
14597 and take blocks of at most that size, guessing that the average size
14598 will be roughly half of the block.
14599
14600 If this turns out to be bad, we might simply specify the preferred
14601 choice in ix86_costs. */
14602 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14603 && algs->unknown_size == libcall)
14604 {
14605 int max = -1;
14606 enum stringop_alg alg;
14607 int i;
14608
14609 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14610 if (algs->size[i].alg != libcall && algs->size[i].alg)
14611 max = algs->size[i].max;
14612 if (max == -1)
14613 max = 4096;
14614 alg = decide_alg (count, max / 2, memset, dynamic_check);
14615 gcc_assert (*dynamic_check == -1);
14616 gcc_assert (alg != libcall);
14617 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14618 *dynamic_check = max;
14619 return alg;
14620 }
14621 return algs->unknown_size;
14622 }
14623
14624 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14625 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14626 static int
14627 decide_alignment (int align,
14628 enum stringop_alg alg,
14629 int expected_size)
14630 {
14631 int desired_align = 0;
14632 switch (alg)
14633 {
14634 case no_stringop:
14635 gcc_unreachable ();
14636 case loop:
14637 case unrolled_loop:
14638 desired_align = GET_MODE_SIZE (Pmode);
14639 break;
14640 case rep_prefix_8_byte:
14641 desired_align = 8;
14642 break;
14643 case rep_prefix_4_byte:
14644 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14645 copying a whole cache line at once. */
14646 if (TARGET_PENTIUMPRO)
14647 desired_align = 8;
14648 else
14649 desired_align = 4;
14650 break;
14651 case rep_prefix_1_byte:
14652 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14653 copying a whole cache line at once. */
14654 if (TARGET_PENTIUMPRO)
14655 desired_align = 8;
14656 else
14657 desired_align = 1;
14658 break;
14659 case loop_1_byte:
14660 desired_align = 1;
14661 break;
14662 case libcall:
14663 return 0;
14664 }
14665
14666 if (optimize_size)
14667 desired_align = 1;
14668 if (desired_align < align)
14669 desired_align = align;
14670 if (expected_size != -1 && expected_size < 4)
14671 desired_align = align;
14672 return desired_align;
14673 }
14674
14675 /* Return the smallest power of 2 greater than VAL. */
14676 static int
14677 smallest_pow2_greater_than (int val)
14678 {
14679 int ret = 1;
14680 while (ret <= val)
14681 ret <<= 1;
14682 return ret;
14683 }
14684
14685 /* Expand string move (memcpy) operation. Use i386 string operations when
14686 profitable. expand_clrmem contains similar code. The code depends upon
14687 architecture, block size and alignment, but always has the same
14688 overall structure:
14689
14690 1) Prologue guard: Conditional that jumps up to epilogues for small
14691 blocks that can be handled by the epilogue alone. This is faster but
14692 also needed for correctness, since the prologue assumes the block is
14693 larger than the desired alignment.
14694
14695 Optional dynamic check for size and libcall for large
14696 blocks is emitted here too, with -minline-stringops-dynamically.
14697
14698 2) Prologue: copy the first few bytes in order to get the destination
14699 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14700 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14701 We emit either a jump tree on power-of-two-sized blocks, or a byte loop.
14702
14703 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14704 with specified algorithm.
14705
14706 4) Epilogue: code copying tail of the block that is too small to be
14707 handled by main body (or up to size guarded by prologue guard). */
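/* A worked example (illustrative only; the exact sequence depends on the
   algorithm and tuning chosen): for a copy of unknown size with ALIGN == 1,
   alg == rep_prefix_4_byte and DESIRED_ALIGN == 4, the expansion behaves
   roughly like

	if (count < 4)
	  goto epilogue;			1) prologue guard
	while (dst not 4-byte aligned)		2) alignment prologue
	  copy one byte, count--;
	rep movsl;				3) main body, count / 4 words
     epilogue:
	copy the remaining count & 3 bytes.	4) epilogue  */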
14708
14709 int
14710 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14711 rtx expected_align_exp, rtx expected_size_exp)
14712 {
14713 rtx destreg;
14714 rtx srcreg;
14715 rtx label = NULL;
14716 rtx tmp;
14717 rtx jump_around_label = NULL;
14718 HOST_WIDE_INT align = 1;
14719 unsigned HOST_WIDE_INT count = 0;
14720 HOST_WIDE_INT expected_size = -1;
14721 int size_needed = 0, epilogue_size_needed;
14722 int desired_align = 0;
14723 enum stringop_alg alg;
14724 int dynamic_check;
14725
14726 if (CONST_INT_P (align_exp))
14727 align = INTVAL (align_exp);
14728 /* i386 can do misaligned access at a reasonably increased cost. */
14729 if (CONST_INT_P (expected_align_exp)
14730 && INTVAL (expected_align_exp) > align)
14731 align = INTVAL (expected_align_exp);
14732 if (CONST_INT_P (count_exp))
14733 count = expected_size = INTVAL (count_exp);
14734 if (CONST_INT_P (expected_size_exp) && count == 0)
14735 expected_size = INTVAL (expected_size_exp);
14736
14737 /* Step 0: Decide on preferred algorithm, desired alignment and
14738 size of chunks to be copied by main loop. */
14739
14740 alg = decide_alg (count, expected_size, false, &dynamic_check);
14741 desired_align = decide_alignment (align, alg, expected_size);
14742
14743 if (!TARGET_ALIGN_STRINGOPS)
14744 align = desired_align;
14745
14746 if (alg == libcall)
14747 return 0;
14748 gcc_assert (alg != no_stringop);
14749 if (!count)
14750 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14751 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14752 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14753 switch (alg)
14754 {
14755 case libcall:
14756 case no_stringop:
14757 gcc_unreachable ();
14758 case loop:
14759 size_needed = GET_MODE_SIZE (Pmode);
14760 break;
14761 case unrolled_loop:
14762 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14763 break;
14764 case rep_prefix_8_byte:
14765 size_needed = 8;
14766 break;
14767 case rep_prefix_4_byte:
14768 size_needed = 4;
14769 break;
14770 case rep_prefix_1_byte:
14771 case loop_1_byte:
14772 size_needed = 1;
14773 break;
14774 }
14775
14776 epilogue_size_needed = size_needed;
14777
14778 /* Step 1: Prologue guard. */
14779
14780 /* Alignment code needs count to be in register. */
14781 if (CONST_INT_P (count_exp) && desired_align > align)
14782 {
14783 enum machine_mode mode = SImode;
14784 if (TARGET_64BIT && (count & ~0xffffffff))
14785 mode = DImode;
14786 count_exp = force_reg (mode, count_exp);
14787 }
14788 gcc_assert (desired_align >= 1 && align >= 1);
14789
14790 /* Ensure that alignment prologue won't copy past end of block. */
14791 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14792 {
14793 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14794 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14795 Make sure it is a power of two. */
14796 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14797
14798 label = gen_label_rtx ();
14799 emit_cmp_and_jump_insns (count_exp,
14800 GEN_INT (epilogue_size_needed),
14801 LTU, 0, counter_mode (count_exp), 1, label);
14802 if (GET_CODE (count_exp) == CONST_INT)
14803 ;
14804 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14805 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14806 else
14807 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14808 }
14809 /* Emit code to decide on runtime whether library call or inline should be
14810 used. */
14811 if (dynamic_check != -1)
14812 {
14813 rtx hot_label = gen_label_rtx ();
14814 jump_around_label = gen_label_rtx ();
14815 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14816 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14817 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14818 emit_block_move_via_libcall (dst, src, count_exp, false);
14819 emit_jump (jump_around_label);
14820 emit_label (hot_label);
14821 }
14822
14823 /* Step 2: Alignment prologue. */
14824
14825 if (desired_align > align)
14826 {
14827 /* Except for the first move in the epilogue, we no longer know
14828 the constant offset in the aliasing info. It doesn't seem worth
14829 the pain to maintain it for the first move, so throw away
14830 the info early. */
14831 src = change_address (src, BLKmode, srcreg);
14832 dst = change_address (dst, BLKmode, destreg);
14833 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14834 desired_align);
14835 }
14836 if (label && size_needed == 1)
14837 {
14838 emit_label (label);
14839 LABEL_NUSES (label) = 1;
14840 label = NULL;
14841 }
14842
14843 /* Step 3: Main loop. */
14844
14845 switch (alg)
14846 {
14847 case libcall:
14848 case no_stringop:
14849 gcc_unreachable ();
14850 case loop_1_byte:
14851 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14852 count_exp, QImode, 1, expected_size);
14853 break;
14854 case loop:
14855 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14856 count_exp, Pmode, 1, expected_size);
14857 break;
14858 case unrolled_loop:
14859 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14860 registers for 4 temporaries anyway. */
14861 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14862 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14863 expected_size);
14864 break;
14865 case rep_prefix_8_byte:
14866 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14867 DImode);
14868 break;
14869 case rep_prefix_4_byte:
14870 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14871 SImode);
14872 break;
14873 case rep_prefix_1_byte:
14874 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14875 QImode);
14876 break;
14877 }
14878 /* Properly adjust the offsets of src and dest memory for aliasing. */
14879 if (CONST_INT_P (count_exp))
14880 {
14881 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14882 (count / size_needed) * size_needed);
14883 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14884 (count / size_needed) * size_needed);
14885 }
14886 else
14887 {
14888 src = change_address (src, BLKmode, srcreg);
14889 dst = change_address (dst, BLKmode, destreg);
14890 }
14891
14892 /* Step 4: Epilogue to copy the remaining bytes. */
14893
14894 if (label)
14895 {
14896 /* When the main loop is done, COUNT_EXP might hold the original count,
14897 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14898 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14899 bytes. Compensate if needed. */
14900
14901 if (size_needed < epilogue_size_needed)
14902 {
14903 tmp =
14904 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14905 GEN_INT (size_needed - 1), count_exp, 1,
14906 OPTAB_DIRECT);
14907 if (tmp != count_exp)
14908 emit_move_insn (count_exp, tmp);
14909 }
14910 emit_label (label);
14911 LABEL_NUSES (label) = 1;
14912 }
14913
14914 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14915 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14916 epilogue_size_needed);
14917 if (jump_around_label)
14918 emit_label (jump_around_label);
14919 return 1;
14920 }
14921
14922 /* Helper function for memset. For a QImode value 0xXY produce
14923 0xXYXYXYXY of the width specified by MODE. This is essentially
14924 a * 0x01010101, but we can do slightly better than
14925 synth_mult by unwinding the sequence by hand on CPUs with
14926 a slow multiply. */
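/* A minimal illustration of the promotion (values shown for VAL == 0x5a):
   the constant path below computes 0x5a -> 0x5a5a (v |= v << 8)
   -> 0x5a5a5a5a (v |= v << 16) -> 0x5a5a5a5a5a5a5a5a for DImode.  For a
   non-constant VAL the same result is obtained either by multiplying the
   zero-extended byte by the promoted constant 1 (0x01010101 or
   0x0101010101010101) or, when multiplication is slow, by the
   insert/shift/IOR sequence in the else branch.  */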
14927 static rtx
14928 promote_duplicated_reg (enum machine_mode mode, rtx val)
14929 {
14930 enum machine_mode valmode = GET_MODE (val);
14931 rtx tmp;
14932 int nops = mode == DImode ? 3 : 2;
14933
14934 gcc_assert (mode == SImode || mode == DImode);
14935 if (val == const0_rtx)
14936 return copy_to_mode_reg (mode, const0_rtx);
14937 if (CONST_INT_P (val))
14938 {
14939 HOST_WIDE_INT v = INTVAL (val) & 255;
14940
14941 v |= v << 8;
14942 v |= v << 16;
14943 if (mode == DImode)
14944 v |= (v << 16) << 16;
14945 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14946 }
14947
14948 if (valmode == VOIDmode)
14949 valmode = QImode;
14950 if (valmode != QImode)
14951 val = gen_lowpart (QImode, val);
14952 if (mode == QImode)
14953 return val;
14954 if (!TARGET_PARTIAL_REG_STALL)
14955 nops--;
14956 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14957 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14958 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14959 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14960 {
14961 rtx reg = convert_modes (mode, QImode, val, true);
14962 tmp = promote_duplicated_reg (mode, const1_rtx);
14963 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14964 OPTAB_DIRECT);
14965 }
14966 else
14967 {
14968 rtx reg = convert_modes (mode, QImode, val, true);
14969
14970 if (!TARGET_PARTIAL_REG_STALL)
14971 if (mode == SImode)
14972 emit_insn (gen_movsi_insv_1 (reg, reg));
14973 else
14974 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14975 else
14976 {
14977 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14978 NULL, 1, OPTAB_DIRECT);
14979 reg =
14980 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14981 }
14982 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14983 NULL, 1, OPTAB_DIRECT);
14984 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14985 if (mode == SImode)
14986 return reg;
14987 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14988 NULL, 1, OPTAB_DIRECT);
14989 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14990 return reg;
14991 }
14992 }
14993
14994 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14995 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14996 alignment from ALIGN to DESIRED_ALIGN. */
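/* For illustration: on a 64-bit target SIZE_NEEDED == 8 yields a DImode
   copy of VAL and SIZE_NEEDED == 4 yields SImode; on a 32-bit target
   anything larger than two bytes yields SImode; a prologue that only has
   to reach 2-byte alignment gets HImode; otherwise VAL is returned
   unchanged.  */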
14997 static rtx
14998 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14999 {
15000 rtx promoted_val;
15001
15002 if (TARGET_64BIT
15003 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15004 promoted_val = promote_duplicated_reg (DImode, val);
15005 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15006 promoted_val = promote_duplicated_reg (SImode, val);
15007 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15008 promoted_val = promote_duplicated_reg (HImode, val);
15009 else
15010 promoted_val = val;
15011
15012 return promoted_val;
15013 }
15014
15015 /* Expand string set (memset) operation. Use i386 string operations when
15016 profitable. See expand_movmem comment for explanation of individual
15017 steps performed. */
15018 int
15019 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15020 rtx expected_align_exp, rtx expected_size_exp)
15021 {
15022 rtx destreg;
15023 rtx label = NULL;
15024 rtx tmp;
15025 rtx jump_around_label = NULL;
15026 HOST_WIDE_INT align = 1;
15027 unsigned HOST_WIDE_INT count = 0;
15028 HOST_WIDE_INT expected_size = -1;
15029 int size_needed = 0, epilogue_size_needed;
15030 int desired_align = 0;
15031 enum stringop_alg alg;
15032 rtx promoted_val = NULL;
15033 bool force_loopy_epilogue = false;
15034 int dynamic_check;
15035
15036 if (CONST_INT_P (align_exp))
15037 align = INTVAL (align_exp);
15038 /* i386 can do misaligned access at a reasonably increased cost. */
15039 if (CONST_INT_P (expected_align_exp)
15040 && INTVAL (expected_align_exp) > align)
15041 align = INTVAL (expected_align_exp);
15042 if (CONST_INT_P (count_exp))
15043 count = expected_size = INTVAL (count_exp);
15044 if (CONST_INT_P (expected_size_exp) && count == 0)
15045 expected_size = INTVAL (expected_size_exp);
15046
15047 /* Step 0: Decide on preferred algorithm, desired alignment and
15048 size of chunks to be copied by main loop. */
15049
15050 alg = decide_alg (count, expected_size, true, &dynamic_check);
15051 desired_align = decide_alignment (align, alg, expected_size);
15052
15053 if (!TARGET_ALIGN_STRINGOPS)
15054 align = desired_align;
15055
15056 if (alg == libcall)
15057 return 0;
15058 gcc_assert (alg != no_stringop);
15059 if (!count)
15060 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15061 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15062 switch (alg)
15063 {
15064 case libcall:
15065 case no_stringop:
15066 gcc_unreachable ();
15067 case loop:
15068 size_needed = GET_MODE_SIZE (Pmode);
15069 break;
15070 case unrolled_loop:
15071 size_needed = GET_MODE_SIZE (Pmode) * 4;
15072 break;
15073 case rep_prefix_8_byte:
15074 size_needed = 8;
15075 break;
15076 case rep_prefix_4_byte:
15077 size_needed = 4;
15078 break;
15079 case rep_prefix_1_byte:
15080 case loop_1_byte:
15081 size_needed = 1;
15082 break;
15083 }
15084 epilogue_size_needed = size_needed;
15085
15086 /* Step 1: Prologue guard. */
15087
15088 /* Alignment code needs count to be in register. */
15089 if (CONST_INT_P (count_exp) && desired_align > align)
15090 {
15091 enum machine_mode mode = SImode;
15092 if (TARGET_64BIT && (count & ~0xffffffff))
15093 mode = DImode;
15094 count_exp = force_reg (mode, count_exp);
15095 }
15096 /* Do the cheap promotion to allow better CSE across the
15097 main loop and epilogue (i.e. one load of the big constant in
15098 front of all the code). */
15099 if (CONST_INT_P (val_exp))
15100 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15101 desired_align, align);
15102 /* Ensure that alignment prologue won't copy past end of block. */
15103 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15104 {
15105 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15106 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15107 Make sure it is a power of two. */
15108 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15109
15110 /* To improve performance of small blocks, we jump around the VAL
15111 promotion. This means that if the promoted VAL is not constant,
15112 we might not use it in the epilogue and have to use the byte
15113 loop variant. */
15114 if (epilogue_size_needed > 2 && !promoted_val)
15115 force_loopy_epilogue = true;
15116 label = gen_label_rtx ();
15117 emit_cmp_and_jump_insns (count_exp,
15118 GEN_INT (epilogue_size_needed),
15119 LTU, 0, counter_mode (count_exp), 1, label);
15120 if (GET_CODE (count_exp) == CONST_INT)
15121 ;
15122 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15123 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15124 else
15125 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15126 }
15127 if (dynamic_check != -1)
15128 {
15129 rtx hot_label = gen_label_rtx ();
15130 jump_around_label = gen_label_rtx ();
15131 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15132 LEU, 0, counter_mode (count_exp), 1, hot_label);
15133 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15134 set_storage_via_libcall (dst, count_exp, val_exp, false);
15135 emit_jump (jump_around_label);
15136 emit_label (hot_label);
15137 }
15138
15139 /* Step 2: Alignment prologue. */
15140
15141 /* Do the expensive promotion once we branched off the small blocks. */
15142 if (!promoted_val)
15143 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15144 desired_align, align);
15145 gcc_assert (desired_align >= 1 && align >= 1);
15146
15147 if (desired_align > align)
15148 {
15149 /* Except for the first move in the epilogue, we no longer know
15150 the constant offset in the aliasing info. It doesn't seem worth
15151 the pain to maintain it for the first move, so throw away
15152 the info early. */
15153 dst = change_address (dst, BLKmode, destreg);
15154 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15155 desired_align);
15156 }
15157 if (label && size_needed == 1)
15158 {
15159 emit_label (label);
15160 LABEL_NUSES (label) = 1;
15161 label = NULL;
15162 }
15163
15164 /* Step 3: Main loop. */
15165
15166 switch (alg)
15167 {
15168 case libcall:
15169 case no_stringop:
15170 gcc_unreachable ();
15171 case loop_1_byte:
15172 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15173 count_exp, QImode, 1, expected_size);
15174 break;
15175 case loop:
15176 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15177 count_exp, Pmode, 1, expected_size);
15178 break;
15179 case unrolled_loop:
15180 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15181 count_exp, Pmode, 4, expected_size);
15182 break;
15183 case rep_prefix_8_byte:
15184 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15185 DImode);
15186 break;
15187 case rep_prefix_4_byte:
15188 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15189 SImode);
15190 break;
15191 case rep_prefix_1_byte:
15192 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15193 QImode);
15194 break;
15195 }
15196 /* Properly adjust the offset of dest memory for aliasing. */
15197 if (CONST_INT_P (count_exp))
15198 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15199 (count / size_needed) * size_needed);
15200 else
15201 dst = change_address (dst, BLKmode, destreg);
15202
15203 /* Step 4: Epilogue to copy the remaining bytes. */
15204
15205 if (label)
15206 {
15207 /* When the main loop is done, COUNT_EXP might hold the original count,
15208 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15209 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15210 bytes. Compensate if needed. */
15211
15212 if (size_needed < desired_align - align)
15213 {
15214 tmp =
15215 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15216 GEN_INT (size_needed - 1), count_exp, 1,
15217 OPTAB_DIRECT);
15218 size_needed = desired_align - align + 1;
15219 if (tmp != count_exp)
15220 emit_move_insn (count_exp, tmp);
15221 }
15222 emit_label (label);
15223 LABEL_NUSES (label) = 1;
15224 }
15225 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15226 {
15227 if (force_loopy_epilogue)
15228 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15229 size_needed);
15230 else
15231 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15232 size_needed);
15233 }
15234 if (jump_around_label)
15235 emit_label (jump_around_label);
15236 return 1;
15237 }
15238
15239 /* Expand the appropriate insns for doing strlen if not just doing
15240 repnz; scasb
15241
15242 out = result, initialized with the start address
15243 align_rtx = alignment of the address.
15244 scratch = scratch register, initialized with the start address when
15245 not aligned, otherwise undefined
15246
15247 This is just the body. It needs the initializations mentioned above and
15248 some address computing at the end. These things are done in i386.md. */
15249
15250 static void
15251 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15252 {
15253 int align;
15254 rtx tmp;
15255 rtx align_2_label = NULL_RTX;
15256 rtx align_3_label = NULL_RTX;
15257 rtx align_4_label = gen_label_rtx ();
15258 rtx end_0_label = gen_label_rtx ();
15259 rtx mem;
15260 rtx tmpreg = gen_reg_rtx (SImode);
15261 rtx scratch = gen_reg_rtx (SImode);
15262 rtx cmp;
15263
15264 align = 0;
15265 if (CONST_INT_P (align_rtx))
15266 align = INTVAL (align_rtx);
15267
15268 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15269
15270 /* Is there a known alignment and is it less than 4? */
15271 if (align < 4)
15272 {
15273 rtx scratch1 = gen_reg_rtx (Pmode);
15274 emit_move_insn (scratch1, out);
15275 /* Is there a known alignment and is it not 2? */
15276 if (align != 2)
15277 {
15278 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15279 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15280
15281 /* Leave just the 3 lower bits. */
15282 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15283 NULL_RTX, 0, OPTAB_WIDEN);
15284
15285 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15286 Pmode, 1, align_4_label);
15287 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15288 Pmode, 1, align_2_label);
15289 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15290 Pmode, 1, align_3_label);
15291 }
15292 else
15293 {
15294 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15295 check whether it is aligned to a 4-byte boundary. */
15296
15297 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15298 NULL_RTX, 0, OPTAB_WIDEN);
15299
15300 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15301 Pmode, 1, align_4_label);
15302 }
15303
15304 mem = change_address (src, QImode, out);
15305
15306 /* Now compare the bytes. */
15307
15308 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15309 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15310 QImode, 1, end_0_label);
15311
15312 /* Increment the address. */
15313 if (TARGET_64BIT)
15314 emit_insn (gen_adddi3 (out, out, const1_rtx));
15315 else
15316 emit_insn (gen_addsi3 (out, out, const1_rtx));
15317
15318 /* Not needed with an alignment of 2 */
15319 if (align != 2)
15320 {
15321 emit_label (align_2_label);
15322
15323 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15324 end_0_label);
15325
15326 if (TARGET_64BIT)
15327 emit_insn (gen_adddi3 (out, out, const1_rtx));
15328 else
15329 emit_insn (gen_addsi3 (out, out, const1_rtx));
15330
15331 emit_label (align_3_label);
15332 }
15333
15334 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15335 end_0_label);
15336
15337 if (TARGET_64BIT)
15338 emit_insn (gen_adddi3 (out, out, const1_rtx));
15339 else
15340 emit_insn (gen_addsi3 (out, out, const1_rtx));
15341 }
15342
15343 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15344 align this loop; it only makes the program bigger and does not help
15345 speed. */
15346 emit_label (align_4_label);
15347
15348 mem = change_address (src, SImode, out);
15349 emit_move_insn (scratch, mem);
15350 if (TARGET_64BIT)
15351 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15352 else
15353 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15354
15355 /* This formula yields a nonzero result iff one of the bytes is zero.
15356 This saves three branches inside the loop and many cycles. */
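/* A worked example (illustrative values): for x == 0x12345600 the value
   (x - 0x01010101) & ~x & 0x80808080 is 0x00000080, flagging the zero low
   byte, while for x == 0x12345678 it is 0.  The insns below compute exactly
   this, with TMPREG holding x - 0x01010101 and SCRATCH holding ~x.  */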
15357
15358 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15359 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15360 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15361 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15362 gen_int_mode (0x80808080, SImode)));
15363 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15364 align_4_label);
15365
15366 if (TARGET_CMOVE)
15367 {
15368 rtx reg = gen_reg_rtx (SImode);
15369 rtx reg2 = gen_reg_rtx (Pmode);
15370 emit_move_insn (reg, tmpreg);
15371 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15372
15373 /* If zero is not in the first two bytes, move two bytes forward. */
15374 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15375 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15376 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15377 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15378 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15379 reg,
15380 tmpreg)));
15381 /* Emit lea manually to avoid clobbering of flags. */
15382 emit_insn (gen_rtx_SET (SImode, reg2,
15383 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15384
15385 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15386 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15387 emit_insn (gen_rtx_SET (VOIDmode, out,
15388 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15389 reg2,
15390 out)));
15391
15392 }
15393 else
15394 {
15395 rtx end_2_label = gen_label_rtx ();
15396 /* Is zero in the first two bytes? */
15397
15398 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15399 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15400 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15401 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15402 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15403 pc_rtx);
15404 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15405 JUMP_LABEL (tmp) = end_2_label;
15406
15407 /* Not in the first two. Move two bytes forward. */
15408 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15409 if (TARGET_64BIT)
15410 emit_insn (gen_adddi3 (out, out, const2_rtx));
15411 else
15412 emit_insn (gen_addsi3 (out, out, const2_rtx));
15413
15414 emit_label (end_2_label);
15415
15416 }
15417
15418 /* Avoid branch in fixing the byte. */
15419 tmpreg = gen_lowpart (QImode, tmpreg);
15420 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15421 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15422 if (TARGET_64BIT)
15423 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15424 else
15425 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15426
15427 emit_label (end_0_label);
15428 }
15429
15430 /* Expand strlen. */
15431
15432 int
15433 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15434 {
15435 rtx addr, scratch1, scratch2, scratch3, scratch4;
15436
15437 /* The generic case of the strlen expander is long. Avoid its
15438 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15439
15440 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15441 && !TARGET_INLINE_ALL_STRINGOPS
15442 && !optimize_size
15443 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15444 return 0;
15445
15446 addr = force_reg (Pmode, XEXP (src, 0));
15447 scratch1 = gen_reg_rtx (Pmode);
15448
15449 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15450 && !optimize_size)
15451 {
15452 /* Well it seems that some optimizer does not combine a call like
15453 foo(strlen(bar), strlen(bar));
15454 when the move and the subtraction are done here. It does calculate
15455 the length just once when these instructions are done inside
15456 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15457 often used and I use one fewer register for the lifetime of
15458 output_strlen_unroll() this is better. */
15459
15460 emit_move_insn (out, addr);
15461
15462 ix86_expand_strlensi_unroll_1 (out, src, align);
15463
15464 /* strlensi_unroll_1 returns the address of the zero at the end of
15465 the string, like memchr(), so compute the length by subtracting
15466 the start address. */
15467 if (TARGET_64BIT)
15468 emit_insn (gen_subdi3 (out, out, addr));
15469 else
15470 emit_insn (gen_subsi3 (out, out, addr));
15471 }
15472 else
15473 {
15474 rtx unspec;
15475 scratch2 = gen_reg_rtx (Pmode);
15476 scratch3 = gen_reg_rtx (Pmode);
15477 scratch4 = force_reg (Pmode, constm1_rtx);
15478
15479 emit_move_insn (scratch3, addr);
15480 eoschar = force_reg (QImode, eoschar);
15481
15482 src = replace_equiv_address_nv (src, scratch3);
15483
15484 /* If .md starts supporting :P, this can be done in .md. */
15485 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15486 scratch4), UNSPEC_SCAS);
15487 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15488 if (TARGET_64BIT)
15489 {
15490 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15491 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15492 }
15493 else
15494 {
15495 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15496 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15497 }
15498 }
15499 return 1;
15500 }
15501
15502 /* For a given symbol (function), construct code to compute the address of its
15503 PLT entry in the large x86-64 PIC model. */
15504 rtx
15505 construct_plt_address (rtx symbol)
15506 {
15507 rtx tmp = gen_reg_rtx (Pmode);
15508 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15509
15510 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15511 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15512
15513 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15514 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15515 return tmp;
15516 }
15517
15518 void
15519 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15520 rtx callarg2 ATTRIBUTE_UNUSED,
15521 rtx pop, int sibcall)
15522 {
15523 rtx use = NULL, call;
15524
15525 if (pop == const0_rtx)
15526 pop = NULL;
15527 gcc_assert (!TARGET_64BIT || !pop);
15528
15529 if (TARGET_MACHO && !TARGET_64BIT)
15530 {
15531 #if TARGET_MACHO
15532 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15533 fnaddr = machopic_indirect_call_target (fnaddr);
15534 #endif
15535 }
15536 else
15537 {
15538 /* Static functions and indirect calls don't need the pic register. */
15539 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15540 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15541 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15542 use_reg (&use, pic_offset_table_rtx);
15543 }
15544
15545 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15546 {
15547 rtx al = gen_rtx_REG (QImode, 0);
15548 emit_move_insn (al, callarg2);
15549 use_reg (&use, al);
15550 }
15551
15552 if (ix86_cmodel == CM_LARGE_PIC
15553 && GET_CODE (fnaddr) == MEM
15554 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15555 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15556 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15557 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15558 {
15559 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15560 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15561 }
15562 if (sibcall && TARGET_64BIT
15563 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15564 {
15565 rtx addr;
15566 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15567 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15568 emit_move_insn (fnaddr, addr);
15569 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15570 }
15571
15572 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15573 if (retval)
15574 call = gen_rtx_SET (VOIDmode, retval, call);
15575 if (pop)
15576 {
15577 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15578 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15579 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15580 }
15581
15582 call = emit_call_insn (call);
15583 if (use)
15584 CALL_INSN_FUNCTION_USAGE (call) = use;
15585 }
15586
15587 \f
15588 /* Clear stack slot assignments remembered from previous functions.
15589 This is called from INIT_EXPANDERS once before RTL is emitted for each
15590 function. */
15591
15592 static struct machine_function *
15593 ix86_init_machine_status (void)
15594 {
15595 struct machine_function *f;
15596
15597 f = GGC_CNEW (struct machine_function);
15598 f->use_fast_prologue_epilogue_nregs = -1;
15599 f->tls_descriptor_call_expanded_p = 0;
15600
15601 return f;
15602 }
15603
15604 /* Return a MEM corresponding to a stack slot with mode MODE.
15605 Allocate a new slot if necessary.
15606
15607 The RTL for a function can have several slots available: N is
15608 which slot to use. */
15609
15610 rtx
15611 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15612 {
15613 struct stack_local_entry *s;
15614
15615 gcc_assert (n < MAX_386_STACK_LOCALS);
15616
15617 /* Virtual slot is valid only before vregs are instantiated. */
15618 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15619
15620 for (s = ix86_stack_locals; s; s = s->next)
15621 if (s->mode == mode && s->n == n)
15622 return copy_rtx (s->rtl);
15623
15624 s = (struct stack_local_entry *)
15625 ggc_alloc (sizeof (struct stack_local_entry));
15626 s->n = n;
15627 s->mode = mode;
15628 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15629
15630 s->next = ix86_stack_locals;
15631 ix86_stack_locals = s;
15632 return s->rtl;
15633 }
15634
15635 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15636
15637 static GTY(()) rtx ix86_tls_symbol;
15638 rtx
15639 ix86_tls_get_addr (void)
15640 {
15641
15642 if (!ix86_tls_symbol)
15643 {
15644 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15645 (TARGET_ANY_GNU_TLS
15646 && !TARGET_64BIT)
15647 ? "___tls_get_addr"
15648 : "__tls_get_addr");
15649 }
15650
15651 return ix86_tls_symbol;
15652 }
15653
15654 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15655
15656 static GTY(()) rtx ix86_tls_module_base_symbol;
15657 rtx
15658 ix86_tls_module_base (void)
15659 {
15660
15661 if (!ix86_tls_module_base_symbol)
15662 {
15663 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15664 "_TLS_MODULE_BASE_");
15665 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15666 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15667 }
15668
15669 return ix86_tls_module_base_symbol;
15670 }
15671 \f
15672 /* Calculate the length of the memory address in the instruction
15673 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15674
15675 int
15676 memory_address_length (rtx addr)
15677 {
15678 struct ix86_address parts;
15679 rtx base, index, disp;
15680 int len;
15681 int ok;
15682
15683 if (GET_CODE (addr) == PRE_DEC
15684 || GET_CODE (addr) == POST_INC
15685 || GET_CODE (addr) == PRE_MODIFY
15686 || GET_CODE (addr) == POST_MODIFY)
15687 return 0;
15688
15689 ok = ix86_decompose_address (addr, &parts);
15690 gcc_assert (ok);
15691
15692 if (parts.base && GET_CODE (parts.base) == SUBREG)
15693 parts.base = SUBREG_REG (parts.base);
15694 if (parts.index && GET_CODE (parts.index) == SUBREG)
15695 parts.index = SUBREG_REG (parts.index);
15696
15697 base = parts.base;
15698 index = parts.index;
15699 disp = parts.disp;
15700 len = 0;
15701
15702 /* Rule of thumb:
15703 - esp as the base always wants an index,
15704 - ebp as the base always wants a displacement. */
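/* For illustration (counting only the bytes this function returns, i.e.
   excluding the modrm, opcode and prefix bytes): (%eax) -> 0,
   (%esp) -> 1 (SIB byte), 8(%ebp) -> 1 (disp8), 0x1234(%eax) -> 4 (disp32),
   a bare symbol -> 4, 8(%eax,%ebx,4) -> 2 (disp8 + SIB).  */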
15705
15706 /* Register Indirect. */
15707 if (base && !index && !disp)
15708 {
15709 /* esp (for its index) and ebp (for its displacement) need
15710 the two-byte modrm form. */
15711 if (addr == stack_pointer_rtx
15712 || addr == arg_pointer_rtx
15713 || addr == frame_pointer_rtx
15714 || addr == hard_frame_pointer_rtx)
15715 len = 1;
15716 }
15717
15718 /* Direct Addressing. */
15719 else if (disp && !base && !index)
15720 len = 4;
15721
15722 else
15723 {
15724 /* Find the length of the displacement constant. */
15725 if (disp)
15726 {
15727 if (base && satisfies_constraint_K (disp))
15728 len = 1;
15729 else
15730 len = 4;
15731 }
15732 /* ebp always wants a displacement. */
15733 else if (base == hard_frame_pointer_rtx)
15734 len = 1;
15735
15736 /* An index requires the two-byte modrm form.... */
15737 if (index
15738 /* ...like esp, which always wants an index. */
15739 || base == stack_pointer_rtx
15740 || base == arg_pointer_rtx
15741 || base == frame_pointer_rtx)
15742 len += 1;
15743 }
15744
15745 return len;
15746 }
15747
15748 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15749 is set, expect that the insn has an 8-bit immediate alternative. */
15750 int
15751 ix86_attr_length_immediate_default (rtx insn, int shortform)
15752 {
15753 int len = 0;
15754 int i;
15755 extract_insn_cached (insn);
15756 for (i = recog_data.n_operands - 1; i >= 0; --i)
15757 if (CONSTANT_P (recog_data.operand[i]))
15758 {
15759 gcc_assert (!len);
15760 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15761 len = 1;
15762 else
15763 {
15764 switch (get_attr_mode (insn))
15765 {
15766 case MODE_QI:
15767 len+=1;
15768 break;
15769 case MODE_HI:
15770 len+=2;
15771 break;
15772 case MODE_SI:
15773 len+=4;
15774 break;
15775 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15776 case MODE_DI:
15777 len+=4;
15778 break;
15779 default:
15780 fatal_insn ("unknown insn mode", insn);
15781 }
15782 }
15783 }
15784 return len;
15785 }
15786 /* Compute default value for "length_address" attribute. */
15787 int
15788 ix86_attr_length_address_default (rtx insn)
15789 {
15790 int i;
15791
15792 if (get_attr_type (insn) == TYPE_LEA)
15793 {
15794 rtx set = PATTERN (insn);
15795
15796 if (GET_CODE (set) == PARALLEL)
15797 set = XVECEXP (set, 0, 0);
15798
15799 gcc_assert (GET_CODE (set) == SET);
15800
15801 return memory_address_length (SET_SRC (set));
15802 }
15803
15804 extract_insn_cached (insn);
15805 for (i = recog_data.n_operands - 1; i >= 0; --i)
15806 if (MEM_P (recog_data.operand[i]))
15807 {
15808 return memory_address_length (XEXP (recog_data.operand[i], 0));
15809 break;
15810 }
15811 return 0;
15812 }
15813 \f
15814 /* Return the maximum number of instructions a cpu can issue. */
15815
15816 static int
15817 ix86_issue_rate (void)
15818 {
15819 switch (ix86_tune)
15820 {
15821 case PROCESSOR_PENTIUM:
15822 case PROCESSOR_K6:
15823 return 2;
15824
15825 case PROCESSOR_PENTIUMPRO:
15826 case PROCESSOR_PENTIUM4:
15827 case PROCESSOR_ATHLON:
15828 case PROCESSOR_K8:
15829 case PROCESSOR_AMDFAM10:
15830 case PROCESSOR_NOCONA:
15831 case PROCESSOR_GENERIC32:
15832 case PROCESSOR_GENERIC64:
15833 return 3;
15834
15835 case PROCESSOR_CORE2:
15836 return 4;
15837
15838 default:
15839 return 1;
15840 }
15841 }
15842
15843 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15844 by DEP_INSN and nothing set by DEP_INSN. */
15845
15846 static int
15847 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15848 {
15849 rtx set, set2;
15850
15851 /* Simplify the test for uninteresting insns. */
15852 if (insn_type != TYPE_SETCC
15853 && insn_type != TYPE_ICMOV
15854 && insn_type != TYPE_FCMOV
15855 && insn_type != TYPE_IBR)
15856 return 0;
15857
15858 if ((set = single_set (dep_insn)) != 0)
15859 {
15860 set = SET_DEST (set);
15861 set2 = NULL_RTX;
15862 }
15863 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15864 && XVECLEN (PATTERN (dep_insn), 0) == 2
15865 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15866 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15867 {
15868 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15869 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15870 }
15871 else
15872 return 0;
15873
15874 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15875 return 0;
15876
15877 /* This test is true if the dependent insn reads the flags but
15878 not any other potentially set register. */
15879 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15880 return 0;
15881
15882 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15883 return 0;
15884
15885 return 1;
15886 }
15887
15888 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15889 address with operands set by DEP_INSN. */
15890
15891 static int
15892 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15893 {
15894 rtx addr;
15895
15896 if (insn_type == TYPE_LEA
15897 && TARGET_PENTIUM)
15898 {
15899 addr = PATTERN (insn);
15900
15901 if (GET_CODE (addr) == PARALLEL)
15902 addr = XVECEXP (addr, 0, 0);
15903
15904 gcc_assert (GET_CODE (addr) == SET);
15905
15906 addr = SET_SRC (addr);
15907 }
15908 else
15909 {
15910 int i;
15911 extract_insn_cached (insn);
15912 for (i = recog_data.n_operands - 1; i >= 0; --i)
15913 if (MEM_P (recog_data.operand[i]))
15914 {
15915 addr = XEXP (recog_data.operand[i], 0);
15916 goto found;
15917 }
15918 return 0;
15919 found:;
15920 }
15921
15922 return modified_in_p (addr, dep_insn);
15923 }
15924
15925 static int
15926 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15927 {
15928 enum attr_type insn_type, dep_insn_type;
15929 enum attr_memory memory;
15930 rtx set, set2;
15931 int dep_insn_code_number;
15932
15933 /* Anti and output dependencies have zero cost on all CPUs. */
15934 if (REG_NOTE_KIND (link) != 0)
15935 return 0;
15936
15937 dep_insn_code_number = recog_memoized (dep_insn);
15938
15939 /* If we can't recognize the insns, we can't really do anything. */
15940 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15941 return cost;
15942
15943 insn_type = get_attr_type (insn);
15944 dep_insn_type = get_attr_type (dep_insn);
15945
15946 switch (ix86_tune)
15947 {
15948 case PROCESSOR_PENTIUM:
15949 /* Address Generation Interlock adds a cycle of latency. */
15950 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15951 cost += 1;
15952
15953 /* ??? Compares pair with jump/setcc. */
15954 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15955 cost = 0;
15956
15957 /* Floating point stores require value to be ready one cycle earlier. */
15958 if (insn_type == TYPE_FMOV
15959 && get_attr_memory (insn) == MEMORY_STORE
15960 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15961 cost += 1;
15962 break;
15963
15964 case PROCESSOR_PENTIUMPRO:
15965 memory = get_attr_memory (insn);
15966
15967 /* INT->FP conversion is expensive. */
15968 if (get_attr_fp_int_src (dep_insn))
15969 cost += 5;
15970
15971 /* There is one cycle extra latency between an FP op and a store. */
15972 if (insn_type == TYPE_FMOV
15973 && (set = single_set (dep_insn)) != NULL_RTX
15974 && (set2 = single_set (insn)) != NULL_RTX
15975 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15976 && MEM_P (SET_DEST (set2)))
15977 cost += 1;
15978
15979 /* Show the ability of the reorder buffer to hide the latency of a load
15980 by executing it in parallel with the previous instruction when the
15981 previous instruction is not needed to compute the address. */
15982 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15983 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15984 {
15985 /* Claim moves to take one cycle, as the core can issue one load
15986 at a time and the next load can start a cycle later. */
15987 if (dep_insn_type == TYPE_IMOV
15988 || dep_insn_type == TYPE_FMOV)
15989 cost = 1;
15990 else if (cost > 1)
15991 cost--;
15992 }
15993 break;
15994
15995 case PROCESSOR_K6:
15996 memory = get_attr_memory (insn);
15997
15998 /* The esp dependency is resolved before the instruction is really
15999 finished. */
16000 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16001 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16002 return 1;
16003
16004 /* INT->FP conversion is expensive. */
16005 if (get_attr_fp_int_src (dep_insn))
16006 cost += 5;
16007
16008 /* Show the ability of the reorder buffer to hide the latency of a load
16009 by executing it in parallel with the previous instruction when the
16010 previous instruction is not needed to compute the address. */
16011 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16012 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16013 {
16014 /* Claim moves to take one cycle, as the core can issue one load
16015 at a time and the next load can start a cycle later. */
16016 if (dep_insn_type == TYPE_IMOV
16017 || dep_insn_type == TYPE_FMOV)
16018 cost = 1;
16019 else if (cost > 2)
16020 cost -= 2;
16021 else
16022 cost = 1;
16023 }
16024 break;
16025
16026 case PROCESSOR_ATHLON:
16027 case PROCESSOR_K8:
16028 case PROCESSOR_AMDFAM10:
16029 case PROCESSOR_GENERIC32:
16030 case PROCESSOR_GENERIC64:
16031 memory = get_attr_memory (insn);
16032
16033 /* Show the ability of the reorder buffer to hide the latency of a load
16034 by executing it in parallel with the previous instruction when the
16035 previous instruction is not needed to compute the address. */
16036 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16037 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16038 {
16039 enum attr_unit unit = get_attr_unit (insn);
16040 int loadcost = 3;
16041
16042 /* Because of the difference between the length of integer and
16043 floating unit pipeline preparation stages, the memory operands
16044 for floating point are cheaper.
16045
16046 ??? For Athlon the difference is most probably 2. */
16047 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16048 loadcost = 3;
16049 else
16050 loadcost = TARGET_ATHLON ? 2 : 0;
16051
16052 if (cost >= loadcost)
16053 cost -= loadcost;
16054 else
16055 cost = 0;
16056 }
16057
16058 default:
16059 break;
16060 }
16061
16062 return cost;
16063 }
16064
16065 /* How many alternative schedules to try. This should be as wide as the
16066 scheduling freedom in the DFA, but no wider. Making this value too
16067 large results in extra work for the scheduler. */
16068
16069 static int
16070 ia32_multipass_dfa_lookahead (void)
16071 {
16072 if (ix86_tune == PROCESSOR_PENTIUM)
16073 return 2;
16074
16075 if (ix86_tune == PROCESSOR_PENTIUMPRO
16076 || ix86_tune == PROCESSOR_K6)
16077 return 1;
16078
16079 else
16080 return 0;
16081 }
16082
16083 \f
16084 /* Compute the alignment given to a constant that is being placed in memory.
16085 EXP is the constant and ALIGN is the alignment that the object would
16086 ordinarily have.
16087 The value of this function is used instead of that alignment to align
16088 the object. */
16089
16090 int
16091 ix86_constant_alignment (tree exp, int align)
16092 {
16093 if (TREE_CODE (exp) == REAL_CST)
16094 {
16095 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16096 return 64;
16097 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16098 return 128;
16099 }
16100 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16101 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16102 return BITS_PER_WORD;
16103
16104 return align;
16105 }
16106
16107 /* Compute the alignment for a static variable.
16108 TYPE is the data type, and ALIGN is the alignment that
16109 the object would ordinarily have. The value of this function is used
16110 instead of that alignment to align the object. */
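/* For illustration (a reading of the checks below, assuming
   MAX_OFILE_ALIGNMENT is at least 256 bits): when not optimizing for size,
   a 100-byte character array is raised to 256-bit alignment by the
   max_align test; on x86-64 a 16-byte aggregate is raised to 128 bits by
   the ABI test; a lone double keeps (or is raised to) 64-bit alignment.  */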
16111
16112 int
16113 ix86_data_alignment (tree type, int align)
16114 {
16115 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16116
16117 if (AGGREGATE_TYPE_P (type)
16118 && TYPE_SIZE (type)
16119 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16120 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16121 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16122 && align < max_align)
16123 align = max_align;
16124
16125 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16126 to a 16-byte boundary. */
16127 if (TARGET_64BIT)
16128 {
16129 if (AGGREGATE_TYPE_P (type)
16130 && TYPE_SIZE (type)
16131 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16132 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16133 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16134 return 128;
16135 }
16136
16137 if (TREE_CODE (type) == ARRAY_TYPE)
16138 {
16139 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16140 return 64;
16141 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16142 return 128;
16143 }
16144 else if (TREE_CODE (type) == COMPLEX_TYPE)
16145 {
16146
16147 if (TYPE_MODE (type) == DCmode && align < 64)
16148 return 64;
16149 if (TYPE_MODE (type) == XCmode && align < 128)
16150 return 128;
16151 }
16152 else if ((TREE_CODE (type) == RECORD_TYPE
16153 || TREE_CODE (type) == UNION_TYPE
16154 || TREE_CODE (type) == QUAL_UNION_TYPE)
16155 && TYPE_FIELDS (type))
16156 {
16157 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16158 return 64;
16159 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16160 return 128;
16161 }
16162 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16163 || TREE_CODE (type) == INTEGER_TYPE)
16164 {
16165 if (TYPE_MODE (type) == DFmode && align < 64)
16166 return 64;
16167 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16168 return 128;
16169 }
16170
16171 return align;
16172 }
16173
16174 /* Compute the alignment for a local variable.
16175 TYPE is the data type, and ALIGN is the alignment that
16176 the object would ordinarily have. The value of this macro is used
16177 instead of that alignment to align the object. */
16178
16179 int
16180 ix86_local_alignment (tree type, int align)
16181 {
16182 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16183 to a 16-byte boundary. */
16184 if (TARGET_64BIT)
16185 {
16186 if (AGGREGATE_TYPE_P (type)
16187 && TYPE_SIZE (type)
16188 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16189 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16190 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16191 return 128;
16192 }
16193 if (TREE_CODE (type) == ARRAY_TYPE)
16194 {
16195 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16196 return 64;
16197 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16198 return 128;
16199 }
16200 else if (TREE_CODE (type) == COMPLEX_TYPE)
16201 {
16202 if (TYPE_MODE (type) == DCmode && align < 64)
16203 return 64;
16204 if (TYPE_MODE (type) == XCmode && align < 128)
16205 return 128;
16206 }
16207 else if ((TREE_CODE (type) == RECORD_TYPE
16208 || TREE_CODE (type) == UNION_TYPE
16209 || TREE_CODE (type) == QUAL_UNION_TYPE)
16210 && TYPE_FIELDS (type))
16211 {
16212 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16213 return 64;
16214 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16215 return 128;
16216 }
16217 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16218 || TREE_CODE (type) == INTEGER_TYPE)
16219 {
16220
16221 if (TYPE_MODE (type) == DFmode && align < 64)
16222 return 64;
16223 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16224 return 128;
16225 }
16226 return align;
16227 }
16228 \f
16229 /* Emit RTL insns to initialize the variable parts of a trampoline.
16230 FNADDR is an RTX for the address of the function's pure code.
16231 CXT is an RTX for the static chain value for the function. */
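/* For illustration, the 32-bit trampoline built below is 10 bytes:

	offset 0:  b9 <cxt>	movl  $CXT, %ecx
	offset 5:  e9 <disp>	jmp   FNADDR
   with <disp> == FNADDR - (TRAMP + 10).  The 64-bit variant loads FNADDR
   into %r11 (movl to %r11d when it fits in 32 bits), the static chain into
   %r10 with movabs, and ends with jmp *%r11.  */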
16232 void
16233 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16234 {
16235 if (!TARGET_64BIT)
16236 {
16237 /* Compute offset from the end of the jmp to the target function. */
16238 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16239 plus_constant (tramp, 10),
16240 NULL_RTX, 1, OPTAB_DIRECT);
16241 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16242 gen_int_mode (0xb9, QImode));
16243 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16244 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16245 gen_int_mode (0xe9, QImode));
16246 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16247 }
16248 else
16249 {
16250 int offset = 0;
16251 /* Try to load address using shorter movl instead of movabs.
16252 We may want to support movq for kernel mode, but the kernel does not use
16253 trampolines at the moment. */
16254 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16255 {
16256 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16257 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16258 gen_int_mode (0xbb41, HImode));
16259 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16260 gen_lowpart (SImode, fnaddr));
16261 offset += 6;
16262 }
16263 else
16264 {
16265 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16266 gen_int_mode (0xbb49, HImode));
16267 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16268 fnaddr);
16269 offset += 10;
16270 }
16271 /* Load static chain using movabs to r10. */
16272 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16273 gen_int_mode (0xba49, HImode));
16274 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16275 cxt);
16276 offset += 10;
16277 /* Jump to r11. */
16278 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16279 gen_int_mode (0xff49, HImode));
16280 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16281 gen_int_mode (0xe3, QImode));
16282 offset += 3;
16283 gcc_assert (offset <= TRAMPOLINE_SIZE);
16284 }
16285
16286 #ifdef ENABLE_EXECUTE_STACK
16287 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16288 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16289 #endif
16290 }
16291 \f
16292 /* Codes for all the SSE/MMX builtins. */
16293 enum ix86_builtins
16294 {
16295 IX86_BUILTIN_ADDPS,
16296 IX86_BUILTIN_ADDSS,
16297 IX86_BUILTIN_DIVPS,
16298 IX86_BUILTIN_DIVSS,
16299 IX86_BUILTIN_MULPS,
16300 IX86_BUILTIN_MULSS,
16301 IX86_BUILTIN_SUBPS,
16302 IX86_BUILTIN_SUBSS,
16303
16304 IX86_BUILTIN_CMPEQPS,
16305 IX86_BUILTIN_CMPLTPS,
16306 IX86_BUILTIN_CMPLEPS,
16307 IX86_BUILTIN_CMPGTPS,
16308 IX86_BUILTIN_CMPGEPS,
16309 IX86_BUILTIN_CMPNEQPS,
16310 IX86_BUILTIN_CMPNLTPS,
16311 IX86_BUILTIN_CMPNLEPS,
16312 IX86_BUILTIN_CMPNGTPS,
16313 IX86_BUILTIN_CMPNGEPS,
16314 IX86_BUILTIN_CMPORDPS,
16315 IX86_BUILTIN_CMPUNORDPS,
16316 IX86_BUILTIN_CMPEQSS,
16317 IX86_BUILTIN_CMPLTSS,
16318 IX86_BUILTIN_CMPLESS,
16319 IX86_BUILTIN_CMPNEQSS,
16320 IX86_BUILTIN_CMPNLTSS,
16321 IX86_BUILTIN_CMPNLESS,
16322 IX86_BUILTIN_CMPNGTSS,
16323 IX86_BUILTIN_CMPNGESS,
16324 IX86_BUILTIN_CMPORDSS,
16325 IX86_BUILTIN_CMPUNORDSS,
16326
16327 IX86_BUILTIN_COMIEQSS,
16328 IX86_BUILTIN_COMILTSS,
16329 IX86_BUILTIN_COMILESS,
16330 IX86_BUILTIN_COMIGTSS,
16331 IX86_BUILTIN_COMIGESS,
16332 IX86_BUILTIN_COMINEQSS,
16333 IX86_BUILTIN_UCOMIEQSS,
16334 IX86_BUILTIN_UCOMILTSS,
16335 IX86_BUILTIN_UCOMILESS,
16336 IX86_BUILTIN_UCOMIGTSS,
16337 IX86_BUILTIN_UCOMIGESS,
16338 IX86_BUILTIN_UCOMINEQSS,
16339
16340 IX86_BUILTIN_CVTPI2PS,
16341 IX86_BUILTIN_CVTPS2PI,
16342 IX86_BUILTIN_CVTSI2SS,
16343 IX86_BUILTIN_CVTSI642SS,
16344 IX86_BUILTIN_CVTSS2SI,
16345 IX86_BUILTIN_CVTSS2SI64,
16346 IX86_BUILTIN_CVTTPS2PI,
16347 IX86_BUILTIN_CVTTSS2SI,
16348 IX86_BUILTIN_CVTTSS2SI64,
16349
16350 IX86_BUILTIN_MAXPS,
16351 IX86_BUILTIN_MAXSS,
16352 IX86_BUILTIN_MINPS,
16353 IX86_BUILTIN_MINSS,
16354
16355 IX86_BUILTIN_LOADUPS,
16356 IX86_BUILTIN_STOREUPS,
16357 IX86_BUILTIN_MOVSS,
16358
16359 IX86_BUILTIN_MOVHLPS,
16360 IX86_BUILTIN_MOVLHPS,
16361 IX86_BUILTIN_LOADHPS,
16362 IX86_BUILTIN_LOADLPS,
16363 IX86_BUILTIN_STOREHPS,
16364 IX86_BUILTIN_STORELPS,
16365
16366 IX86_BUILTIN_MASKMOVQ,
16367 IX86_BUILTIN_MOVMSKPS,
16368 IX86_BUILTIN_PMOVMSKB,
16369
16370 IX86_BUILTIN_MOVNTPS,
16371 IX86_BUILTIN_MOVNTQ,
16372
16373 IX86_BUILTIN_LOADDQU,
16374 IX86_BUILTIN_STOREDQU,
16375
16376 IX86_BUILTIN_PACKSSWB,
16377 IX86_BUILTIN_PACKSSDW,
16378 IX86_BUILTIN_PACKUSWB,
16379
16380 IX86_BUILTIN_PADDB,
16381 IX86_BUILTIN_PADDW,
16382 IX86_BUILTIN_PADDD,
16383 IX86_BUILTIN_PADDQ,
16384 IX86_BUILTIN_PADDSB,
16385 IX86_BUILTIN_PADDSW,
16386 IX86_BUILTIN_PADDUSB,
16387 IX86_BUILTIN_PADDUSW,
16388 IX86_BUILTIN_PSUBB,
16389 IX86_BUILTIN_PSUBW,
16390 IX86_BUILTIN_PSUBD,
16391 IX86_BUILTIN_PSUBQ,
16392 IX86_BUILTIN_PSUBSB,
16393 IX86_BUILTIN_PSUBSW,
16394 IX86_BUILTIN_PSUBUSB,
16395 IX86_BUILTIN_PSUBUSW,
16396
16397 IX86_BUILTIN_PAND,
16398 IX86_BUILTIN_PANDN,
16399 IX86_BUILTIN_POR,
16400 IX86_BUILTIN_PXOR,
16401
16402 IX86_BUILTIN_PAVGB,
16403 IX86_BUILTIN_PAVGW,
16404
16405 IX86_BUILTIN_PCMPEQB,
16406 IX86_BUILTIN_PCMPEQW,
16407 IX86_BUILTIN_PCMPEQD,
16408 IX86_BUILTIN_PCMPGTB,
16409 IX86_BUILTIN_PCMPGTW,
16410 IX86_BUILTIN_PCMPGTD,
16411
16412 IX86_BUILTIN_PMADDWD,
16413
16414 IX86_BUILTIN_PMAXSW,
16415 IX86_BUILTIN_PMAXUB,
16416 IX86_BUILTIN_PMINSW,
16417 IX86_BUILTIN_PMINUB,
16418
16419 IX86_BUILTIN_PMULHUW,
16420 IX86_BUILTIN_PMULHW,
16421 IX86_BUILTIN_PMULLW,
16422
16423 IX86_BUILTIN_PSADBW,
16424 IX86_BUILTIN_PSHUFW,
16425
16426 IX86_BUILTIN_PSLLW,
16427 IX86_BUILTIN_PSLLD,
16428 IX86_BUILTIN_PSLLQ,
16429 IX86_BUILTIN_PSRAW,
16430 IX86_BUILTIN_PSRAD,
16431 IX86_BUILTIN_PSRLW,
16432 IX86_BUILTIN_PSRLD,
16433 IX86_BUILTIN_PSRLQ,
16434 IX86_BUILTIN_PSLLWI,
16435 IX86_BUILTIN_PSLLDI,
16436 IX86_BUILTIN_PSLLQI,
16437 IX86_BUILTIN_PSRAWI,
16438 IX86_BUILTIN_PSRADI,
16439 IX86_BUILTIN_PSRLWI,
16440 IX86_BUILTIN_PSRLDI,
16441 IX86_BUILTIN_PSRLQI,
16442
16443 IX86_BUILTIN_PUNPCKHBW,
16444 IX86_BUILTIN_PUNPCKHWD,
16445 IX86_BUILTIN_PUNPCKHDQ,
16446 IX86_BUILTIN_PUNPCKLBW,
16447 IX86_BUILTIN_PUNPCKLWD,
16448 IX86_BUILTIN_PUNPCKLDQ,
16449
16450 IX86_BUILTIN_SHUFPS,
16451
16452 IX86_BUILTIN_RCPPS,
16453 IX86_BUILTIN_RCPSS,
16454 IX86_BUILTIN_RSQRTPS,
16455 IX86_BUILTIN_RSQRTSS,
16456 IX86_BUILTIN_RSQRTF,
16457 IX86_BUILTIN_SQRTPS,
16458 IX86_BUILTIN_SQRTSS,
16459
16460 IX86_BUILTIN_UNPCKHPS,
16461 IX86_BUILTIN_UNPCKLPS,
16462
16463 IX86_BUILTIN_ANDPS,
16464 IX86_BUILTIN_ANDNPS,
16465 IX86_BUILTIN_ORPS,
16466 IX86_BUILTIN_XORPS,
16467
16468 IX86_BUILTIN_EMMS,
16469 IX86_BUILTIN_LDMXCSR,
16470 IX86_BUILTIN_STMXCSR,
16471 IX86_BUILTIN_SFENCE,
16472
16473 /* 3DNow! Original */
16474 IX86_BUILTIN_FEMMS,
16475 IX86_BUILTIN_PAVGUSB,
16476 IX86_BUILTIN_PF2ID,
16477 IX86_BUILTIN_PFACC,
16478 IX86_BUILTIN_PFADD,
16479 IX86_BUILTIN_PFCMPEQ,
16480 IX86_BUILTIN_PFCMPGE,
16481 IX86_BUILTIN_PFCMPGT,
16482 IX86_BUILTIN_PFMAX,
16483 IX86_BUILTIN_PFMIN,
16484 IX86_BUILTIN_PFMUL,
16485 IX86_BUILTIN_PFRCP,
16486 IX86_BUILTIN_PFRCPIT1,
16487 IX86_BUILTIN_PFRCPIT2,
16488 IX86_BUILTIN_PFRSQIT1,
16489 IX86_BUILTIN_PFRSQRT,
16490 IX86_BUILTIN_PFSUB,
16491 IX86_BUILTIN_PFSUBR,
16492 IX86_BUILTIN_PI2FD,
16493 IX86_BUILTIN_PMULHRW,
16494
16495 /* 3DNow! Athlon Extensions */
16496 IX86_BUILTIN_PF2IW,
16497 IX86_BUILTIN_PFNACC,
16498 IX86_BUILTIN_PFPNACC,
16499 IX86_BUILTIN_PI2FW,
16500 IX86_BUILTIN_PSWAPDSI,
16501 IX86_BUILTIN_PSWAPDSF,
16502
16503 /* SSE2 */
16504 IX86_BUILTIN_ADDPD,
16505 IX86_BUILTIN_ADDSD,
16506 IX86_BUILTIN_DIVPD,
16507 IX86_BUILTIN_DIVSD,
16508 IX86_BUILTIN_MULPD,
16509 IX86_BUILTIN_MULSD,
16510 IX86_BUILTIN_SUBPD,
16511 IX86_BUILTIN_SUBSD,
16512
16513 IX86_BUILTIN_CMPEQPD,
16514 IX86_BUILTIN_CMPLTPD,
16515 IX86_BUILTIN_CMPLEPD,
16516 IX86_BUILTIN_CMPGTPD,
16517 IX86_BUILTIN_CMPGEPD,
16518 IX86_BUILTIN_CMPNEQPD,
16519 IX86_BUILTIN_CMPNLTPD,
16520 IX86_BUILTIN_CMPNLEPD,
16521 IX86_BUILTIN_CMPNGTPD,
16522 IX86_BUILTIN_CMPNGEPD,
16523 IX86_BUILTIN_CMPORDPD,
16524 IX86_BUILTIN_CMPUNORDPD,
16525 IX86_BUILTIN_CMPEQSD,
16526 IX86_BUILTIN_CMPLTSD,
16527 IX86_BUILTIN_CMPLESD,
16528 IX86_BUILTIN_CMPNEQSD,
16529 IX86_BUILTIN_CMPNLTSD,
16530 IX86_BUILTIN_CMPNLESD,
16531 IX86_BUILTIN_CMPORDSD,
16532 IX86_BUILTIN_CMPUNORDSD,
16533
16534 IX86_BUILTIN_COMIEQSD,
16535 IX86_BUILTIN_COMILTSD,
16536 IX86_BUILTIN_COMILESD,
16537 IX86_BUILTIN_COMIGTSD,
16538 IX86_BUILTIN_COMIGESD,
16539 IX86_BUILTIN_COMINEQSD,
16540 IX86_BUILTIN_UCOMIEQSD,
16541 IX86_BUILTIN_UCOMILTSD,
16542 IX86_BUILTIN_UCOMILESD,
16543 IX86_BUILTIN_UCOMIGTSD,
16544 IX86_BUILTIN_UCOMIGESD,
16545 IX86_BUILTIN_UCOMINEQSD,
16546
16547 IX86_BUILTIN_MAXPD,
16548 IX86_BUILTIN_MAXSD,
16549 IX86_BUILTIN_MINPD,
16550 IX86_BUILTIN_MINSD,
16551
16552 IX86_BUILTIN_ANDPD,
16553 IX86_BUILTIN_ANDNPD,
16554 IX86_BUILTIN_ORPD,
16555 IX86_BUILTIN_XORPD,
16556
16557 IX86_BUILTIN_SQRTPD,
16558 IX86_BUILTIN_SQRTSD,
16559
16560 IX86_BUILTIN_UNPCKHPD,
16561 IX86_BUILTIN_UNPCKLPD,
16562
16563 IX86_BUILTIN_SHUFPD,
16564
16565 IX86_BUILTIN_LOADUPD,
16566 IX86_BUILTIN_STOREUPD,
16567 IX86_BUILTIN_MOVSD,
16568
16569 IX86_BUILTIN_LOADHPD,
16570 IX86_BUILTIN_LOADLPD,
16571
16572 IX86_BUILTIN_CVTDQ2PD,
16573 IX86_BUILTIN_CVTDQ2PS,
16574
16575 IX86_BUILTIN_CVTPD2DQ,
16576 IX86_BUILTIN_CVTPD2PI,
16577 IX86_BUILTIN_CVTPD2PS,
16578 IX86_BUILTIN_CVTTPD2DQ,
16579 IX86_BUILTIN_CVTTPD2PI,
16580
16581 IX86_BUILTIN_CVTPI2PD,
16582 IX86_BUILTIN_CVTSI2SD,
16583 IX86_BUILTIN_CVTSI642SD,
16584
16585 IX86_BUILTIN_CVTSD2SI,
16586 IX86_BUILTIN_CVTSD2SI64,
16587 IX86_BUILTIN_CVTSD2SS,
16588 IX86_BUILTIN_CVTSS2SD,
16589 IX86_BUILTIN_CVTTSD2SI,
16590 IX86_BUILTIN_CVTTSD2SI64,
16591
16592 IX86_BUILTIN_CVTPS2DQ,
16593 IX86_BUILTIN_CVTPS2PD,
16594 IX86_BUILTIN_CVTTPS2DQ,
16595
16596 IX86_BUILTIN_MOVNTI,
16597 IX86_BUILTIN_MOVNTPD,
16598 IX86_BUILTIN_MOVNTDQ,
16599
16600 /* SSE2 MMX */
16601 IX86_BUILTIN_MASKMOVDQU,
16602 IX86_BUILTIN_MOVMSKPD,
16603 IX86_BUILTIN_PMOVMSKB128,
16604
16605 IX86_BUILTIN_PACKSSWB128,
16606 IX86_BUILTIN_PACKSSDW128,
16607 IX86_BUILTIN_PACKUSWB128,
16608
16609 IX86_BUILTIN_PADDB128,
16610 IX86_BUILTIN_PADDW128,
16611 IX86_BUILTIN_PADDD128,
16612 IX86_BUILTIN_PADDQ128,
16613 IX86_BUILTIN_PADDSB128,
16614 IX86_BUILTIN_PADDSW128,
16615 IX86_BUILTIN_PADDUSB128,
16616 IX86_BUILTIN_PADDUSW128,
16617 IX86_BUILTIN_PSUBB128,
16618 IX86_BUILTIN_PSUBW128,
16619 IX86_BUILTIN_PSUBD128,
16620 IX86_BUILTIN_PSUBQ128,
16621 IX86_BUILTIN_PSUBSB128,
16622 IX86_BUILTIN_PSUBSW128,
16623 IX86_BUILTIN_PSUBUSB128,
16624 IX86_BUILTIN_PSUBUSW128,
16625
16626 IX86_BUILTIN_PAND128,
16627 IX86_BUILTIN_PANDN128,
16628 IX86_BUILTIN_POR128,
16629 IX86_BUILTIN_PXOR128,
16630
16631 IX86_BUILTIN_PAVGB128,
16632 IX86_BUILTIN_PAVGW128,
16633
16634 IX86_BUILTIN_PCMPEQB128,
16635 IX86_BUILTIN_PCMPEQW128,
16636 IX86_BUILTIN_PCMPEQD128,
16637 IX86_BUILTIN_PCMPGTB128,
16638 IX86_BUILTIN_PCMPGTW128,
16639 IX86_BUILTIN_PCMPGTD128,
16640
16641 IX86_BUILTIN_PMADDWD128,
16642
16643 IX86_BUILTIN_PMAXSW128,
16644 IX86_BUILTIN_PMAXUB128,
16645 IX86_BUILTIN_PMINSW128,
16646 IX86_BUILTIN_PMINUB128,
16647
16648 IX86_BUILTIN_PMULUDQ,
16649 IX86_BUILTIN_PMULUDQ128,
16650 IX86_BUILTIN_PMULHUW128,
16651 IX86_BUILTIN_PMULHW128,
16652 IX86_BUILTIN_PMULLW128,
16653
16654 IX86_BUILTIN_PSADBW128,
16655 IX86_BUILTIN_PSHUFHW,
16656 IX86_BUILTIN_PSHUFLW,
16657 IX86_BUILTIN_PSHUFD,
16658
16659 IX86_BUILTIN_PSLLDQI128,
16660 IX86_BUILTIN_PSLLWI128,
16661 IX86_BUILTIN_PSLLDI128,
16662 IX86_BUILTIN_PSLLQI128,
16663 IX86_BUILTIN_PSRAWI128,
16664 IX86_BUILTIN_PSRADI128,
16665 IX86_BUILTIN_PSRLDQI128,
16666 IX86_BUILTIN_PSRLWI128,
16667 IX86_BUILTIN_PSRLDI128,
16668 IX86_BUILTIN_PSRLQI128,
16669
16670 IX86_BUILTIN_PSLLDQ128,
16671 IX86_BUILTIN_PSLLW128,
16672 IX86_BUILTIN_PSLLD128,
16673 IX86_BUILTIN_PSLLQ128,
16674 IX86_BUILTIN_PSRAW128,
16675 IX86_BUILTIN_PSRAD128,
16676 IX86_BUILTIN_PSRLW128,
16677 IX86_BUILTIN_PSRLD128,
16678 IX86_BUILTIN_PSRLQ128,
16679
16680 IX86_BUILTIN_PUNPCKHBW128,
16681 IX86_BUILTIN_PUNPCKHWD128,
16682 IX86_BUILTIN_PUNPCKHDQ128,
16683 IX86_BUILTIN_PUNPCKHQDQ128,
16684 IX86_BUILTIN_PUNPCKLBW128,
16685 IX86_BUILTIN_PUNPCKLWD128,
16686 IX86_BUILTIN_PUNPCKLDQ128,
16687 IX86_BUILTIN_PUNPCKLQDQ128,
16688
16689 IX86_BUILTIN_CLFLUSH,
16690 IX86_BUILTIN_MFENCE,
16691 IX86_BUILTIN_LFENCE,
16692
16693 /* SSE3 (Prescott New Instructions). */
16694 IX86_BUILTIN_ADDSUBPS,
16695 IX86_BUILTIN_HADDPS,
16696 IX86_BUILTIN_HSUBPS,
16697 IX86_BUILTIN_MOVSHDUP,
16698 IX86_BUILTIN_MOVSLDUP,
16699 IX86_BUILTIN_ADDSUBPD,
16700 IX86_BUILTIN_HADDPD,
16701 IX86_BUILTIN_HSUBPD,
16702 IX86_BUILTIN_LDDQU,
16703
16704 IX86_BUILTIN_MONITOR,
16705 IX86_BUILTIN_MWAIT,
16706
16707 /* SSSE3. */
16708 IX86_BUILTIN_PHADDW,
16709 IX86_BUILTIN_PHADDD,
16710 IX86_BUILTIN_PHADDSW,
16711 IX86_BUILTIN_PHSUBW,
16712 IX86_BUILTIN_PHSUBD,
16713 IX86_BUILTIN_PHSUBSW,
16714 IX86_BUILTIN_PMADDUBSW,
16715 IX86_BUILTIN_PMULHRSW,
16716 IX86_BUILTIN_PSHUFB,
16717 IX86_BUILTIN_PSIGNB,
16718 IX86_BUILTIN_PSIGNW,
16719 IX86_BUILTIN_PSIGND,
16720 IX86_BUILTIN_PALIGNR,
16721 IX86_BUILTIN_PABSB,
16722 IX86_BUILTIN_PABSW,
16723 IX86_BUILTIN_PABSD,
16724
16725 IX86_BUILTIN_PHADDW128,
16726 IX86_BUILTIN_PHADDD128,
16727 IX86_BUILTIN_PHADDSW128,
16728 IX86_BUILTIN_PHSUBW128,
16729 IX86_BUILTIN_PHSUBD128,
16730 IX86_BUILTIN_PHSUBSW128,
16731 IX86_BUILTIN_PMADDUBSW128,
16732 IX86_BUILTIN_PMULHRSW128,
16733 IX86_BUILTIN_PSHUFB128,
16734 IX86_BUILTIN_PSIGNB128,
16735 IX86_BUILTIN_PSIGNW128,
16736 IX86_BUILTIN_PSIGND128,
16737 IX86_BUILTIN_PALIGNR128,
16738 IX86_BUILTIN_PABSB128,
16739 IX86_BUILTIN_PABSW128,
16740 IX86_BUILTIN_PABSD128,
16741
16742 /* AMDFAM10 - SSE4A New Instructions. */
16743 IX86_BUILTIN_MOVNTSD,
16744 IX86_BUILTIN_MOVNTSS,
16745 IX86_BUILTIN_EXTRQI,
16746 IX86_BUILTIN_EXTRQ,
16747 IX86_BUILTIN_INSERTQI,
16748 IX86_BUILTIN_INSERTQ,
16749
16750 /* SSE4.1. */
16751 IX86_BUILTIN_BLENDPD,
16752 IX86_BUILTIN_BLENDPS,
16753 IX86_BUILTIN_BLENDVPD,
16754 IX86_BUILTIN_BLENDVPS,
16755 IX86_BUILTIN_PBLENDVB128,
16756 IX86_BUILTIN_PBLENDW128,
16757
16758 IX86_BUILTIN_DPPD,
16759 IX86_BUILTIN_DPPS,
16760
16761 IX86_BUILTIN_INSERTPS128,
16762
16763 IX86_BUILTIN_MOVNTDQA,
16764 IX86_BUILTIN_MPSADBW128,
16765 IX86_BUILTIN_PACKUSDW128,
16766 IX86_BUILTIN_PCMPEQQ,
16767 IX86_BUILTIN_PHMINPOSUW128,
16768
16769 IX86_BUILTIN_PMAXSB128,
16770 IX86_BUILTIN_PMAXSD128,
16771 IX86_BUILTIN_PMAXUD128,
16772 IX86_BUILTIN_PMAXUW128,
16773
16774 IX86_BUILTIN_PMINSB128,
16775 IX86_BUILTIN_PMINSD128,
16776 IX86_BUILTIN_PMINUD128,
16777 IX86_BUILTIN_PMINUW128,
16778
16779 IX86_BUILTIN_PMOVSXBW128,
16780 IX86_BUILTIN_PMOVSXBD128,
16781 IX86_BUILTIN_PMOVSXBQ128,
16782 IX86_BUILTIN_PMOVSXWD128,
16783 IX86_BUILTIN_PMOVSXWQ128,
16784 IX86_BUILTIN_PMOVSXDQ128,
16785
16786 IX86_BUILTIN_PMOVZXBW128,
16787 IX86_BUILTIN_PMOVZXBD128,
16788 IX86_BUILTIN_PMOVZXBQ128,
16789 IX86_BUILTIN_PMOVZXWD128,
16790 IX86_BUILTIN_PMOVZXWQ128,
16791 IX86_BUILTIN_PMOVZXDQ128,
16792
16793 IX86_BUILTIN_PMULDQ128,
16794 IX86_BUILTIN_PMULLD128,
16795
16796 IX86_BUILTIN_ROUNDPD,
16797 IX86_BUILTIN_ROUNDPS,
16798 IX86_BUILTIN_ROUNDSD,
16799 IX86_BUILTIN_ROUNDSS,
16800
16801 IX86_BUILTIN_PTESTZ,
16802 IX86_BUILTIN_PTESTC,
16803 IX86_BUILTIN_PTESTNZC,
16804
16805 IX86_BUILTIN_VEC_INIT_V2SI,
16806 IX86_BUILTIN_VEC_INIT_V4HI,
16807 IX86_BUILTIN_VEC_INIT_V8QI,
16808 IX86_BUILTIN_VEC_EXT_V2DF,
16809 IX86_BUILTIN_VEC_EXT_V2DI,
16810 IX86_BUILTIN_VEC_EXT_V4SF,
16811 IX86_BUILTIN_VEC_EXT_V4SI,
16812 IX86_BUILTIN_VEC_EXT_V8HI,
16813 IX86_BUILTIN_VEC_EXT_V2SI,
16814 IX86_BUILTIN_VEC_EXT_V4HI,
16815 IX86_BUILTIN_VEC_EXT_V16QI,
16816 IX86_BUILTIN_VEC_SET_V2DI,
16817 IX86_BUILTIN_VEC_SET_V4SF,
16818 IX86_BUILTIN_VEC_SET_V4SI,
16819 IX86_BUILTIN_VEC_SET_V8HI,
16820 IX86_BUILTIN_VEC_SET_V4HI,
16821 IX86_BUILTIN_VEC_SET_V16QI,
16822
16823 IX86_BUILTIN_VEC_PACK_SFIX,
16824
16825 /* SSE4.2. */
16826 IX86_BUILTIN_CRC32QI,
16827 IX86_BUILTIN_CRC32HI,
16828 IX86_BUILTIN_CRC32SI,
16829 IX86_BUILTIN_CRC32DI,
16830
16831 IX86_BUILTIN_PCMPESTRI128,
16832 IX86_BUILTIN_PCMPESTRM128,
16833 IX86_BUILTIN_PCMPESTRA128,
16834 IX86_BUILTIN_PCMPESTRC128,
16835 IX86_BUILTIN_PCMPESTRO128,
16836 IX86_BUILTIN_PCMPESTRS128,
16837 IX86_BUILTIN_PCMPESTRZ128,
16838 IX86_BUILTIN_PCMPISTRI128,
16839 IX86_BUILTIN_PCMPISTRM128,
16840 IX86_BUILTIN_PCMPISTRA128,
16841 IX86_BUILTIN_PCMPISTRC128,
16842 IX86_BUILTIN_PCMPISTRO128,
16843 IX86_BUILTIN_PCMPISTRS128,
16844 IX86_BUILTIN_PCMPISTRZ128,
16845
16846 IX86_BUILTIN_PCMPGTQ,
16847
16848 /* TFmode support builtins. */
16849 IX86_BUILTIN_INFQ,
16850 IX86_BUILTIN_FABSQ,
16851 IX86_BUILTIN_COPYSIGNQ,
16852
16853 IX86_BUILTIN_MAX
16854 };
16855
16856 /* Table for the ix86 builtin decls. */
16857 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16858
16859 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16860  * only if MASK enables an ISA that is set in ix86_isa_flags (and, for
16861  * 64-bit-only builtins, only when TARGET_64BIT).  Stores the function
16862  * decl in the ix86_builtins array.
16862  * Returns the function decl, or NULL_TREE if the builtin was not added. */
16863
16864 static inline tree
16865 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16866 {
16867 tree decl = NULL_TREE;
16868
16869 if (mask & ix86_isa_flags
16870 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16871 {
16872 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16873 NULL, NULL_TREE);
16874 ix86_builtins[(int) code] = decl;
16875 }
16876
16877 return decl;
16878 }
16879
16880 /* Like def_builtin, but also marks the function decl "const". */
16881
16882 static inline tree
16883 def_builtin_const (int mask, const char *name, tree type,
16884 enum ix86_builtins code)
16885 {
16886 tree decl = def_builtin (mask, name, type, code);
16887 if (decl)
16888 TREE_READONLY (decl) = 1;
16889 return decl;
16890 }
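
/* Illustrative sketch only (not an additional registration): a typical use
   from ix86_init_mmx_sse_builtins later in this file registers one builtin
   under an ISA mask, e.g.

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   The decl lands in ix86_builtins[IX86_BUILTIN_LDMXCSR] only when the SSE
   bit is enabled in ix86_isa_flags; def_builtin_const is used instead for
   builtins without side effects, such as the arithmetic intrinsics.  */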
16891
16892 /* Bits for builtin_description.flag. */
16893
16894 /* Set when we don't support the comparison natively, and should
16895 swap the comparison operands in order to support it. */
16896 #define BUILTIN_DESC_SWAP_OPERANDS 1
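
/* For example (see bdesc_2arg below), cmpgtps has no native pattern of its
   own; its descriptor pairs LT with this flag, so __builtin_ia32_cmpgtps
   (a, b) is expanded as the LT comparison with a and b swapped.  */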
16897
16898 struct builtin_description
16899 {
16900 const unsigned int mask;
16901 const enum insn_code icode;
16902 const char *const name;
16903 const enum ix86_builtins code;
16904 const enum rtx_code comparison;
16905 const int flag;
16906 };
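
/* A rough sketch of how these descriptor tables are consumed (the real
   loops live in ix86_init_mmx_sse_builtins below and choose the function
   type per entry):

     for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

   Entries whose name is 0 are skipped by the generic loops and are instead
   registered explicitly with a hand-written name and type.  */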
16907
16908 static const struct builtin_description bdesc_comi[] =
16909 {
16910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16934 };
16935
16936 static const struct builtin_description bdesc_ptest[] =
16937 {
16938 /* SSE4.1 */
16939 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16940 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16941 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16942 };
16943
16944 static const struct builtin_description bdesc_pcmpestr[] =
16945 {
16946 /* SSE4.2 */
16947 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16948 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16949 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16950 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16951 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16952 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
16954 };
16955
16956 static const struct builtin_description bdesc_pcmpistr[] =
16957 {
16958 /* SSE4.2 */
16959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
16966 };
16967
16968 static const struct builtin_description bdesc_crc32[] =
16969 {
16970 /* SSE4.2 */
16971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16974 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16975 };
16976
16977 /* SSE builtins with 3 arguments, the last of which must be an immediate or xmm0. */
16978 static const struct builtin_description bdesc_sse_3arg[] =
16979 {
16980 /* SSE4.1 */
16981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16987 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16988 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16989 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16990 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16991 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16992 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16993 };
16994
16995 static const struct builtin_description bdesc_2arg[] =
16996 {
16997 /* SSE */
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17000 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17006
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17029
17030 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17031 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17034
17035 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17038 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17039
17040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17045
17046 /* MMX */
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17050 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17054 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17055
17056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17064
17065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17067 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17068
17069 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17073
17074 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17076
17077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17081 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17083
17084 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17085 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17086 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17088
17089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17094 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17095
17096 /* Special. */
17097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17100
17101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17104
17105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17111
17112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17118
17119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17123
17124 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17126
17127 /* SSE2 */
17128 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17130 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17136
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17157
17158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17162
17163 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17165 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17167
17168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17171
17172 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17173
17174 /* SSE2 MMX */
17175 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17177 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17183
17184 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17185 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17186 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17187 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17189 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17192
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17194 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17195
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17200
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17203
17204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17210
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17215
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17224
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17228
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17231
17232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17234
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17236 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17238
17239 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17241 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17242
17243 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17244 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17245
17246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17247
17248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17252
17253 /* SSE3 MMX */
17254 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17255 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17260
17261 /* SSSE3 */
17262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17276 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17280 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17281 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17284 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17286
17287 /* SSE4.1 */
17288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17295 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17297 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17300
17301 /* SSE4.2 */
17302 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17303 };
17304
17305 static const struct builtin_description bdesc_1arg[] =
17306 {
17307 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17309
17310 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17313
17314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17319 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17320
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17323
17324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17325
17326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17328
17329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17334
17335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17336
17337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17339 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17340 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17341
17342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17345
17346 /* SSE3 */
17347 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17349
17350 /* SSSE3 */
17351 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17352 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17357
17358 /* SSE4.1 */
17359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17372
17373 /* Fake 1-argument builtins whose 2nd argument is a constant smaller than 8 bits. */
17374 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17375 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17376 };
17377
17378 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17379 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17380 builtins. */
17381 static void
17382 ix86_init_mmx_sse_builtins (void)
17383 {
17384 const struct builtin_description * d;
17385 size_t i;
17386
17387 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17388 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17389 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17390 tree V2DI_type_node
17391 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17392 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17393 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17394 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17395 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17396 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17397 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17398
17399 tree pchar_type_node = build_pointer_type (char_type_node);
17400 tree pcchar_type_node = build_pointer_type (
17401 build_type_variant (char_type_node, 1, 0));
17402 tree pfloat_type_node = build_pointer_type (float_type_node);
17403 tree pcfloat_type_node = build_pointer_type (
17404 build_type_variant (float_type_node, 1, 0));
17405 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17406 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17407 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17408
17409 /* Comparisons. */
17410 tree int_ftype_v4sf_v4sf
17411 = build_function_type_list (integer_type_node,
17412 V4SF_type_node, V4SF_type_node, NULL_TREE);
17413 tree v4si_ftype_v4sf_v4sf
17414 = build_function_type_list (V4SI_type_node,
17415 V4SF_type_node, V4SF_type_node, NULL_TREE);
17416 /* MMX/SSE/integer conversions. */
17417 tree int_ftype_v4sf
17418 = build_function_type_list (integer_type_node,
17419 V4SF_type_node, NULL_TREE);
17420 tree int64_ftype_v4sf
17421 = build_function_type_list (long_long_integer_type_node,
17422 V4SF_type_node, NULL_TREE);
17423 tree int_ftype_v8qi
17424 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17425 tree v4sf_ftype_v4sf_int
17426 = build_function_type_list (V4SF_type_node,
17427 V4SF_type_node, integer_type_node, NULL_TREE);
17428 tree v4sf_ftype_v4sf_int64
17429 = build_function_type_list (V4SF_type_node,
17430 V4SF_type_node, long_long_integer_type_node,
17431 NULL_TREE);
17432 tree v4sf_ftype_v4sf_v2si
17433 = build_function_type_list (V4SF_type_node,
17434 V4SF_type_node, V2SI_type_node, NULL_TREE);
17435
17436 /* Miscellaneous. */
17437 tree v8qi_ftype_v4hi_v4hi
17438 = build_function_type_list (V8QI_type_node,
17439 V4HI_type_node, V4HI_type_node, NULL_TREE);
17440 tree v4hi_ftype_v2si_v2si
17441 = build_function_type_list (V4HI_type_node,
17442 V2SI_type_node, V2SI_type_node, NULL_TREE);
17443 tree v4sf_ftype_v4sf_v4sf_int
17444 = build_function_type_list (V4SF_type_node,
17445 V4SF_type_node, V4SF_type_node,
17446 integer_type_node, NULL_TREE);
17447 tree v2si_ftype_v4hi_v4hi
17448 = build_function_type_list (V2SI_type_node,
17449 V4HI_type_node, V4HI_type_node, NULL_TREE);
17450 tree v4hi_ftype_v4hi_int
17451 = build_function_type_list (V4HI_type_node,
17452 V4HI_type_node, integer_type_node, NULL_TREE);
17453 tree v4hi_ftype_v4hi_di
17454 = build_function_type_list (V4HI_type_node,
17455 V4HI_type_node, long_long_unsigned_type_node,
17456 NULL_TREE);
17457 tree v2si_ftype_v2si_di
17458 = build_function_type_list (V2SI_type_node,
17459 V2SI_type_node, long_long_unsigned_type_node,
17460 NULL_TREE);
17461 tree void_ftype_void
17462 = build_function_type (void_type_node, void_list_node);
17463 tree void_ftype_unsigned
17464 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17465 tree void_ftype_unsigned_unsigned
17466 = build_function_type_list (void_type_node, unsigned_type_node,
17467 unsigned_type_node, NULL_TREE);
17468 tree void_ftype_pcvoid_unsigned_unsigned
17469 = build_function_type_list (void_type_node, const_ptr_type_node,
17470 unsigned_type_node, unsigned_type_node,
17471 NULL_TREE);
17472 tree unsigned_ftype_void
17473 = build_function_type (unsigned_type_node, void_list_node);
17474 tree v2si_ftype_v4sf
17475 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17476 /* Loads/stores. */
17477 tree void_ftype_v8qi_v8qi_pchar
17478 = build_function_type_list (void_type_node,
17479 V8QI_type_node, V8QI_type_node,
17480 pchar_type_node, NULL_TREE);
17481 tree v4sf_ftype_pcfloat
17482 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17483 /* @@@ the type is bogus */
17484 tree v4sf_ftype_v4sf_pv2si
17485 = build_function_type_list (V4SF_type_node,
17486 V4SF_type_node, pv2si_type_node, NULL_TREE);
17487 tree void_ftype_pv2si_v4sf
17488 = build_function_type_list (void_type_node,
17489 pv2si_type_node, V4SF_type_node, NULL_TREE);
17490 tree void_ftype_pfloat_v4sf
17491 = build_function_type_list (void_type_node,
17492 pfloat_type_node, V4SF_type_node, NULL_TREE);
17493 tree void_ftype_pdi_di
17494 = build_function_type_list (void_type_node,
17495 pdi_type_node, long_long_unsigned_type_node,
17496 NULL_TREE);
17497 tree void_ftype_pv2di_v2di
17498 = build_function_type_list (void_type_node,
17499 pv2di_type_node, V2DI_type_node, NULL_TREE);
17500 /* Normal vector unops. */
17501 tree v4sf_ftype_v4sf
17502 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17503 tree v16qi_ftype_v16qi
17504 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17505 tree v8hi_ftype_v8hi
17506 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17507 tree v4si_ftype_v4si
17508 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17509 tree v8qi_ftype_v8qi
17510 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17511 tree v4hi_ftype_v4hi
17512 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17513
17514 /* Normal vector binops. */
17515 tree v4sf_ftype_v4sf_v4sf
17516 = build_function_type_list (V4SF_type_node,
17517 V4SF_type_node, V4SF_type_node, NULL_TREE);
17518 tree v8qi_ftype_v8qi_v8qi
17519 = build_function_type_list (V8QI_type_node,
17520 V8QI_type_node, V8QI_type_node, NULL_TREE);
17521 tree v4hi_ftype_v4hi_v4hi
17522 = build_function_type_list (V4HI_type_node,
17523 V4HI_type_node, V4HI_type_node, NULL_TREE);
17524 tree v2si_ftype_v2si_v2si
17525 = build_function_type_list (V2SI_type_node,
17526 V2SI_type_node, V2SI_type_node, NULL_TREE);
17527 tree di_ftype_di_di
17528 = build_function_type_list (long_long_unsigned_type_node,
17529 long_long_unsigned_type_node,
17530 long_long_unsigned_type_node, NULL_TREE);
17531
17532 tree di_ftype_di_di_int
17533 = build_function_type_list (long_long_unsigned_type_node,
17534 long_long_unsigned_type_node,
17535 long_long_unsigned_type_node,
17536 integer_type_node, NULL_TREE);
17537
17538 tree v2si_ftype_v2sf
17539 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17540 tree v2sf_ftype_v2si
17541 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17542 tree v2si_ftype_v2si
17543 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17544 tree v2sf_ftype_v2sf
17545 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17546 tree v2sf_ftype_v2sf_v2sf
17547 = build_function_type_list (V2SF_type_node,
17548 V2SF_type_node, V2SF_type_node, NULL_TREE);
17549 tree v2si_ftype_v2sf_v2sf
17550 = build_function_type_list (V2SI_type_node,
17551 V2SF_type_node, V2SF_type_node, NULL_TREE);
17552 tree pint_type_node = build_pointer_type (integer_type_node);
17553 tree pdouble_type_node = build_pointer_type (double_type_node);
17554 tree pcdouble_type_node = build_pointer_type (
17555 build_type_variant (double_type_node, 1, 0));
17556 tree int_ftype_v2df_v2df
17557 = build_function_type_list (integer_type_node,
17558 V2DF_type_node, V2DF_type_node, NULL_TREE);
17559
17560 tree void_ftype_pcvoid
17561 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17562 tree v4sf_ftype_v4si
17563 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17564 tree v4si_ftype_v4sf
17565 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17566 tree v2df_ftype_v4si
17567 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17568 tree v4si_ftype_v2df
17569 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17570 tree v4si_ftype_v2df_v2df
17571 = build_function_type_list (V4SI_type_node,
17572 V2DF_type_node, V2DF_type_node, NULL_TREE);
17573 tree v2si_ftype_v2df
17574 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17575 tree v4sf_ftype_v2df
17576 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17577 tree v2df_ftype_v2si
17578 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17579 tree v2df_ftype_v4sf
17580 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17581 tree int_ftype_v2df
17582 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17583 tree int64_ftype_v2df
17584 = build_function_type_list (long_long_integer_type_node,
17585 V2DF_type_node, NULL_TREE);
17586 tree v2df_ftype_v2df_int
17587 = build_function_type_list (V2DF_type_node,
17588 V2DF_type_node, integer_type_node, NULL_TREE);
17589 tree v2df_ftype_v2df_int64
17590 = build_function_type_list (V2DF_type_node,
17591 V2DF_type_node, long_long_integer_type_node,
17592 NULL_TREE);
17593 tree v4sf_ftype_v4sf_v2df
17594 = build_function_type_list (V4SF_type_node,
17595 V4SF_type_node, V2DF_type_node, NULL_TREE);
17596 tree v2df_ftype_v2df_v4sf
17597 = build_function_type_list (V2DF_type_node,
17598 V2DF_type_node, V4SF_type_node, NULL_TREE);
17599 tree v2df_ftype_v2df_v2df_int
17600 = build_function_type_list (V2DF_type_node,
17601 V2DF_type_node, V2DF_type_node,
17602 integer_type_node,
17603 NULL_TREE);
17604 tree v2df_ftype_v2df_pcdouble
17605 = build_function_type_list (V2DF_type_node,
17606 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17607 tree void_ftype_pdouble_v2df
17608 = build_function_type_list (void_type_node,
17609 pdouble_type_node, V2DF_type_node, NULL_TREE);
17610 tree void_ftype_pint_int
17611 = build_function_type_list (void_type_node,
17612 pint_type_node, integer_type_node, NULL_TREE);
17613 tree void_ftype_v16qi_v16qi_pchar
17614 = build_function_type_list (void_type_node,
17615 V16QI_type_node, V16QI_type_node,
17616 pchar_type_node, NULL_TREE);
17617 tree v2df_ftype_pcdouble
17618 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17619 tree v2df_ftype_v2df_v2df
17620 = build_function_type_list (V2DF_type_node,
17621 V2DF_type_node, V2DF_type_node, NULL_TREE);
17622 tree v16qi_ftype_v16qi_v16qi
17623 = build_function_type_list (V16QI_type_node,
17624 V16QI_type_node, V16QI_type_node, NULL_TREE);
17625 tree v8hi_ftype_v8hi_v8hi
17626 = build_function_type_list (V8HI_type_node,
17627 V8HI_type_node, V8HI_type_node, NULL_TREE);
17628 tree v4si_ftype_v4si_v4si
17629 = build_function_type_list (V4SI_type_node,
17630 V4SI_type_node, V4SI_type_node, NULL_TREE);
17631 tree v2di_ftype_v2di_v2di
17632 = build_function_type_list (V2DI_type_node,
17633 V2DI_type_node, V2DI_type_node, NULL_TREE);
17634 tree v2di_ftype_v2df_v2df
17635 = build_function_type_list (V2DI_type_node,
17636 V2DF_type_node, V2DF_type_node, NULL_TREE);
17637 tree v2df_ftype_v2df
17638 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17639 tree v2di_ftype_v2di_int
17640 = build_function_type_list (V2DI_type_node,
17641 V2DI_type_node, integer_type_node, NULL_TREE);
17642 tree v2di_ftype_v2di_v2di_int
17643 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17644 V2DI_type_node, integer_type_node, NULL_TREE);
17645 tree v4si_ftype_v4si_int
17646 = build_function_type_list (V4SI_type_node,
17647 V4SI_type_node, integer_type_node, NULL_TREE);
17648 tree v8hi_ftype_v8hi_int
17649 = build_function_type_list (V8HI_type_node,
17650 V8HI_type_node, integer_type_node, NULL_TREE);
17651 tree v4si_ftype_v8hi_v8hi
17652 = build_function_type_list (V4SI_type_node,
17653 V8HI_type_node, V8HI_type_node, NULL_TREE);
17654 tree di_ftype_v8qi_v8qi
17655 = build_function_type_list (long_long_unsigned_type_node,
17656 V8QI_type_node, V8QI_type_node, NULL_TREE);
17657 tree di_ftype_v2si_v2si
17658 = build_function_type_list (long_long_unsigned_type_node,
17659 V2SI_type_node, V2SI_type_node, NULL_TREE);
17660 tree v2di_ftype_v16qi_v16qi
17661 = build_function_type_list (V2DI_type_node,
17662 V16QI_type_node, V16QI_type_node, NULL_TREE);
17663 tree v2di_ftype_v4si_v4si
17664 = build_function_type_list (V2DI_type_node,
17665 V4SI_type_node, V4SI_type_node, NULL_TREE);
17666 tree int_ftype_v16qi
17667 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17668 tree v16qi_ftype_pcchar
17669 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17670 tree void_ftype_pchar_v16qi
17671 = build_function_type_list (void_type_node,
17672 pchar_type_node, V16QI_type_node, NULL_TREE);
17673
17674 tree v2di_ftype_v2di_unsigned_unsigned
17675 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17676 unsigned_type_node, unsigned_type_node,
17677 NULL_TREE);
17678 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17679 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17680 unsigned_type_node, unsigned_type_node,
17681 NULL_TREE);
17682 tree v2di_ftype_v2di_v16qi
17683 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17684 NULL_TREE);
17685 tree v2df_ftype_v2df_v2df_v2df
17686 = build_function_type_list (V2DF_type_node,
17687 V2DF_type_node, V2DF_type_node,
17688 V2DF_type_node, NULL_TREE);
17689 tree v4sf_ftype_v4sf_v4sf_v4sf
17690 = build_function_type_list (V4SF_type_node,
17691 V4SF_type_node, V4SF_type_node,
17692 V4SF_type_node, NULL_TREE);
17693 tree v8hi_ftype_v16qi
17694 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17695 NULL_TREE);
17696 tree v4si_ftype_v16qi
17697 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17698 NULL_TREE);
17699 tree v2di_ftype_v16qi
17700 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17701 NULL_TREE);
17702 tree v4si_ftype_v8hi
17703 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17704 NULL_TREE);
17705 tree v2di_ftype_v8hi
17706 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17707 NULL_TREE);
17708 tree v2di_ftype_v4si
17709 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17710 NULL_TREE);
17711 tree v2di_ftype_pv2di
17712 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17713 NULL_TREE);
17714 tree v16qi_ftype_v16qi_v16qi_int
17715 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17716 V16QI_type_node, integer_type_node,
17717 NULL_TREE);
17718 tree v16qi_ftype_v16qi_v16qi_v16qi
17719 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17720 V16QI_type_node, V16QI_type_node,
17721 NULL_TREE);
17722 tree v8hi_ftype_v8hi_v8hi_int
17723 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17724 V8HI_type_node, integer_type_node,
17725 NULL_TREE);
17726 tree v4si_ftype_v4si_v4si_int
17727 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17728 V4SI_type_node, integer_type_node,
17729 NULL_TREE);
17730 tree int_ftype_v2di_v2di
17731 = build_function_type_list (integer_type_node,
17732 V2DI_type_node, V2DI_type_node,
17733 NULL_TREE);
17734 tree int_ftype_v16qi_int_v16qi_int_int
17735 = build_function_type_list (integer_type_node,
17736 V16QI_type_node,
17737 integer_type_node,
17738 V16QI_type_node,
17739 integer_type_node,
17740 integer_type_node,
17741 NULL_TREE);
17742 tree v16qi_ftype_v16qi_int_v16qi_int_int
17743 = build_function_type_list (V16QI_type_node,
17744 V16QI_type_node,
17745 integer_type_node,
17746 V16QI_type_node,
17747 integer_type_node,
17748 integer_type_node,
17749 NULL_TREE);
17750 tree int_ftype_v16qi_v16qi_int
17751 = build_function_type_list (integer_type_node,
17752 V16QI_type_node,
17753 V16QI_type_node,
17754 integer_type_node,
17755 NULL_TREE);
17756 tree ftype;
17757
17758 /* The __float80 type. */
17759 if (TYPE_MODE (long_double_type_node) == XFmode)
17760 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17761 "__float80");
17762 else
17763 {
17764 /* long double is not XFmode here; create a distinct 80-bit type. */
17765 tree float80_type_node = make_node (REAL_TYPE);
17766
17767 TYPE_PRECISION (float80_type_node) = 80;
17768 layout_type (float80_type_node);
17769 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17770 "__float80");
17771 }
17772
17773 if (TARGET_64BIT)
17774 {
17775 tree float128_type_node = make_node (REAL_TYPE);
17776
17777 TYPE_PRECISION (float128_type_node) = 128;
17778 layout_type (float128_type_node);
17779 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17780 "__float128");
17781
17782 /* TFmode support builtins. */
17783 ftype = build_function_type (float128_type_node,
17784 void_list_node);
17785 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17786
17787 ftype = build_function_type_list (float128_type_node,
17788 float128_type_node,
17789 NULL_TREE);
17790 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17791
17792 ftype = build_function_type_list (float128_type_node,
17793 float128_type_node,
17794 float128_type_node,
17795 NULL_TREE);
17796 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17797 }
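/* A user-level sketch (illustrative, not part of this file): on a
   64-bit target the builtins registered above operate on the
   __float128 type, e.g.

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);
*/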
17798
17799 /* Add all SSE builtins that are more or less simple operations on
17800 three operands. */
17801 for (i = 0, d = bdesc_sse_3arg;
17802 i < ARRAY_SIZE (bdesc_sse_3arg);
17803 i++, d++)
17804 {
17805 /* Use one of the operands; the target can have a different mode for
17806 mask-generating compares. */
17807 enum machine_mode mode;
17808 tree type;
17809
17810 if (d->name == 0)
17811 continue;
17812 mode = insn_data[d->icode].operand[1].mode;
17813
17814 switch (mode)
17815 {
17816 case V16QImode:
17817 type = v16qi_ftype_v16qi_v16qi_int;
17818 break;
17819 case V8HImode:
17820 type = v8hi_ftype_v8hi_v8hi_int;
17821 break;
17822 case V4SImode:
17823 type = v4si_ftype_v4si_v4si_int;
17824 break;
17825 case V2DImode:
17826 type = v2di_ftype_v2di_v2di_int;
17827 break;
17828 case V2DFmode:
17829 type = v2df_ftype_v2df_v2df_int;
17830 break;
17831 case V4SFmode:
17832 type = v4sf_ftype_v4sf_v4sf_int;
17833 break;
17834 default:
17835 gcc_unreachable ();
17836 }
17837
17838 /* Override for variable blends. */
17839 switch (d->icode)
17840 {
17841 case CODE_FOR_sse4_1_blendvpd:
17842 type = v2df_ftype_v2df_v2df_v2df;
17843 break;
17844 case CODE_FOR_sse4_1_blendvps:
17845 type = v4sf_ftype_v4sf_v4sf_v4sf;
17846 break;
17847 case CODE_FOR_sse4_1_pblendvb:
17848 type = v16qi_ftype_v16qi_v16qi_v16qi;
17849 break;
17850 default:
17851 break;
17852 }
17853
17854 def_builtin (d->mask, d->name, type, d->code);
17855 }
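/* For illustration only (assumed user-level usage, not part of this
   file): most 3-operand builtins take an 8-bit immediate selector,
   while the variable blends whose types are overridden above take a
   third vector operand instead, e.g.

     __v2df r1 = __builtin_ia32_blendpd (a, b, 0x1);
     __v2df r2 = __builtin_ia32_blendvpd (a, b, mask);
*/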
17856
17857 /* Add all builtins that are more or less simple operations on two
17858 operands. */
17859 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17860 {
17861 /* Use one of the operands; the target can have a different mode for
17862 mask-generating compares. */
17863 enum machine_mode mode;
17864 tree type;
17865
17866 if (d->name == 0)
17867 continue;
17868 mode = insn_data[d->icode].operand[1].mode;
17869
17870 switch (mode)
17871 {
17872 case V16QImode:
17873 type = v16qi_ftype_v16qi_v16qi;
17874 break;
17875 case V8HImode:
17876 type = v8hi_ftype_v8hi_v8hi;
17877 break;
17878 case V4SImode:
17879 type = v4si_ftype_v4si_v4si;
17880 break;
17881 case V2DImode:
17882 type = v2di_ftype_v2di_v2di;
17883 break;
17884 case V2DFmode:
17885 type = v2df_ftype_v2df_v2df;
17886 break;
17887 case V4SFmode:
17888 type = v4sf_ftype_v4sf_v4sf;
17889 break;
17890 case V8QImode:
17891 type = v8qi_ftype_v8qi_v8qi;
17892 break;
17893 case V4HImode:
17894 type = v4hi_ftype_v4hi_v4hi;
17895 break;
17896 case V2SImode:
17897 type = v2si_ftype_v2si_v2si;
17898 break;
17899 case DImode:
17900 type = di_ftype_di_di;
17901 break;
17902
17903 default:
17904 gcc_unreachable ();
17905 }
17906
17907 /* Override for comparisons. */
17908 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17909 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17910 type = v4si_ftype_v4sf_v4sf;
17911
17912 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17913 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17914 type = v2di_ftype_v2df_v2df;
17915
17916 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
17917 type = v4si_ftype_v2df_v2df;
17918
17919 def_builtin_const (d->mask, d->name, type, d->code);
17920 }
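/* Illustrative sketch only (user-level code, not part of this file):
   the comparison overrides above give the mask-generating compares an
   integer-vector result even though their inputs are floats, e.g.

     __v4si m = __builtin_ia32_cmpeqps (a, b);
*/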
17921
17922 /* Add all builtins that are more or less simple operations on one operand. */
17923 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17924 {
17925 enum machine_mode mode;
17926 tree type;
17927
17928 if (d->name == 0)
17929 continue;
17930 mode = insn_data[d->icode].operand[1].mode;
17931
17932 switch (mode)
17933 {
17934 case V16QImode:
17935 type = v16qi_ftype_v16qi;
17936 break;
17937 case V8HImode:
17938 type = v8hi_ftype_v8hi;
17939 break;
17940 case V4SImode:
17941 type = v4si_ftype_v4si;
17942 break;
17943 case V2DFmode:
17944 type = v2df_ftype_v2df;
17945 break;
17946 case V4SFmode:
17947 type = v4sf_ftype_v4sf;
17948 break;
17949 case V8QImode:
17950 type = v8qi_ftype_v8qi;
17951 break;
17952 case V4HImode:
17953 type = v4hi_ftype_v4hi;
17954 break;
17955 case V2SImode:
17956 type = v2si_ftype_v2si;
17957 break;
17958
17959 default:
17960 gcc_unreachable ();
17961 }
17962
17963 def_builtin (d->mask, d->name, type, d->code);
17964 }
17965
17966 /* pcmpestr[im] insns. */
17967 for (i = 0, d = bdesc_pcmpestr;
17968 i < ARRAY_SIZE (bdesc_pcmpestr);
17969 i++, d++)
17970 {
17971 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17972 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17973 else
17974 ftype = int_ftype_v16qi_int_v16qi_int_int;
17975 def_builtin (d->mask, d->name, ftype, d->code);
17976 }
17977
17978 /* pcmpistr[im] insns. */
17979 for (i = 0, d = bdesc_pcmpistr;
17980 i < ARRAY_SIZE (bdesc_pcmpistr);
17981 i++, d++)
17982 {
17983 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17984 ftype = v16qi_ftype_v16qi_v16qi_int;
17985 else
17986 ftype = int_ftype_v16qi_v16qi_int;
17987 def_builtin (d->mask, d->name, ftype, d->code);
17988 }
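/* A user-level sketch (illustrative, not part of this file): the "i"
   forms return an index and the "m" forms return a mask, matching the
   two prototypes selected above, e.g.

     int     idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);
     __v16qi msk = __builtin_ia32_pcmpistrm128 (a, b, 0x0c);
*/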
17989
17990 /* Add the remaining MMX insns with somewhat more complicated types. */
17991 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17992 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17993 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17994 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17995
17996 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17997 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17998 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17999
18000 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
18001 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
18002
18003 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
18004 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18005
18006 /* comi/ucomi insns. */
18007 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18008 if (d->mask == OPTION_MASK_ISA_SSE2)
18009 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18010 else
18011 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18012
18013 /* ptest insns. */
18014 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18015 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18016
18017 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18018 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18019 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18020
18021 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18022 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18023 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18024 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18025 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18026 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18027 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18028 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18029 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18030 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18031 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
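/* Illustrative only (user-level code, not part of this file): the
   "tt" conversions truncate toward zero, while the plain ones round
   according to the current MXCSR rounding mode, e.g.

     int a = __builtin_ia32_cvtss2si (v);
     int b = __builtin_ia32_cvttss2si (v);
*/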
18032
18033 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18034
18035 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18036 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18037
18038 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18039 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18040 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18041 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18042
18043 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18044 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18045 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18046 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18047
18048 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18049
18050 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18051
18052 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18053 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18054 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18055 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18056 ftype = build_function_type_list (float_type_node,
18057 float_type_node,
18058 NULL_TREE);
18059 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18060 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18061 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18062
18063 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18064
18065 /* Original 3DNow! */
18066 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18067 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18068 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18069 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18070 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18071 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18072 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18073 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18074 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18075 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18076 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18077 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18078 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18079 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18080 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18081 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18082 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18083 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18084 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18085 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18086
18087 /* 3DNow! extension as used in the Athlon CPU. */
18088 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18089 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18090 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18091 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18092 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18093 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18094
18095 /* SSE2 */
18096 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18097
18098 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18099 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18100
18101 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18102 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18103
18104 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18105 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18106 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18107 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18108 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18109
18110 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18111 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18112 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18113 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18114
18115 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18116 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18117
18118 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18119
18120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18121 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18122
18123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18124 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18126 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18127 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18128
18129 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18130
18131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18132 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18133 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18134 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18135
18136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18137 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18139
18140 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18141 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18142 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18143 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18144
18145 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18146 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18147 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18148
18149 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18150 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18151
18152 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18153 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18154
18155 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18156 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18157 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18159 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18160 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18161 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18162
18163 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18164 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18165 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18166 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18167 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18168 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18169 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18170
18171 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18172 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18173 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18174 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18175
18176 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18177
18178 /* Prescott New Instructions (SSE3). */
18179 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18180 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18181 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18182
18183 /* SSSE3. */
18184 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18185 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18186
18187 /* SSE4.1. */
18188 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18189 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18190 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18191 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18192 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18193 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18194 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18195 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18196 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18197 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18198 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18199 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18200 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18201 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18202 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18203 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18204 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18205 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18206
18207 /* SSE4.2. */
18208 ftype = build_function_type_list (unsigned_type_node,
18209 unsigned_type_node,
18210 unsigned_char_type_node,
18211 NULL_TREE);
18212 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18213 ftype = build_function_type_list (unsigned_type_node,
18214 unsigned_type_node,
18215 short_unsigned_type_node,
18216 NULL_TREE);
18217 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18218 ftype = build_function_type_list (unsigned_type_node,
18219 unsigned_type_node,
18220 unsigned_type_node,
18221 NULL_TREE);
18222 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18223 ftype = build_function_type_list (long_long_unsigned_type_node,
18224 long_long_unsigned_type_node,
18225 long_long_unsigned_type_node,
18226 NULL_TREE);
18227 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
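/* A minimal user-level sketch (not part of this file): each crc32
   builtin folds one more chunk of data into a running CRC32C value,
   e.g.

     unsigned crc = 0;
     crc = __builtin_ia32_crc32qi (crc, byte);
     crc = __builtin_ia32_crc32si (crc, word);
*/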
18228
18229 /* AMDFAM10 SSE4A new built-ins. */
18230 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18231 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18232 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18233 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18234 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18235 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18236
18237 /* Access to the vec_init patterns. */
18238 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18239 integer_type_node, NULL_TREE);
18240 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18241
18242 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18243 short_integer_type_node,
18244 short_integer_type_node,
18245 short_integer_type_node, NULL_TREE);
18246 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18247
18248 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18249 char_type_node, char_type_node,
18250 char_type_node, char_type_node,
18251 char_type_node, char_type_node,
18252 char_type_node, NULL_TREE);
18253 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
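/* Illustrative only (assumed user-level usage): the vec_init builtins
   build a vector from individual elements, e.g.

     __v2si v = __builtin_ia32_vec_init_v2si (1, 2);
*/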
18254
18255 /* Access to the vec_extract patterns. */
18256 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18257 integer_type_node, NULL_TREE);
18258 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18259
18260 ftype = build_function_type_list (long_long_integer_type_node,
18261 V2DI_type_node, integer_type_node,
18262 NULL_TREE);
18263 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18264
18265 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18266 integer_type_node, NULL_TREE);
18267 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18268
18269 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18270 integer_type_node, NULL_TREE);
18271 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18272
18273 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18274 integer_type_node, NULL_TREE);
18275 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18276
18277 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18278 integer_type_node, NULL_TREE);
18279 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18280
18281 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18282 integer_type_node, NULL_TREE);
18283 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18284
18285 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18286 integer_type_node, NULL_TREE);
18287 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
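/* Illustrative only (assumed user-level usage): each vec_ext builtin
   returns the element selected by a constant index, e.g.

     float f = __builtin_ia32_vec_ext_v4sf (v, 2);
*/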
18288
18289 /* Access to the vec_set patterns. */
18290 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18291 intDI_type_node,
18292 integer_type_node, NULL_TREE);
18293 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18294
18295 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18296 float_type_node,
18297 integer_type_node, NULL_TREE);
18298 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18299
18300 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18301 intSI_type_node,
18302 integer_type_node, NULL_TREE);
18303 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18304
18305 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18306 intHI_type_node,
18307 integer_type_node, NULL_TREE);
18308 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18309
18310 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18311 intHI_type_node,
18312 integer_type_node, NULL_TREE);
18313 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18314
18315 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18316 intQI_type_node,
18317 integer_type_node, NULL_TREE);
18318 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
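/* Illustrative only (assumed user-level usage): each vec_set builtin
   returns a copy of the vector with the indexed element replaced, e.g.

     __v4si w = __builtin_ia32_vec_set_v4si (v, 42, 1);
*/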
18319 }
18320
18321 static void
18322 ix86_init_builtins (void)
18323 {
18324 if (TARGET_MMX)
18325 ix86_init_mmx_sse_builtins ();
18326 }
18327
18328 /* Errors in the source file can cause expand_expr to return const0_rtx
18329 where we expect a vector. To avoid crashing, use one of the vector
18330 clear instructions. */
18331 static rtx
18332 safe_vector_operand (rtx x, enum machine_mode mode)
18333 {
18334 if (x == const0_rtx)
18335 x = CONST0_RTX (mode);
18336 return x;
18337 }
18338
18339 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18340 4 operands. The third argument must be an immediate that fits in 8
18341 bits (4 bits for roundsd/roundss), or the xmm0 register for the variable blends. */
18342
18343 static rtx
18344 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18345 rtx target)
18346 {
18347 rtx pat;
18348 tree arg0 = CALL_EXPR_ARG (exp, 0);
18349 tree arg1 = CALL_EXPR_ARG (exp, 1);
18350 tree arg2 = CALL_EXPR_ARG (exp, 2);
18351 rtx op0 = expand_normal (arg0);
18352 rtx op1 = expand_normal (arg1);
18353 rtx op2 = expand_normal (arg2);
18354 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18355 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18356 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18357 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18358
18359 if (VECTOR_MODE_P (mode1))
18360 op0 = safe_vector_operand (op0, mode1);
18361 if (VECTOR_MODE_P (mode2))
18362 op1 = safe_vector_operand (op1, mode2);
18363 if (VECTOR_MODE_P (mode3))
18364 op2 = safe_vector_operand (op2, mode3);
18365
18366 if (optimize
18367 || target == 0
18368 || GET_MODE (target) != tmode
18369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18370 target = gen_reg_rtx (tmode);
18371
18372 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18373 op0 = copy_to_mode_reg (mode1, op0);
18374 if ((optimize && !register_operand (op1, mode2))
18375 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18376 op1 = copy_to_mode_reg (mode2, op1);
18377
18378 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18379 switch (icode)
18380 {
18381 case CODE_FOR_sse4_1_blendvpd:
18382 case CODE_FOR_sse4_1_blendvps:
18383 case CODE_FOR_sse4_1_pblendvb:
18384 op2 = copy_to_mode_reg (mode3, op2);
18385 break;
18386
18387 case CODE_FOR_sse4_1_roundsd:
18388 case CODE_FOR_sse4_1_roundss:
18389 error ("the third argument must be a 4-bit immediate");
18390 return const0_rtx;
18391
18392 default:
18393 error ("the third argument must be an 8-bit immediate");
18394 return const0_rtx;
18395 }
18396
18397 pat = GEN_FCN (icode) (target, op0, op1, op2);
18398 if (! pat)
18399 return 0;
18400 emit_insn (pat);
18401 return target;
18402 }
18403
18404 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18405
18406 static rtx
18407 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18408 {
18409 rtx pat;
18410 tree arg0 = CALL_EXPR_ARG (exp, 0);
18411 tree arg1 = CALL_EXPR_ARG (exp, 1);
18412 rtx op0 = expand_normal (arg0);
18413 rtx op1 = expand_normal (arg1);
18414 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18415 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18416 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18417
18418 if (optimize
18419 || !target
18420 || GET_MODE (target) != tmode
18421 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18422 target = gen_reg_rtx (tmode);
18423
18424 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18425 op0 = copy_to_mode_reg (mode0, op0);
18426 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18427 {
18428 op1 = copy_to_reg (op1);
18429 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18430 }
18431
18432 pat = GEN_FCN (icode) (target, op0, op1);
18433 if (! pat)
18434 return 0;
18435 emit_insn (pat);
18436 return target;
18437 }
18438
18439 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18440
18441 static rtx
18442 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18443 {
18444 rtx pat, xops[3];
18445 tree arg0 = CALL_EXPR_ARG (exp, 0);
18446 tree arg1 = CALL_EXPR_ARG (exp, 1);
18447 rtx op0 = expand_normal (arg0);
18448 rtx op1 = expand_normal (arg1);
18449 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18450 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18451 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18452
18453 if (VECTOR_MODE_P (mode0))
18454 op0 = safe_vector_operand (op0, mode0);
18455 if (VECTOR_MODE_P (mode1))
18456 op1 = safe_vector_operand (op1, mode1);
18457
18458 if (optimize || !target
18459 || GET_MODE (target) != tmode
18460 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18461 target = gen_reg_rtx (tmode);
18462
18463 if (GET_MODE (op1) == SImode && mode1 == TImode)
18464 {
18465 rtx x = gen_reg_rtx (V4SImode);
18466 emit_insn (gen_sse2_loadd (x, op1));
18467 op1 = gen_lowpart (TImode, x);
18468 }
18469
18470 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18471 op0 = copy_to_mode_reg (mode0, op0);
18472 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18473 op1 = copy_to_mode_reg (mode1, op1);
18474
18475 /* ??? Using ix86_fixup_binary_operands is problematic when
18476 we've got mismatched modes. Fake it. */
18477
18478 xops[0] = target;
18479 xops[1] = op0;
18480 xops[2] = op1;
18481
18482 if (tmode == mode0 && tmode == mode1)
18483 {
18484 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18485 op0 = xops[1];
18486 op1 = xops[2];
18487 }
18488 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18489 {
18490 op0 = force_reg (mode0, op0);
18491 op1 = force_reg (mode1, op1);
18492 target = gen_reg_rtx (tmode);
18493 }
18494
18495 pat = GEN_FCN (icode) (target, op0, op1);
18496 if (! pat)
18497 return 0;
18498 emit_insn (pat);
18499 return target;
18500 }
18501
18502 /* Subroutine of ix86_expand_builtin to take care of stores. */
18503
18504 static rtx
18505 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18506 {
18507 rtx pat;
18508 tree arg0 = CALL_EXPR_ARG (exp, 0);
18509 tree arg1 = CALL_EXPR_ARG (exp, 1);
18510 rtx op0 = expand_normal (arg0);
18511 rtx op1 = expand_normal (arg1);
18512 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18513 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18514
18515 if (VECTOR_MODE_P (mode1))
18516 op1 = safe_vector_operand (op1, mode1);
18517
18518 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18519 op1 = copy_to_mode_reg (mode1, op1);
18520
18521 pat = GEN_FCN (icode) (op0, op1);
18522 if (pat)
18523 emit_insn (pat);
18524 return 0;
18525 }
18526
18527 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18528
18529 static rtx
18530 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18531 rtx target, int do_load)
18532 {
18533 rtx pat;
18534 tree arg0 = CALL_EXPR_ARG (exp, 0);
18535 rtx op0 = expand_normal (arg0);
18536 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18537 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18538
18539 if (optimize || !target
18540 || GET_MODE (target) != tmode
18541 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18542 target = gen_reg_rtx (tmode);
18543 if (do_load)
18544 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18545 else
18546 {
18547 if (VECTOR_MODE_P (mode0))
18548 op0 = safe_vector_operand (op0, mode0);
18549
18550 if ((optimize && !register_operand (op0, mode0))
18551 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18552 op0 = copy_to_mode_reg (mode0, op0);
18553 }
18554
18555 switch (icode)
18556 {
18557 case CODE_FOR_sse4_1_roundpd:
18558 case CODE_FOR_sse4_1_roundps:
18559 {
18560 tree arg1 = CALL_EXPR_ARG (exp, 1);
18561 rtx op1 = expand_normal (arg1);
18562 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18563
18564 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18565 {
18566 error ("the second argument must be a 4-bit immediate");
18567 return const0_rtx;
18568 }
18569 pat = GEN_FCN (icode) (target, op0, op1);
18570 }
18571 break;
18572 default:
18573 pat = GEN_FCN (icode) (target, op0);
18574 break;
18575 }
18576
18577 if (! pat)
18578 return 0;
18579 emit_insn (pat);
18580 return target;
18581 }
18582
18583 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18584 sqrtss, rsqrtss, rcpss. */
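/* Note (illustrative, not taken from this file): op1 is duplicated
   from op0 below because the scalar patterns merge the newly computed
   low element into a copy of the other operand, so for example
   __builtin_ia32_sqrtss (x) leaves the upper three elements of x
   unchanged.  */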
18585
18586 static rtx
18587 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18588 {
18589 rtx pat;
18590 tree arg0 = CALL_EXPR_ARG (exp, 0);
18591 rtx op1, op0 = expand_normal (arg0);
18592 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18593 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18594
18595 if (optimize || !target
18596 || GET_MODE (target) != tmode
18597 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18598 target = gen_reg_rtx (tmode);
18599
18600 if (VECTOR_MODE_P (mode0))
18601 op0 = safe_vector_operand (op0, mode0);
18602
18603 if ((optimize && !register_operand (op0, mode0))
18604 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18605 op0 = copy_to_mode_reg (mode0, op0);
18606
18607 op1 = op0;
18608 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18609 op1 = copy_to_mode_reg (mode0, op1);
18610
18611 pat = GEN_FCN (icode) (target, op0, op1);
18612 if (! pat)
18613 return 0;
18614 emit_insn (pat);
18615 return target;
18616 }
18617
18618 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18619
18620 static rtx
18621 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18622 rtx target)
18623 {
18624 rtx pat;
18625 tree arg0 = CALL_EXPR_ARG (exp, 0);
18626 tree arg1 = CALL_EXPR_ARG (exp, 1);
18627 rtx op0 = expand_normal (arg0);
18628 rtx op1 = expand_normal (arg1);
18629 rtx op2;
18630 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18631 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18632 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18633 enum rtx_code comparison = d->comparison;
18634
18635 if (VECTOR_MODE_P (mode0))
18636 op0 = safe_vector_operand (op0, mode0);
18637 if (VECTOR_MODE_P (mode1))
18638 op1 = safe_vector_operand (op1, mode1);
18639
18640 /* Swap operands if we have a comparison that isn't available in
18641 hardware. */
18642 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18643 {
18644 rtx tmp = gen_reg_rtx (mode1);
18645 emit_move_insn (tmp, op1);
18646 op1 = op0;
18647 op0 = tmp;
18648 }
18649
18650 if (optimize || !target
18651 || GET_MODE (target) != tmode
18652 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18653 target = gen_reg_rtx (tmode);
18654
18655 if ((optimize && !register_operand (op0, mode0))
18656 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18657 op0 = copy_to_mode_reg (mode0, op0);
18658 if ((optimize && !register_operand (op1, mode1))
18659 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18660 op1 = copy_to_mode_reg (mode1, op1);
18661
18662 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18663 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18664 if (! pat)
18665 return 0;
18666 emit_insn (pat);
18667 return target;
18668 }
18669
18670 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18671
18672 static rtx
18673 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18674 rtx target)
18675 {
18676 rtx pat;
18677 tree arg0 = CALL_EXPR_ARG (exp, 0);
18678 tree arg1 = CALL_EXPR_ARG (exp, 1);
18679 rtx op0 = expand_normal (arg0);
18680 rtx op1 = expand_normal (arg1);
18681 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18682 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18683 enum rtx_code comparison = d->comparison;
18684
18685 if (VECTOR_MODE_P (mode0))
18686 op0 = safe_vector_operand (op0, mode0);
18687 if (VECTOR_MODE_P (mode1))
18688 op1 = safe_vector_operand (op1, mode1);
18689
18690 /* Swap operands if we have a comparison that isn't available in
18691 hardware. */
18692 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18693 {
18694 rtx tmp = op1;
18695 op1 = op0;
18696 op0 = tmp;
18697 }
18698
18699 target = gen_reg_rtx (SImode);
18700 emit_move_insn (target, const0_rtx);
18701 target = gen_rtx_SUBREG (QImode, target, 0);
18702
18703 if ((optimize && !register_operand (op0, mode0))
18704 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18705 op0 = copy_to_mode_reg (mode0, op0);
18706 if ((optimize && !register_operand (op1, mode1))
18707 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18708 op1 = copy_to_mode_reg (mode1, op1);
18709
18710 pat = GEN_FCN (d->icode) (op0, op1);
18711 if (! pat)
18712 return 0;
18713 emit_insn (pat);
18714 emit_insn (gen_rtx_SET (VOIDmode,
18715 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18716 gen_rtx_fmt_ee (comparison, QImode,
18717 SET_DEST (pat),
18718 const0_rtx)));
18719
18720 return SUBREG_REG (target);
18721 }
18722
18723 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18724
18725 static rtx
18726 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18727 rtx target)
18728 {
18729 rtx pat;
18730 tree arg0 = CALL_EXPR_ARG (exp, 0);
18731 tree arg1 = CALL_EXPR_ARG (exp, 1);
18732 rtx op0 = expand_normal (arg0);
18733 rtx op1 = expand_normal (arg1);
18734 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18735 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18736 enum rtx_code comparison = d->comparison;
18737
18738 if (VECTOR_MODE_P (mode0))
18739 op0 = safe_vector_operand (op0, mode0);
18740 if (VECTOR_MODE_P (mode1))
18741 op1 = safe_vector_operand (op1, mode1);
18742
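/* As in the comi case above, ptest only sets the flags; materialize the
   boolean result the same way.  */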
18743 target = gen_reg_rtx (SImode);
18744 emit_move_insn (target, const0_rtx);
18745 target = gen_rtx_SUBREG (QImode, target, 0);
18746
18747 if ((optimize && !register_operand (op0, mode0))
18748 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18749 op0 = copy_to_mode_reg (mode0, op0);
18750 if ((optimize && !register_operand (op1, mode1))
18751 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18752 op1 = copy_to_mode_reg (mode1, op1);
18753
18754 pat = GEN_FCN (d->icode) (op0, op1);
18755 if (! pat)
18756 return 0;
18757 emit_insn (pat);
18758 emit_insn (gen_rtx_SET (VOIDmode,
18759 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18760 gen_rtx_fmt_ee (comparison, QImode,
18761 SET_DEST (pat),
18762 const0_rtx)));
18763
18764 return SUBREG_REG (target);
18765 }
18766
18767 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18768
18769 static rtx
18770 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18771 tree exp, rtx target)
18772 {
18773 rtx pat;
18774 tree arg0 = CALL_EXPR_ARG (exp, 0);
18775 tree arg1 = CALL_EXPR_ARG (exp, 1);
18776 tree arg2 = CALL_EXPR_ARG (exp, 2);
18777 tree arg3 = CALL_EXPR_ARG (exp, 3);
18778 tree arg4 = CALL_EXPR_ARG (exp, 4);
18779 rtx scratch0, scratch1;
18780 rtx op0 = expand_normal (arg0);
18781 rtx op1 = expand_normal (arg1);
18782 rtx op2 = expand_normal (arg2);
18783 rtx op3 = expand_normal (arg3);
18784 rtx op4 = expand_normal (arg4);
18785 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18786
18787 tmode0 = insn_data[d->icode].operand[0].mode;
18788 tmode1 = insn_data[d->icode].operand[1].mode;
18789 modev2 = insn_data[d->icode].operand[2].mode;
18790 modei3 = insn_data[d->icode].operand[3].mode;
18791 modev4 = insn_data[d->icode].operand[4].mode;
18792 modei5 = insn_data[d->icode].operand[5].mode;
18793 modeimm = insn_data[d->icode].operand[6].mode;
18794
18795 if (VECTOR_MODE_P (modev2))
18796 op0 = safe_vector_operand (op0, modev2);
18797 if (VECTOR_MODE_P (modev4))
18798 op2 = safe_vector_operand (op2, modev4);
18799
18800 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18801 op0 = copy_to_mode_reg (modev2, op0);
18802 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18803 op1 = copy_to_mode_reg (modei3, op1);
18804 if ((optimize && !register_operand (op2, modev4))
18805 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18806 op2 = copy_to_mode_reg (modev4, op2);
18807 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18808 op3 = copy_to_mode_reg (modei5, op3);
18809
18810 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18811 {
18812 error ("the fifth argument must be an 8-bit immediate");
18813 return const0_rtx;
18814 }
18815
18816 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18817 {
18818 if (optimize || !target
18819 || GET_MODE (target) != tmode0
18820 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18821 target = gen_reg_rtx (tmode0);
18822
18823 scratch1 = gen_reg_rtx (tmode1);
18824
18825 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18826 }
18827 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18828 {
18829 if (optimize || !target
18830 || GET_MODE (target) != tmode1
18831 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18832 target = gen_reg_rtx (tmode1);
18833
18834 scratch0 = gen_reg_rtx (tmode0);
18835
18836 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18837 }
18838 else
18839 {
18840 gcc_assert (d->flag);
18841
18842 scratch0 = gen_reg_rtx (tmode0);
18843 scratch1 = gen_reg_rtx (tmode1);
18844
18845 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18846 }
18847
18848 if (! pat)
18849 return 0;
18850
18851 emit_insn (pat);
18852
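/* A nonzero D->FLAG marks the variants that return a condition bit rather
   than the index or mask result; it holds the mode of the flags register
   comparison tested below.  */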
18853 if (d->flag)
18854 {
18855 target = gen_reg_rtx (SImode);
18856 emit_move_insn (target, const0_rtx);
18857 target = gen_rtx_SUBREG (QImode, target, 0);
18858
18859 emit_insn
18860 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18861 gen_rtx_fmt_ee (EQ, QImode,
18862 gen_rtx_REG ((enum machine_mode) d->flag,
18863 FLAGS_REG),
18864 const0_rtx)));
18865 return SUBREG_REG (target);
18866 }
18867 else
18868 return target;
18869 }
18870
18871
18872 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18873
18874 static rtx
18875 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18876 tree exp, rtx target)
18877 {
18878 rtx pat;
18879 tree arg0 = CALL_EXPR_ARG (exp, 0);
18880 tree arg1 = CALL_EXPR_ARG (exp, 1);
18881 tree arg2 = CALL_EXPR_ARG (exp, 2);
18882 rtx scratch0, scratch1;
18883 rtx op0 = expand_normal (arg0);
18884 rtx op1 = expand_normal (arg1);
18885 rtx op2 = expand_normal (arg2);
18886 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18887
18888 tmode0 = insn_data[d->icode].operand[0].mode;
18889 tmode1 = insn_data[d->icode].operand[1].mode;
18890 modev2 = insn_data[d->icode].operand[2].mode;
18891 modev3 = insn_data[d->icode].operand[3].mode;
18892 modeimm = insn_data[d->icode].operand[4].mode;
18893
18894 if (VECTOR_MODE_P (modev2))
18895 op0 = safe_vector_operand (op0, modev2);
18896 if (VECTOR_MODE_P (modev3))
18897 op1 = safe_vector_operand (op1, modev3);
18898
18899 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18900 op0 = copy_to_mode_reg (modev2, op0);
18901 if ((optimize && !register_operand (op1, modev3))
18902 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18903 op1 = copy_to_mode_reg (modev3, op1);
18904
18905 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18906 {
18907 error ("the third argument must be an 8-bit immediate");
18908 return const0_rtx;
18909 }
18910
18911 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18912 {
18913 if (optimize || !target
18914 || GET_MODE (target) != tmode0
18915 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18916 target = gen_reg_rtx (tmode0);
18917
18918 scratch1 = gen_reg_rtx (tmode1);
18919
18920 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
18921 }
18922 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18923 {
18924 if (optimize || !target
18925 || GET_MODE (target) != tmode1
18926 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18927 target = gen_reg_rtx (tmode1);
18928
18929 scratch0 = gen_reg_rtx (tmode0);
18930
18931 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
18932 }
18933 else
18934 {
18935 gcc_assert (d->flag);
18936
18937 scratch0 = gen_reg_rtx (tmode0);
18938 scratch1 = gen_reg_rtx (tmode1);
18939
18940 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
18941 }
18942
18943 if (! pat)
18944 return 0;
18945
18946 emit_insn (pat);
18947
18948 if (d->flag)
18949 {
18950 target = gen_reg_rtx (SImode);
18951 emit_move_insn (target, const0_rtx);
18952 target = gen_rtx_SUBREG (QImode, target, 0);
18953
18954 emit_insn
18955 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18956 gen_rtx_fmt_ee (EQ, QImode,
18957 gen_rtx_REG ((enum machine_mode) d->flag,
18958 FLAGS_REG),
18959 const0_rtx)));
18960 return SUBREG_REG (target);
18961 }
18962 else
18963 return target;
18964 }
18965
18966 /* Return the integer constant in ARG. Constrain it to be in the range
18967 of the subparts of VEC_TYPE; issue an error if not. */
18968
18969 static int
18970 get_element_number (tree vec_type, tree arg)
18971 {
18972 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18973
18974 if (!host_integerp (arg, 1)
18975 || (elt = tree_low_cst (arg, 1), elt > max))
18976 {
18977 error ("selector must be an integer constant in the range 0..%wi", max);
18978 return 0;
18979 }
18980
18981 return elt;
18982 }
18983
18984 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18985 ix86_expand_vector_init. We DO have language-level syntax for this, in
18986 the form of (type){ init-list }. Except that since we can't place emms
18987 instructions from inside the compiler, we can't allow the use of MMX
18988 registers unless the user explicitly asks for it. So we do *not* define
18989 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18990 we have builtins invoked by mmintrin.h that give us license to emit
18991 these sorts of instructions. */
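/* For instance, the _mm_set_* intrinsics in mmintrin.h are implemented in
   terms of __builtin_ia32_vec_init_* builtins, which end up here.  */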
18992
18993 static rtx
18994 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18995 {
18996 enum machine_mode tmode = TYPE_MODE (type);
18997 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18998 int i, n_elt = GET_MODE_NUNITS (tmode);
18999 rtvec v = rtvec_alloc (n_elt);
19000
19001 gcc_assert (VECTOR_MODE_P (tmode));
19002 gcc_assert (call_expr_nargs (exp) == n_elt);
19003
19004 for (i = 0; i < n_elt; ++i)
19005 {
19006 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19007 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19008 }
19009
19010 if (!target || !register_operand (target, tmode))
19011 target = gen_reg_rtx (tmode);
19012
19013 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19014 return target;
19015 }
19016
19017 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19018 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19019 had a language-level syntax for referencing vector elements. */
19020
19021 static rtx
19022 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19023 {
19024 enum machine_mode tmode, mode0;
19025 tree arg0, arg1;
19026 int elt;
19027 rtx op0;
19028
19029 arg0 = CALL_EXPR_ARG (exp, 0);
19030 arg1 = CALL_EXPR_ARG (exp, 1);
19031
19032 op0 = expand_normal (arg0);
19033 elt = get_element_number (TREE_TYPE (arg0), arg1);
19034
19035 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19036 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19037 gcc_assert (VECTOR_MODE_P (mode0));
19038
19039 op0 = force_reg (mode0, op0);
19040
19041 if (optimize || !target || !register_operand (target, tmode))
19042 target = gen_reg_rtx (tmode);
19043
19044 ix86_expand_vector_extract (true, target, op0, elt);
19045
19046 return target;
19047 }
19048
19049 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19050 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19051 a language-level syntax for referencing vector elements. */
19052
19053 static rtx
19054 ix86_expand_vec_set_builtin (tree exp)
19055 {
19056 enum machine_mode tmode, mode1;
19057 tree arg0, arg1, arg2;
19058 int elt;
19059 rtx op0, op1, target;
19060
19061 arg0 = CALL_EXPR_ARG (exp, 0);
19062 arg1 = CALL_EXPR_ARG (exp, 1);
19063 arg2 = CALL_EXPR_ARG (exp, 2);
19064
19065 tmode = TYPE_MODE (TREE_TYPE (arg0));
19066 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19067 gcc_assert (VECTOR_MODE_P (tmode));
19068
19069 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19070 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19071 elt = get_element_number (TREE_TYPE (arg0), arg2);
19072
19073 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19074 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19075
19076 op0 = force_reg (tmode, op0);
19077 op1 = force_reg (mode1, op1);
19078
19079 /* OP0 is the source of these builtin functions and shouldn't be
19080 modified. Create a copy, use it and return it as target. */
19081 target = gen_reg_rtx (tmode);
19082 emit_move_insn (target, op0);
19083 ix86_expand_vector_set (true, target, op1, elt);
19084
19085 return target;
19086 }
19087
19088 /* Expand an expression EXP that calls a built-in function,
19089 with result going to TARGET if that's convenient
19090 (and in mode MODE if that's convenient).
19091 SUBTARGET may be used as the target for computing one of EXP's operands.
19092 IGNORE is nonzero if the value is to be ignored. */
19093
19094 static rtx
19095 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19096 enum machine_mode mode ATTRIBUTE_UNUSED,
19097 int ignore ATTRIBUTE_UNUSED)
19098 {
19099 const struct builtin_description *d;
19100 size_t i;
19101 enum insn_code icode;
19102 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19103 tree arg0, arg1, arg2, arg3;
19104 rtx op0, op1, op2, op3, pat;
19105 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19106 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19107
19108 switch (fcode)
19109 {
19110 case IX86_BUILTIN_EMMS:
19111 emit_insn (gen_mmx_emms ());
19112 return 0;
19113
19114 case IX86_BUILTIN_SFENCE:
19115 emit_insn (gen_sse_sfence ());
19116 return 0;
19117
19118 case IX86_BUILTIN_MASKMOVQ:
19119 case IX86_BUILTIN_MASKMOVDQU:
19120 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19121 ? CODE_FOR_mmx_maskmovq
19122 : CODE_FOR_sse2_maskmovdqu);
19123 /* Note the arg order is different from the operand order. */
19124 arg1 = CALL_EXPR_ARG (exp, 0);
19125 arg2 = CALL_EXPR_ARG (exp, 1);
19126 arg0 = CALL_EXPR_ARG (exp, 2);
19127 op0 = expand_normal (arg0);
19128 op1 = expand_normal (arg1);
19129 op2 = expand_normal (arg2);
19130 mode0 = insn_data[icode].operand[0].mode;
19131 mode1 = insn_data[icode].operand[1].mode;
19132 mode2 = insn_data[icode].operand[2].mode;
19133
19134 op0 = force_reg (Pmode, op0);
19135 op0 = gen_rtx_MEM (mode1, op0);
19136
19137 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19138 op0 = copy_to_mode_reg (mode0, op0);
19139 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19140 op1 = copy_to_mode_reg (mode1, op1);
19141 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19142 op2 = copy_to_mode_reg (mode2, op2);
19143 pat = GEN_FCN (icode) (op0, op1, op2);
19144 if (! pat)
19145 return 0;
19146 emit_insn (pat);
19147 return 0;
19148
19149 case IX86_BUILTIN_RSQRTF:
19150 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19151
19152 case IX86_BUILTIN_SQRTSS:
19153 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19154 case IX86_BUILTIN_RSQRTSS:
19155 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19156 case IX86_BUILTIN_RCPSS:
19157 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19158
19159 case IX86_BUILTIN_LOADUPS:
19160 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19161
19162 case IX86_BUILTIN_STOREUPS:
19163 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19164
19165 case IX86_BUILTIN_LOADHPS:
19166 case IX86_BUILTIN_LOADLPS:
19167 case IX86_BUILTIN_LOADHPD:
19168 case IX86_BUILTIN_LOADLPD:
19169 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19170 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19171 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19172 : CODE_FOR_sse2_loadlpd);
19173 arg0 = CALL_EXPR_ARG (exp, 0);
19174 arg1 = CALL_EXPR_ARG (exp, 1);
19175 op0 = expand_normal (arg0);
19176 op1 = expand_normal (arg1);
19177 tmode = insn_data[icode].operand[0].mode;
19178 mode0 = insn_data[icode].operand[1].mode;
19179 mode1 = insn_data[icode].operand[2].mode;
19180
19181 op0 = force_reg (mode0, op0);
19182 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19183 if (optimize || target == 0
19184 || GET_MODE (target) != tmode
19185 || !register_operand (target, tmode))
19186 target = gen_reg_rtx (tmode);
19187 pat = GEN_FCN (icode) (target, op0, op1);
19188 if (! pat)
19189 return 0;
19190 emit_insn (pat);
19191 return target;
19192
19193 case IX86_BUILTIN_STOREHPS:
19194 case IX86_BUILTIN_STORELPS:
19195 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19196 : CODE_FOR_sse_storelps);
19197 arg0 = CALL_EXPR_ARG (exp, 0);
19198 arg1 = CALL_EXPR_ARG (exp, 1);
19199 op0 = expand_normal (arg0);
19200 op1 = expand_normal (arg1);
19201 mode0 = insn_data[icode].operand[0].mode;
19202 mode1 = insn_data[icode].operand[1].mode;
19203
19204 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19205 op1 = force_reg (mode1, op1);
19206
19207 pat = GEN_FCN (icode) (op0, op1);
19208 if (! pat)
19209 return 0;
19210 emit_insn (pat);
19211 return const0_rtx;
19212
19213 case IX86_BUILTIN_MOVNTPS:
19214 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19215 case IX86_BUILTIN_MOVNTQ:
19216 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19217
19218 case IX86_BUILTIN_LDMXCSR:
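      /* ldmxcsr takes a memory operand, so spill the new control word to a
	 stack slot first.  */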
19219 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19220 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19221 emit_move_insn (target, op0);
19222 emit_insn (gen_sse_ldmxcsr (target));
19223 return 0;
19224
19225 case IX86_BUILTIN_STMXCSR:
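      /* stmxcsr stores the control word to memory; write it to a stack slot
	 and load the result back into a register.  */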
19226 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19227 emit_insn (gen_sse_stmxcsr (target));
19228 return copy_to_mode_reg (SImode, target);
19229
19230 case IX86_BUILTIN_SHUFPS:
19231 case IX86_BUILTIN_SHUFPD:
19232 icode = (fcode == IX86_BUILTIN_SHUFPS
19233 ? CODE_FOR_sse_shufps
19234 : CODE_FOR_sse2_shufpd);
19235 arg0 = CALL_EXPR_ARG (exp, 0);
19236 arg1 = CALL_EXPR_ARG (exp, 1);
19237 arg2 = CALL_EXPR_ARG (exp, 2);
19238 op0 = expand_normal (arg0);
19239 op1 = expand_normal (arg1);
19240 op2 = expand_normal (arg2);
19241 tmode = insn_data[icode].operand[0].mode;
19242 mode0 = insn_data[icode].operand[1].mode;
19243 mode1 = insn_data[icode].operand[2].mode;
19244 mode2 = insn_data[icode].operand[3].mode;
19245
19246 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19247 op0 = copy_to_mode_reg (mode0, op0);
19248 if ((optimize && !register_operand (op1, mode1))
19249 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19250 op1 = copy_to_mode_reg (mode1, op1);
19251 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19252 {
19253 /* @@@ better error message */
19254 error ("mask must be an immediate");
19255 return gen_reg_rtx (tmode);
19256 }
19257 if (optimize || target == 0
19258 || GET_MODE (target) != tmode
19259 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19260 target = gen_reg_rtx (tmode);
19261 pat = GEN_FCN (icode) (target, op0, op1, op2);
19262 if (! pat)
19263 return 0;
19264 emit_insn (pat);
19265 return target;
19266
19267 case IX86_BUILTIN_PSHUFW:
19268 case IX86_BUILTIN_PSHUFD:
19269 case IX86_BUILTIN_PSHUFHW:
19270 case IX86_BUILTIN_PSHUFLW:
19271 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19272 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19273 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19274 : CODE_FOR_mmx_pshufw);
19275 arg0 = CALL_EXPR_ARG (exp, 0);
19276 arg1 = CALL_EXPR_ARG (exp, 1);
19277 op0 = expand_normal (arg0);
19278 op1 = expand_normal (arg1);
19279 tmode = insn_data[icode].operand[0].mode;
19280 mode1 = insn_data[icode].operand[1].mode;
19281 mode2 = insn_data[icode].operand[2].mode;
19282
19283 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19284 op0 = copy_to_mode_reg (mode1, op0);
19285 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19286 {
19287 /* @@@ better error message */
19288 error ("mask must be an immediate");
19289 return const0_rtx;
19290 }
19291 if (target == 0
19292 || GET_MODE (target) != tmode
19293 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19294 target = gen_reg_rtx (tmode);
19295 pat = GEN_FCN (icode) (target, op0, op1);
19296 if (! pat)
19297 return 0;
19298 emit_insn (pat);
19299 return target;
19300
19301 case IX86_BUILTIN_PSLLWI128:
19302 icode = CODE_FOR_ashlv8hi3;
19303 goto do_pshifti;
19304 case IX86_BUILTIN_PSLLDI128:
19305 icode = CODE_FOR_ashlv4si3;
19306 goto do_pshifti;
19307 case IX86_BUILTIN_PSLLQI128:
19308 icode = CODE_FOR_ashlv2di3;
19309 goto do_pshifti;
19310 case IX86_BUILTIN_PSRAWI128:
19311 icode = CODE_FOR_ashrv8hi3;
19312 goto do_pshifti;
19313 case IX86_BUILTIN_PSRADI128:
19314 icode = CODE_FOR_ashrv4si3;
19315 goto do_pshifti;
19316 case IX86_BUILTIN_PSRLWI128:
19317 icode = CODE_FOR_lshrv8hi3;
19318 goto do_pshifti;
19319 case IX86_BUILTIN_PSRLDI128:
19320 icode = CODE_FOR_lshrv4si3;
19321 goto do_pshifti;
19322 case IX86_BUILTIN_PSRLQI128:
19323 icode = CODE_FOR_lshrv2di3;
19324 goto do_pshifti;
19325 do_pshifti:
19326 arg0 = CALL_EXPR_ARG (exp, 0);
19327 arg1 = CALL_EXPR_ARG (exp, 1);
19328 op0 = expand_normal (arg0);
19329 op1 = expand_normal (arg1);
19330
19331 if (!CONST_INT_P (op1))
19332 {
19333 error ("shift must be an immediate");
19334 return const0_rtx;
19335 }
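      /* Counts outside 0..255 cannot be encoded; clamp them to 255, which
	 behaves like any other out-of-range shift count.  */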
19336 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19337 op1 = GEN_INT (255);
19338
19339 tmode = insn_data[icode].operand[0].mode;
19340 mode1 = insn_data[icode].operand[1].mode;
19341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19342 op0 = copy_to_reg (op0);
19343
19344 target = gen_reg_rtx (tmode);
19345 pat = GEN_FCN (icode) (target, op0, op1);
19346 if (!pat)
19347 return 0;
19348 emit_insn (pat);
19349 return target;
19350
19351 case IX86_BUILTIN_PSLLW128:
19352 icode = CODE_FOR_ashlv8hi3;
19353 goto do_pshift;
19354 case IX86_BUILTIN_PSLLD128:
19355 icode = CODE_FOR_ashlv4si3;
19356 goto do_pshift;
19357 case IX86_BUILTIN_PSLLQ128:
19358 icode = CODE_FOR_ashlv2di3;
19359 goto do_pshift;
19360 case IX86_BUILTIN_PSRAW128:
19361 icode = CODE_FOR_ashrv8hi3;
19362 goto do_pshift;
19363 case IX86_BUILTIN_PSRAD128:
19364 icode = CODE_FOR_ashrv4si3;
19365 goto do_pshift;
19366 case IX86_BUILTIN_PSRLW128:
19367 icode = CODE_FOR_lshrv8hi3;
19368 goto do_pshift;
19369 case IX86_BUILTIN_PSRLD128:
19370 icode = CODE_FOR_lshrv4si3;
19371 goto do_pshift;
19372 case IX86_BUILTIN_PSRLQ128:
19373 icode = CODE_FOR_lshrv2di3;
19374 goto do_pshift;
19375 do_pshift:
19376 arg0 = CALL_EXPR_ARG (exp, 0);
19377 arg1 = CALL_EXPR_ARG (exp, 1);
19378 op0 = expand_normal (arg0);
19379 op1 = expand_normal (arg1);
19380
19381 tmode = insn_data[icode].operand[0].mode;
19382 mode1 = insn_data[icode].operand[1].mode;
19383
19384 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19385 op0 = copy_to_reg (op0);
19386
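      /* These shift patterns take the count operand in TImode; reinterpret
	 the vector count argument accordingly.  */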
19387 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19388 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19389 op1 = copy_to_reg (op1);
19390
19391 target = gen_reg_rtx (tmode);
19392 pat = GEN_FCN (icode) (target, op0, op1);
19393 if (!pat)
19394 return 0;
19395 emit_insn (pat);
19396 return target;
19397
19398 case IX86_BUILTIN_PSLLDQI128:
19399 case IX86_BUILTIN_PSRLDQI128:
19400 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19401 : CODE_FOR_sse2_lshrti3);
19402 arg0 = CALL_EXPR_ARG (exp, 0);
19403 arg1 = CALL_EXPR_ARG (exp, 1);
19404 op0 = expand_normal (arg0);
19405 op1 = expand_normal (arg1);
19406 tmode = insn_data[icode].operand[0].mode;
19407 mode1 = insn_data[icode].operand[1].mode;
19408 mode2 = insn_data[icode].operand[2].mode;
19409
19410 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19411 {
19412 op0 = copy_to_reg (op0);
19413 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19414 }
19415 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19416 {
19417 error ("shift must be an immediate");
19418 return const0_rtx;
19419 }
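      /* The ashlti3/lshrti3 patterns compute in TImode; generate into a
	 TImode view of a V2DImode pseudo, which is the mode the builtin
	 returns.  */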
19420 target = gen_reg_rtx (V2DImode);
19421 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19422 op0, op1);
19423 if (! pat)
19424 return 0;
19425 emit_insn (pat);
19426 return target;
19427
19428 case IX86_BUILTIN_FEMMS:
19429 emit_insn (gen_mmx_femms ());
19430 return NULL_RTX;
19431
19432 case IX86_BUILTIN_PAVGUSB:
19433 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19434
19435 case IX86_BUILTIN_PF2ID:
19436 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19437
19438 case IX86_BUILTIN_PFACC:
19439 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19440
19441 case IX86_BUILTIN_PFADD:
19442 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19443
19444 case IX86_BUILTIN_PFCMPEQ:
19445 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19446
19447 case IX86_BUILTIN_PFCMPGE:
19448 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19449
19450 case IX86_BUILTIN_PFCMPGT:
19451 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19452
19453 case IX86_BUILTIN_PFMAX:
19454 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19455
19456 case IX86_BUILTIN_PFMIN:
19457 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19458
19459 case IX86_BUILTIN_PFMUL:
19460 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19461
19462 case IX86_BUILTIN_PFRCP:
19463 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19464
19465 case IX86_BUILTIN_PFRCPIT1:
19466 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19467
19468 case IX86_BUILTIN_PFRCPIT2:
19469 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19470
19471 case IX86_BUILTIN_PFRSQIT1:
19472 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19473
19474 case IX86_BUILTIN_PFRSQRT:
19475 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19476
19477 case IX86_BUILTIN_PFSUB:
19478 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19479
19480 case IX86_BUILTIN_PFSUBR:
19481 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19482
19483 case IX86_BUILTIN_PI2FD:
19484 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19485
19486 case IX86_BUILTIN_PMULHRW:
19487 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19488
19489 case IX86_BUILTIN_PF2IW:
19490 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19491
19492 case IX86_BUILTIN_PFNACC:
19493 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19494
19495 case IX86_BUILTIN_PFPNACC:
19496 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19497
19498 case IX86_BUILTIN_PI2FW:
19499 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19500
19501 case IX86_BUILTIN_PSWAPDSI:
19502 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19503
19504 case IX86_BUILTIN_PSWAPDSF:
19505 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19506
19507 case IX86_BUILTIN_SQRTSD:
19508 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19509 case IX86_BUILTIN_LOADUPD:
19510 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19511 case IX86_BUILTIN_STOREUPD:
19512 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19513
19514 case IX86_BUILTIN_MFENCE:
19515 emit_insn (gen_sse2_mfence ());
19516 return 0;
19517 case IX86_BUILTIN_LFENCE:
19518 emit_insn (gen_sse2_lfence ());
19519 return 0;
19520
19521 case IX86_BUILTIN_CLFLUSH:
19522 arg0 = CALL_EXPR_ARG (exp, 0);
19523 op0 = expand_normal (arg0);
19524 icode = CODE_FOR_sse2_clflush;
19525 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19526 op0 = copy_to_mode_reg (Pmode, op0);
19527
19528 emit_insn (gen_sse2_clflush (op0));
19529 return 0;
19530
19531 case IX86_BUILTIN_MOVNTPD:
19532 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19533 case IX86_BUILTIN_MOVNTDQ:
19534 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19535 case IX86_BUILTIN_MOVNTI:
19536 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19537
19538 case IX86_BUILTIN_LOADDQU:
19539 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19540 case IX86_BUILTIN_STOREDQU:
19541 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19542
19543 case IX86_BUILTIN_MONITOR:
19544 arg0 = CALL_EXPR_ARG (exp, 0);
19545 arg1 = CALL_EXPR_ARG (exp, 1);
19546 arg2 = CALL_EXPR_ARG (exp, 2);
19547 op0 = expand_normal (arg0);
19548 op1 = expand_normal (arg1);
19549 op2 = expand_normal (arg2);
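      /* The monitored address is a Pmode pointer; the extension and hint
	 operands are SImode.  */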
19550 if (!REG_P (op0))
19551 op0 = copy_to_mode_reg (Pmode, op0);
19552 if (!REG_P (op1))
19553 op1 = copy_to_mode_reg (SImode, op1);
19554 if (!REG_P (op2))
19555 op2 = copy_to_mode_reg (SImode, op2);
19556 if (!TARGET_64BIT)
19557 emit_insn (gen_sse3_monitor (op0, op1, op2));
19558 else
19559 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19560 return 0;
19561
19562 case IX86_BUILTIN_MWAIT:
19563 arg0 = CALL_EXPR_ARG (exp, 0);
19564 arg1 = CALL_EXPR_ARG (exp, 1);
19565 op0 = expand_normal (arg0);
19566 op1 = expand_normal (arg1);
19567 if (!REG_P (op0))
19568 op0 = copy_to_mode_reg (SImode, op0);
19569 if (!REG_P (op1))
19570 op1 = copy_to_mode_reg (SImode, op1);
19571 emit_insn (gen_sse3_mwait (op0, op1));
19572 return 0;
19573
19574 case IX86_BUILTIN_LDDQU:
19575 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19576 target, 1);
19577
19578 case IX86_BUILTIN_PALIGNR:
19579 case IX86_BUILTIN_PALIGNR128:
19580 if (fcode == IX86_BUILTIN_PALIGNR)
19581 {
19582 icode = CODE_FOR_ssse3_palignrdi;
19583 mode = DImode;
19584 }
19585 else
19586 {
19587 icode = CODE_FOR_ssse3_palignrti;
19588 mode = V2DImode;
19589 }
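      /* The palignr patterns operate on DImode or TImode views of the
	 operands, so the vector arguments are adjusted with subregs below.  */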
19590 arg0 = CALL_EXPR_ARG (exp, 0);
19591 arg1 = CALL_EXPR_ARG (exp, 1);
19592 arg2 = CALL_EXPR_ARG (exp, 2);
19593 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19594 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19595 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19596 tmode = insn_data[icode].operand[0].mode;
19597 mode1 = insn_data[icode].operand[1].mode;
19598 mode2 = insn_data[icode].operand[2].mode;
19599 mode3 = insn_data[icode].operand[3].mode;
19600
19601 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19602 {
19603 op0 = copy_to_reg (op0);
19604 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19605 }
19606 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19607 {
19608 op1 = copy_to_reg (op1);
19609 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19610 }
19611 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19612 {
19613 error ("shift must be an immediate");
19614 return const0_rtx;
19615 }
19616 target = gen_reg_rtx (mode);
19617 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19618 op0, op1, op2);
19619 if (! pat)
19620 return 0;
19621 emit_insn (pat);
19622 return target;
19623
19624 case IX86_BUILTIN_MOVNTDQA:
19625 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19626 target, 1);
19627
19628 case IX86_BUILTIN_MOVNTSD:
19629 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19630
19631 case IX86_BUILTIN_MOVNTSS:
19632 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19633
19634 case IX86_BUILTIN_INSERTQ:
19635 case IX86_BUILTIN_EXTRQ:
19636 icode = (fcode == IX86_BUILTIN_EXTRQ
19637 ? CODE_FOR_sse4a_extrq
19638 : CODE_FOR_sse4a_insertq);
19639 arg0 = CALL_EXPR_ARG (exp, 0);
19640 arg1 = CALL_EXPR_ARG (exp, 1);
19641 op0 = expand_normal (arg0);
19642 op1 = expand_normal (arg1);
19643 tmode = insn_data[icode].operand[0].mode;
19644 mode1 = insn_data[icode].operand[1].mode;
19645 mode2 = insn_data[icode].operand[2].mode;
19646 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19647 op0 = copy_to_mode_reg (mode1, op0);
19648 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19649 op1 = copy_to_mode_reg (mode2, op1);
19650 if (optimize || target == 0
19651 || GET_MODE (target) != tmode
19652 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19653 target = gen_reg_rtx (tmode);
19654 pat = GEN_FCN (icode) (target, op0, op1);
19655 if (! pat)
19656 return NULL_RTX;
19657 emit_insn (pat);
19658 return target;
19659
19660 case IX86_BUILTIN_EXTRQI:
19661 icode = CODE_FOR_sse4a_extrqi;
19662 arg0 = CALL_EXPR_ARG (exp, 0);
19663 arg1 = CALL_EXPR_ARG (exp, 1);
19664 arg2 = CALL_EXPR_ARG (exp, 2);
19665 op0 = expand_normal (arg0);
19666 op1 = expand_normal (arg1);
19667 op2 = expand_normal (arg2);
19668 tmode = insn_data[icode].operand[0].mode;
19669 mode1 = insn_data[icode].operand[1].mode;
19670 mode2 = insn_data[icode].operand[2].mode;
19671 mode3 = insn_data[icode].operand[3].mode;
19672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19673 op0 = copy_to_mode_reg (mode1, op0);
19674 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19675 {
19676 error ("index mask must be an immediate");
19677 return gen_reg_rtx (tmode);
19678 }
19679 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19680 {
19681 error ("length mask must be an immediate");
19682 return gen_reg_rtx (tmode);
19683 }
19684 if (optimize || target == 0
19685 || GET_MODE (target) != tmode
19686 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19687 target = gen_reg_rtx (tmode);
19688 pat = GEN_FCN (icode) (target, op0, op1, op2);
19689 if (! pat)
19690 return NULL_RTX;
19691 emit_insn (pat);
19692 return target;
19693
19694 case IX86_BUILTIN_INSERTQI:
19695 icode = CODE_FOR_sse4a_insertqi;
19696 arg0 = CALL_EXPR_ARG (exp, 0);
19697 arg1 = CALL_EXPR_ARG (exp, 1);
19698 arg2 = CALL_EXPR_ARG (exp, 2);
19699 arg3 = CALL_EXPR_ARG (exp, 3);
19700 op0 = expand_normal (arg0);
19701 op1 = expand_normal (arg1);
19702 op2 = expand_normal (arg2);
19703 op3 = expand_normal (arg3);
19704 tmode = insn_data[icode].operand[0].mode;
19705 mode1 = insn_data[icode].operand[1].mode;
19706 mode2 = insn_data[icode].operand[2].mode;
19707 mode3 = insn_data[icode].operand[3].mode;
19708 mode4 = insn_data[icode].operand[4].mode;
19709
19710 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19711 op0 = copy_to_mode_reg (mode1, op0);
19712
19713 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19714 op1 = copy_to_mode_reg (mode2, op1);
19715
19716 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19717 {
19718 error ("index mask must be an immediate");
19719 return gen_reg_rtx (tmode);
19720 }
19721 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19722 {
19723 error ("length mask must be an immediate");
19724 return gen_reg_rtx (tmode);
19725 }
19726 if (optimize || target == 0
19727 || GET_MODE (target) != tmode
19728 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19729 target = gen_reg_rtx (tmode);
19730 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19731 if (! pat)
19732 return NULL_RTX;
19733 emit_insn (pat);
19734 return target;
19735
19736 case IX86_BUILTIN_VEC_INIT_V2SI:
19737 case IX86_BUILTIN_VEC_INIT_V4HI:
19738 case IX86_BUILTIN_VEC_INIT_V8QI:
19739 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19740
19741 case IX86_BUILTIN_VEC_EXT_V2DF:
19742 case IX86_BUILTIN_VEC_EXT_V2DI:
19743 case IX86_BUILTIN_VEC_EXT_V4SF:
19744 case IX86_BUILTIN_VEC_EXT_V4SI:
19745 case IX86_BUILTIN_VEC_EXT_V8HI:
19746 case IX86_BUILTIN_VEC_EXT_V2SI:
19747 case IX86_BUILTIN_VEC_EXT_V4HI:
19748 case IX86_BUILTIN_VEC_EXT_V16QI:
19749 return ix86_expand_vec_ext_builtin (exp, target);
19750
19751 case IX86_BUILTIN_VEC_SET_V2DI:
19752 case IX86_BUILTIN_VEC_SET_V4SF:
19753 case IX86_BUILTIN_VEC_SET_V4SI:
19754 case IX86_BUILTIN_VEC_SET_V8HI:
19755 case IX86_BUILTIN_VEC_SET_V4HI:
19756 case IX86_BUILTIN_VEC_SET_V16QI:
19757 return ix86_expand_vec_set_builtin (exp);
19758
19759 case IX86_BUILTIN_INFQ:
19760 {
19761 REAL_VALUE_TYPE inf;
19762 rtx tmp;
19763
19764 real_inf (&inf);
19765 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19766
19767 tmp = validize_mem (force_const_mem (mode, tmp));
19768
19769 if (target == 0)
19770 target = gen_reg_rtx (mode);
19771
19772 emit_move_insn (target, tmp);
19773 return target;
19774 }
19775
19776 case IX86_BUILTIN_FABSQ:
19777 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19778
19779 case IX86_BUILTIN_COPYSIGNQ:
19780 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19781
19782 default:
19783 break;
19784 }
19785
19786 for (i = 0, d = bdesc_sse_3arg;
19787 i < ARRAY_SIZE (bdesc_sse_3arg);
19788 i++, d++)
19789 if (d->code == fcode)
19790 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19791 target);
19792
19793 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19794 if (d->code == fcode)
19795 {
19796 /* Compares are treated specially. */
19797 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19798 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19799 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19800 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19801 return ix86_expand_sse_compare (d, exp, target);
19802
19803 return ix86_expand_binop_builtin (d->icode, exp, target);
19804 }
19805
19806 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19807 if (d->code == fcode)
19808 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19809
19810 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19811 if (d->code == fcode)
19812 return ix86_expand_sse_comi (d, exp, target);
19813
19814 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19815 if (d->code == fcode)
19816 return ix86_expand_sse_ptest (d, exp, target);
19817
19818 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19819 if (d->code == fcode)
19820 return ix86_expand_crc32 (d->icode, exp, target);
19821
19822 for (i = 0, d = bdesc_pcmpestr;
19823 i < ARRAY_SIZE (bdesc_pcmpestr);
19824 i++, d++)
19825 if (d->code == fcode)
19826 return ix86_expand_sse_pcmpestr (d, exp, target);
19827
19828 for (i = 0, d = bdesc_pcmpistr;
19829 i < ARRAY_SIZE (bdesc_pcmpistr);
19830 i++, d++)
19831 if (d->code == fcode)
19832 return ix86_expand_sse_pcmpistr (d, exp, target);
19833
19834 gcc_unreachable ();
19835 }
19836
19837 /* Returns a function decl for a vectorized version of the builtin function
19838 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19839 if it is not available. */
19840
19841 static tree
19842 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19843 tree type_in)
19844 {
19845 enum machine_mode in_mode, out_mode;
19846 int in_n, out_n;
19847
19848 if (TREE_CODE (type_out) != VECTOR_TYPE
19849 || TREE_CODE (type_in) != VECTOR_TYPE)
19850 return NULL_TREE;
19851
19852 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19853 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19854 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19855 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19856
19857 switch (fn)
19858 {
19859 case BUILT_IN_SQRT:
19860 if (out_mode == DFmode && out_n == 2
19861 && in_mode == DFmode && in_n == 2)
19862 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19863 return NULL_TREE;
19864
19865 case BUILT_IN_SQRTF:
19866 if (out_mode == SFmode && out_n == 4
19867 && in_mode == SFmode && in_n == 4)
19868 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19869 return NULL_TREE;
19870
19871 case BUILT_IN_LRINT:
19872 if (out_mode == SImode && out_n == 4
19873 && in_mode == DFmode && in_n == 2)
19874 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
19875 return NULL_TREE;
19876
19877 case BUILT_IN_LRINTF:
19878 if (out_mode == SImode && out_n == 4
19879 && in_mode == SFmode && in_n == 4)
19880 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19881 return NULL_TREE;
19882
19883 default:
19884 ;
19885 }
19886
19887 return NULL_TREE;
19888 }
19889
19890 /* Returns a decl of a function that implements conversion of the
19891 input vector of type TYPE, or NULL_TREE if it is not available. */
19892
19893 static tree
19894 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
19895 {
19896 if (TREE_CODE (type) != VECTOR_TYPE)
19897 return NULL_TREE;
19898
19899 switch (code)
19900 {
19901 case FLOAT_EXPR:
19902 switch (TYPE_MODE (type))
19903 {
19904 case V4SImode:
19905 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19906 default:
19907 return NULL_TREE;
19908 }
19909
19910 case FIX_TRUNC_EXPR:
19911 switch (TYPE_MODE (type))
19912 {
19913 case V4SFmode:
19914 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19915 default:
19916 return NULL_TREE;
19917 }
19918 default:
19919 return NULL_TREE;
19920
19921 }
19922 }
19923
19924 /* Returns a decl of a target-specific builtin that implements the
19925 reciprocal of the function, or NULL_TREE if it is not available. */
19926
19927 static tree
19928 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
19929 bool sqrt ATTRIBUTE_UNUSED)
19930 {
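  /* The reciprocal approximations are only worthwhile when SSE math and the
     reciprocal optimizations are enabled and the math flags allow the loss
     of precision.  */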
19931 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
19932 && flag_finite_math_only && !flag_trapping_math
19933 && flag_unsafe_math_optimizations))
19934 return NULL_TREE;
19935
19936 if (md_fn)
19937 /* Machine dependent builtins. */
19938 switch (fn)
19939 {
19940 /* Vectorized version of sqrt to rsqrt conversion. */
19941 case IX86_BUILTIN_SQRTPS:
19942 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
19943
19944 default:
19945 return NULL_TREE;
19946 }
19947 else
19948 /* Normal builtins. */
19949 switch (fn)
19950 {
19951 /* Sqrt to rsqrt conversion. */
19952 case BUILT_IN_SQRTF:
19953 return ix86_builtins[IX86_BUILTIN_RSQRTF];
19954
19955 default:
19956 return NULL_TREE;
19957 }
19958 }
19959
19960 /* Store OPERAND to memory after reload is completed. This means
19961 that we can't easily use assign_stack_local. */
19962 rtx
19963 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19964 {
19965 rtx result;
19966
19967 gcc_assert (reload_completed);
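  /* With a red zone we may store below the stack pointer without
     adjusting it.  */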
19968 if (TARGET_RED_ZONE)
19969 {
19970 result = gen_rtx_MEM (mode,
19971 gen_rtx_PLUS (Pmode,
19972 stack_pointer_rtx,
19973 GEN_INT (-RED_ZONE_SIZE)));
19974 emit_move_insn (result, operand);
19975 }
19976 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19977 {
19978 switch (mode)
19979 {
19980 case HImode:
19981 case SImode:
19982 operand = gen_lowpart (DImode, operand);
19983 /* FALLTHRU */
19984 case DImode:
19985 emit_insn (
19986 gen_rtx_SET (VOIDmode,
19987 gen_rtx_MEM (DImode,
19988 gen_rtx_PRE_DEC (DImode,
19989 stack_pointer_rtx)),
19990 operand));
19991 break;
19992 default:
19993 gcc_unreachable ();
19994 }
19995 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19996 }
19997 else
19998 {
19999 switch (mode)
20000 {
20001 case DImode:
20002 {
20003 rtx operands[2];
20004 split_di (&operand, 1, operands, operands + 1);
20005 emit_insn (
20006 gen_rtx_SET (VOIDmode,
20007 gen_rtx_MEM (SImode,
20008 gen_rtx_PRE_DEC (Pmode,
20009 stack_pointer_rtx)),
20010 operands[1]));
20011 emit_insn (
20012 gen_rtx_SET (VOIDmode,
20013 gen_rtx_MEM (SImode,
20014 gen_rtx_PRE_DEC (Pmode,
20015 stack_pointer_rtx)),
20016 operands[0]));
20017 }
20018 break;
20019 case HImode:
20020 /* Store HImodes as SImodes. */
20021 operand = gen_lowpart (SImode, operand);
20022 /* FALLTHRU */
20023 case SImode:
20024 emit_insn (
20025 gen_rtx_SET (VOIDmode,
20026 gen_rtx_MEM (GET_MODE (operand),
20027 gen_rtx_PRE_DEC (SImode,
20028 stack_pointer_rtx)),
20029 operand));
20030 break;
20031 default:
20032 gcc_unreachable ();
20033 }
20034 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20035 }
20036 return result;
20037 }
20038
20039 /* Free the operand from memory. */
20040 void
20041 ix86_free_from_memory (enum machine_mode mode)
20042 {
20043 if (!TARGET_RED_ZONE)
20044 {
20045 int size;
20046
20047 if (mode == DImode || TARGET_64BIT)
20048 size = 8;
20049 else
20050 size = 4;
20051 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20052 to a pop or add instruction if registers are available. */
20053 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20054 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20055 GEN_INT (size))));
20056 }
20057 }
20058
20059 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20060 QImode must go into class Q_REGS.
20061 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
20062 movdf to do mem-to-mem moves through integer regs. */
20063 enum reg_class
20064 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20065 {
20066 enum machine_mode mode = GET_MODE (x);
20067
20068 /* We're only allowed to return a subclass of CLASS. Many of the
20069 following checks fail for NO_REGS, so eliminate that early. */
20070 if (regclass == NO_REGS)
20071 return NO_REGS;
20072
20073 /* All classes can load zeros. */
20074 if (x == CONST0_RTX (mode))
20075 return regclass;
20076
20077 /* Force constants into memory if we are loading a (nonzero) constant into
20078 an MMX or SSE register. This is because there are no MMX/SSE instructions
20079 to load from a constant. */
20080 if (CONSTANT_P (x)
20081 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20082 return NO_REGS;
20083
20084 /* Prefer SSE regs only, if we can use them for math. */
20085 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20086 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20087
20088 /* Floating-point constants need more complex checks. */
20089 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20090 {
20091 /* General regs can load everything. */
20092 if (reg_class_subset_p (regclass, GENERAL_REGS))
20093 return regclass;
20094
20095 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20096 zero above. We only want to wind up preferring 80387 registers if
20097 we plan on doing computation with them. */
20098 if (TARGET_80387
20099 && standard_80387_constant_p (x))
20100 {
20101 /* Limit class to non-sse. */
20102 if (regclass == FLOAT_SSE_REGS)
20103 return FLOAT_REGS;
20104 if (regclass == FP_TOP_SSE_REGS)
20105 return FP_TOP_REG;
20106 if (regclass == FP_SECOND_SSE_REGS)
20107 return FP_SECOND_REG;
20108 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20109 return regclass;
20110 }
20111
20112 return NO_REGS;
20113 }
20114
20115 /* Generally when we see PLUS here, it's the function invariant
20116 (plus soft-fp const_int), which can only be computed into general
20117 regs. */
20118 if (GET_CODE (x) == PLUS)
20119 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20120
20121 /* QImode constants are easy to load, but non-constant QImode data
20122 must go into Q_REGS. */
20123 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20124 {
20125 if (reg_class_subset_p (regclass, Q_REGS))
20126 return regclass;
20127 if (reg_class_subset_p (Q_REGS, regclass))
20128 return Q_REGS;
20129 return NO_REGS;
20130 }
20131
20132 return regclass;
20133 }
20134
20135 /* Discourage putting floating-point values in SSE registers unless
20136 SSE math is being used, and likewise for the 387 registers. */
20137 enum reg_class
20138 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20139 {
20140 enum machine_mode mode = GET_MODE (x);
20141
20142 /* Restrict the output reload class to the register bank that we are doing
20143 math on. If we would like not to return a subset of CLASS, reject this
20144 alternative: if reload cannot do this, it will still use its choice. */
20145 mode = GET_MODE (x);
20146 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20147 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20148
20149 if (X87_FLOAT_MODE_P (mode))
20150 {
20151 if (regclass == FP_TOP_SSE_REGS)
20152 return FP_TOP_REG;
20153 else if (regclass == FP_SECOND_SSE_REGS)
20154 return FP_SECOND_REG;
20155 else
20156 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20157 }
20158
20159 return regclass;
20160 }
20161
20162 /* If we are copying between general and FP registers, we need a memory
20163 location. The same is true for SSE and MMX registers.
20164
20165 The macro can't work reliably when one of the CLASSES is a class containing
20166 registers from multiple units (SSE, MMX, integer). We avoid this by never
20167 combining those units in a single alternative in the machine description.
20168 Ensure that this constraint holds to avoid unexpected surprises.
20169
20170 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20171 enforce these sanity checks. */
20172
20173 int
20174 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20175 enum machine_mode mode, int strict)
20176 {
20177 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20178 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20179 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20180 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20181 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20182 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20183 {
20184 gcc_assert (!strict);
20185 return true;
20186 }
20187
20188 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20189 return true;
20190
20191 /* ??? This is a lie. We do have moves between mmx/general and between
20192 mmx/sse2. But by saying we need secondary memory we discourage the
20193 register allocator from using the mmx registers unless needed. */
20194 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20195 return true;
20196
20197 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20198 {
20199 /* SSE1 doesn't have any direct moves from other classes. */
20200 if (!TARGET_SSE2)
20201 return true;
20202
20203 /* If the target says that inter-unit moves are more expensive
20204 than moving through memory, then don't generate them. */
20205 if (!TARGET_INTER_UNIT_MOVES)
20206 return true;
20207
20208 /* Between SSE and general, we have moves no larger than word size. */
20209 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20210 return true;
20211 }
20212
20213 return false;
20214 }
20215
20216 /* Return true if the registers in CLASS cannot represent the change from
20217 modes FROM to TO. */
20218
20219 bool
20220 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20221 enum reg_class regclass)
20222 {
20223 if (from == to)
20224 return false;
20225
20226 /* x87 registers can't do subreg at all, as all values are reformatted
20227 to extended precision. */
20228 if (MAYBE_FLOAT_CLASS_P (regclass))
20229 return true;
20230
20231 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20232 {
20233 /* Vector registers do not support QI or HImode loads. If we don't
20234 disallow a change to these modes, reload will assume it's ok to
20235 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20236 the vec_dupv4hi pattern. */
20237 if (GET_MODE_SIZE (from) < 4)
20238 return true;
20239
20240 /* Vector registers do not support subreg with nonzero offsets, which
20241 are otherwise valid for integer registers. Since we can't see
20242 whether we have a nonzero offset from here, prohibit all
20243 nonparadoxical subregs changing size. */
20244 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20245 return true;
20246 }
20247
20248 return false;
20249 }
20250
20251 /* Return the cost of moving data from a register in class CLASS1 to
20252 one in class CLASS2.
20253
20254 It is not required that the cost always equal 2 when FROM is the same as TO;
20255 on some machines it is expensive to move between registers if they are not
20256 general registers. */
20257
20258 int
20259 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20260 enum reg_class class2)
20261 {
20262 /* In case we require secondary memory, compute the cost of the store
20263 followed by the load. To avoid bad register allocation choices, this needs
20264 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20265
20266 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
20267 {
20268 int cost = 1;
20269
20270 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
20271 MEMORY_MOVE_COST (mode, class1, 1));
20272 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
20273 MEMORY_MOVE_COST (mode, class2, 1));
20274
20275 /* When copying from a general purpose register we may emit multiple
20276 stores followed by a single load, causing a memory size mismatch stall.
20277 Count this as an arbitrarily high cost of 20. */
20278 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20279 cost += 20;
20280
20281 /* In the case of FP/MMX moves, the registers actually overlap, and we
20282 have to switch modes in order to treat them differently. */
20283 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20284 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20285 cost += 20;
20286
20287 return cost;
20288 }
20289
20290 /* Moves between SSE/MMX and integer unit are expensive. */
20291 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20292 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20293
20294 /* ??? By keeping the returned value relatively high, we limit the number
20295 of moves between integer and MMX/SSE registers for all targets.
20296 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
20297 where integer modes in MMX/SSE registers are not tieable
20298 because of missing QImode and HImode moves to, from or between
20299 MMX/SSE registers. */
20300 return MAX (ix86_cost->mmxsse_to_integer, 8);
20301
20302 if (MAYBE_FLOAT_CLASS_P (class1))
20303 return ix86_cost->fp_move;
20304 if (MAYBE_SSE_CLASS_P (class1))
20305 return ix86_cost->sse_move;
20306 if (MAYBE_MMX_CLASS_P (class1))
20307 return ix86_cost->mmx_move;
20308 return 2;
20309 }
20310
20311 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20312
20313 bool
20314 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20315 {
20316 /* Flags, and only flags, can hold CCmode values. */
20317 if (CC_REGNO_P (regno))
20318 return GET_MODE_CLASS (mode) == MODE_CC;
20319 if (GET_MODE_CLASS (mode) == MODE_CC
20320 || GET_MODE_CLASS (mode) == MODE_RANDOM
20321 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20322 return 0;
20323 if (FP_REGNO_P (regno))
20324 return VALID_FP_MODE_P (mode);
20325 if (SSE_REGNO_P (regno))
20326 {
20327 /* We implement the move patterns for all vector modes into and
20328 out of SSE registers, even when no operation instructions
20329 are available. */
20330 return (VALID_SSE_REG_MODE (mode)
20331 || VALID_SSE2_REG_MODE (mode)
20332 || VALID_MMX_REG_MODE (mode)
20333 || VALID_MMX_REG_MODE_3DNOW (mode));
20334 }
20335 if (MMX_REGNO_P (regno))
20336 {
20337 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20338 so if the register is available at all, then we can move data of
20339 the given mode into or out of it. */
20340 return (VALID_MMX_REG_MODE (mode)
20341 || VALID_MMX_REG_MODE_3DNOW (mode));
20342 }
20343
20344 if (mode == QImode)
20345 {
20346 /* Take care with QImode values - they can live in non-QI regs,
20347 but then they do cause partial register stalls. */
20348 if (regno < 4 || TARGET_64BIT)
20349 return 1;
20350 if (!TARGET_PARTIAL_REG_STALL)
20351 return 1;
20352 return reload_in_progress || reload_completed;
20353 }
20354 /* We handle both integers and floats in the general purpose registers. */
20355 else if (VALID_INT_MODE_P (mode))
20356 return 1;
20357 else if (VALID_FP_MODE_P (mode))
20358 return 1;
20359 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20360 on to use that value in smaller contexts, this can easily force a
20361 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20362 supporting DImode, allow it. */
20363 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20364 return 1;
20365
20366 return 0;
20367 }
20368
20369 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20370 tieable integer mode. */
20371
20372 static bool
20373 ix86_tieable_integer_mode_p (enum machine_mode mode)
20374 {
20375 switch (mode)
20376 {
20377 case HImode:
20378 case SImode:
20379 return true;
20380
20381 case QImode:
20382 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20383
20384 case DImode:
20385 return TARGET_64BIT;
20386
20387 default:
20388 return false;
20389 }
20390 }
20391
20392 /* Return true if MODE1 is accessible in a register that can hold MODE2
20393 without copying. That is, all register classes that can hold MODE2
20394 can also hold MODE1. */
20395
20396 bool
20397 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20398 {
20399 if (mode1 == mode2)
20400 return true;
20401
20402 if (ix86_tieable_integer_mode_p (mode1)
20403 && ix86_tieable_integer_mode_p (mode2))
20404 return true;
20405
20406 /* MODE2 being XFmode implies fp stack or general regs, which means we
20407 can tie any smaller floating point modes to it. Note that we do not
20408 tie this with TFmode. */
20409 if (mode2 == XFmode)
20410 return mode1 == SFmode || mode1 == DFmode;
20411
20412 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20413 that we can tie it with SFmode. */
20414 if (mode2 == DFmode)
20415 return mode1 == SFmode;
20416
20417 /* If MODE2 is only appropriate for an SSE register, then tie with
20418 any other mode acceptable to SSE registers. */
20419 if (GET_MODE_SIZE (mode2) == 16
20420 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20421 return (GET_MODE_SIZE (mode1) == 16
20422 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20423
20424 /* If MODE2 is appropriate for an MMX register, then tie
20425 with any other mode acceptable to MMX registers. */
20426 if (GET_MODE_SIZE (mode2) == 8
20427 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20428 return (GET_MODE_SIZE (mode1) == 8
20429 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20430
20431 return false;
20432 }
20433
20434 /* Return the cost of moving data of mode M between a
20435 register and memory. A value of 2 is the default; this cost is
20436 relative to those in `REGISTER_MOVE_COST'.
20437
20438 If moving between registers and memory is more expensive than
20439 between two registers, you should define this macro to express the
20440 relative cost.
20441
20442 Also model the increased cost of moving QImode registers in
20443 non-Q_REGS classes.
20444 */
20445 int
20446 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20447 {
20448 if (FLOAT_CLASS_P (regclass))
20449 {
20450 int index;
20451 switch (mode)
20452 {
20453 case SFmode:
20454 index = 0;
20455 break;
20456 case DFmode:
20457 index = 1;
20458 break;
20459 case XFmode:
20460 index = 2;
20461 break;
20462 default:
20463 return 100;
20464 }
20465 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20466 }
20467 if (SSE_CLASS_P (regclass))
20468 {
20469 int index;
20470 switch (GET_MODE_SIZE (mode))
20471 {
20472 case 4:
20473 index = 0;
20474 break;
20475 case 8:
20476 index = 1;
20477 break;
20478 case 16:
20479 index = 2;
20480 break;
20481 default:
20482 return 100;
20483 }
20484 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20485 }
20486 if (MMX_CLASS_P (regclass))
20487 {
20488 int index;
20489 switch (GET_MODE_SIZE (mode))
20490 {
20491 case 4:
20492 index = 0;
20493 break;
20494 case 8:
20495 index = 1;
20496 break;
20497 default:
20498 return 100;
20499 }
20500 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20501 }
20502 switch (GET_MODE_SIZE (mode))
20503 {
20504 case 1:
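/* QImode is special: loads into registers outside Q_REGS are costed as
movzbl, and stores from such registers get an extra penalty, since on
IA-32 only %al/%bl/%cl/%dl have byte encodings.  */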
20505 if (in)
20506 return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
20507 : ix86_cost->movzbl_load);
20508 else
20509 return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
20510 : ix86_cost->int_store[0] + 4);
20511 break;
20512 case 2:
20513 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20514 default:
20515 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20516 if (mode == TFmode)
20517 mode = XFmode;
20518 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20519 * (((int) GET_MODE_SIZE (mode)
20520 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20521 }
20522 }
20523
20524 /* Compute a (partial) cost for rtx X. Return true if the complete
20525 cost has been computed, and false if subexpressions should be
20526 scanned. In either case, *TOTAL contains the cost result. */
20527
20528 static bool
20529 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20530 {
20531 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20532 enum machine_mode mode = GET_MODE (x);
20533
20534 switch (code)
20535 {
20536 case CONST_INT:
20537 case CONST:
20538 case LABEL_REF:
20539 case SYMBOL_REF:
20540 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20541 *total = 3;
20542 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20543 *total = 2;
20544 else if (flag_pic && SYMBOLIC_CONST (x)
20545 && (!TARGET_64BIT
20546 || (GET_CODE (x) != LABEL_REF
20547 && (GET_CODE (x) != SYMBOL_REF
20548 || !SYMBOL_REF_LOCAL_P (x)))))
20549 *total = 1;
20550 else
20551 *total = 0;
20552 return true;
20553
20554 case CONST_DOUBLE:
20555 if (mode == VOIDmode)
20556 *total = 0;
20557 else
20558 switch (standard_80387_constant_p (x))
20559 {
20560 case 1: /* 0.0 */
20561 *total = 1;
20562 break;
20563 default: /* Other constants */
20564 *total = 2;
20565 break;
20566 case 0:
20567 case -1:
20568 /* Start with (MEM (SYMBOL_REF)), since that's where
20569 it'll probably end up. Add a penalty for size. */
20570 *total = (COSTS_N_INSNS (1)
20571 + (flag_pic != 0 && !TARGET_64BIT)
20572 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20573 break;
20574 }
20575 return true;
20576
20577 case ZERO_EXTEND:
20578 /* The zero extension is often completely free on x86_64, so make
20579 it as cheap as possible. */
20580 if (TARGET_64BIT && mode == DImode
20581 && GET_MODE (XEXP (x, 0)) == SImode)
20582 *total = 1;
20583 else if (TARGET_ZERO_EXTEND_WITH_AND)
20584 *total = ix86_cost->add;
20585 else
20586 *total = ix86_cost->movzx;
20587 return false;
20588
20589 case SIGN_EXTEND:
20590 *total = ix86_cost->movsx;
20591 return false;
20592
20593 case ASHIFT:
20594 if (CONST_INT_P (XEXP (x, 1))
20595 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20596 {
20597 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20598 if (value == 1)
20599 {
20600 *total = ix86_cost->add;
20601 return false;
20602 }
20603 if ((value == 2 || value == 3)
20604 && ix86_cost->lea <= ix86_cost->shift_const)
20605 {
20606 *total = ix86_cost->lea;
20607 return false;
20608 }
20609 }
20610 /* FALLTHRU */
20611
20612 case ROTATE:
20613 case ASHIFTRT:
20614 case LSHIFTRT:
20615 case ROTATERT:
20616 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20617 {
20618 if (CONST_INT_P (XEXP (x, 1)))
20619 {
20620 if (INTVAL (XEXP (x, 1)) > 32)
20621 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20622 else
20623 *total = ix86_cost->shift_const * 2;
20624 }
20625 else
20626 {
20627 if (GET_CODE (XEXP (x, 1)) == AND)
20628 *total = ix86_cost->shift_var * 2;
20629 else
20630 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20631 }
20632 }
20633 else
20634 {
20635 if (CONST_INT_P (XEXP (x, 1)))
20636 *total = ix86_cost->shift_const;
20637 else
20638 *total = ix86_cost->shift_var;
20639 }
20640 return false;
20641
20642 case MULT:
20643 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20644 {
20645 /* ??? SSE scalar cost should be used here. */
20646 *total = ix86_cost->fmul;
20647 return false;
20648 }
20649 else if (X87_FLOAT_MODE_P (mode))
20650 {
20651 *total = ix86_cost->fmul;
20652 return false;
20653 }
20654 else if (FLOAT_MODE_P (mode))
20655 {
20656 /* ??? SSE vector cost should be used here. */
20657 *total = ix86_cost->fmul;
20658 return false;
20659 }
20660 else
20661 {
20662 rtx op0 = XEXP (x, 0);
20663 rtx op1 = XEXP (x, 1);
20664 int nbits;
20665 if (CONST_INT_P (XEXP (x, 1)))
20666 {
20667 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20668 for (nbits = 0; value != 0; value &= value - 1)
20669 nbits++;
20670 }
20671 else
20672 /* This is arbitrary. */
20673 nbits = 7;
20674
20675 /* Compute costs correctly for widening multiplication. */
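/* For instance, (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
is really an SImode multiply producing a DImode result, so below it is
costed with the SImode entry of the multiply table rather than the
DImode one.  */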
20676 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20677 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20678 == GET_MODE_SIZE (mode))
20679 {
20680 int is_mulwiden = 0;
20681 enum machine_mode inner_mode = GET_MODE (op0);
20682
20683 if (GET_CODE (op0) == GET_CODE (op1))
20684 is_mulwiden = 1, op1 = XEXP (op1, 0);
20685 else if (CONST_INT_P (op1))
20686 {
20687 if (GET_CODE (op0) == SIGN_EXTEND)
20688 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20689 == INTVAL (op1);
20690 else
20691 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20692 }
20693
20694 if (is_mulwiden)
20695 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20696 }
20697
20698 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20699 + nbits * ix86_cost->mult_bit
20700 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20701
20702 return true;
20703 }
20704
20705 case DIV:
20706 case UDIV:
20707 case MOD:
20708 case UMOD:
20709 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20710 /* ??? SSE cost should be used here. */
20711 *total = ix86_cost->fdiv;
20712 else if (X87_FLOAT_MODE_P (mode))
20713 *total = ix86_cost->fdiv;
20714 else if (FLOAT_MODE_P (mode))
20715 /* ??? SSE vector cost should be used here. */
20716 *total = ix86_cost->fdiv;
20717 else
20718 *total = ix86_cost->divide[MODE_INDEX (mode)];
20719 return false;
20720
20721 case PLUS:
20722 if (GET_MODE_CLASS (mode) == MODE_INT
20723 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20724 {
20725 if (GET_CODE (XEXP (x, 0)) == PLUS
20726 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20727 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20728 && CONSTANT_P (XEXP (x, 1)))
20729 {
20730 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20731 if (val == 2 || val == 4 || val == 8)
20732 {
20733 *total = ix86_cost->lea;
20734 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20735 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20736 outer_code);
20737 *total += rtx_cost (XEXP (x, 1), outer_code);
20738 return true;
20739 }
20740 }
20741 else if (GET_CODE (XEXP (x, 0)) == MULT
20742 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20743 {
20744 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20745 if (val == 2 || val == 4 || val == 8)
20746 {
20747 *total = ix86_cost->lea;
20748 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20749 *total += rtx_cost (XEXP (x, 1), outer_code);
20750 return true;
20751 }
20752 }
20753 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20754 {
20755 *total = ix86_cost->lea;
20756 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20757 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20758 *total += rtx_cost (XEXP (x, 1), outer_code);
20759 return true;
20760 }
20761 }
20762 /* FALLTHRU */
20763
20764 case MINUS:
20765 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20766 {
20767 /* ??? SSE cost should be used here. */
20768 *total = ix86_cost->fadd;
20769 return false;
20770 }
20771 else if (X87_FLOAT_MODE_P (mode))
20772 {
20773 *total = ix86_cost->fadd;
20774 return false;
20775 }
20776 else if (FLOAT_MODE_P (mode))
20777 {
20778 /* ??? SSE vector cost should be used here. */
20779 *total = ix86_cost->fadd;
20780 return false;
20781 }
20782 /* FALLTHRU */
20783
20784 case AND:
20785 case IOR:
20786 case XOR:
20787 if (!TARGET_64BIT && mode == DImode)
20788 {
20789 *total = (ix86_cost->add * 2
20790 + (rtx_cost (XEXP (x, 0), outer_code)
20791 << (GET_MODE (XEXP (x, 0)) != DImode))
20792 + (rtx_cost (XEXP (x, 1), outer_code)
20793 << (GET_MODE (XEXP (x, 1)) != DImode)));
20794 return true;
20795 }
20796 /* FALLTHRU */
20797
20798 case NEG:
20799 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20800 {
20801 /* ??? SSE cost should be used here. */
20802 *total = ix86_cost->fchs;
20803 return false;
20804 }
20805 else if (X87_FLOAT_MODE_P (mode))
20806 {
20807 *total = ix86_cost->fchs;
20808 return false;
20809 }
20810 else if (FLOAT_MODE_P (mode))
20811 {
20812 /* ??? SSE vector cost should be used here. */
20813 *total = ix86_cost->fchs;
20814 return false;
20815 }
20816 /* FALLTHRU */
20817
20818 case NOT:
20819 if (!TARGET_64BIT && mode == DImode)
20820 *total = ix86_cost->add * 2;
20821 else
20822 *total = ix86_cost->add;
20823 return false;
20824
20825 case COMPARE:
20826 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20827 && XEXP (XEXP (x, 0), 1) == const1_rtx
20828 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20829 && XEXP (x, 1) == const0_rtx)
20830 {
20831 /* This kind of construct is implemented using test[bwl].
20832 Treat it as if we had an AND. */
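/* For example, (compare (zero_extract:SI (reg:SI) (const_int 1) (const_int 5))
(const_int 0)) tests a single bit and is typically emitted as a test
instruction with the mask (1 << 5).  */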
20833 *total = (ix86_cost->add
20834 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20835 + rtx_cost (const1_rtx, outer_code));
20836 return true;
20837 }
20838 return false;
20839
20840 case FLOAT_EXTEND:
20841 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20842 *total = 0;
20843 return false;
20844
20845 case ABS:
20846 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20847 /* ??? SSE cost should be used here. */
20848 *total = ix86_cost->fabs;
20849 else if (X87_FLOAT_MODE_P (mode))
20850 *total = ix86_cost->fabs;
20851 else if (FLOAT_MODE_P (mode))
20852 /* ??? SSE vector cost should be used here. */
20853 *total = ix86_cost->fabs;
20854 return false;
20855
20856 case SQRT:
20857 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20858 /* ??? SSE cost should be used here. */
20859 *total = ix86_cost->fsqrt;
20860 else if (X87_FLOAT_MODE_P (mode))
20861 *total = ix86_cost->fsqrt;
20862 else if (FLOAT_MODE_P (mode))
20863 /* ??? SSE vector cost should be used here. */
20864 *total = ix86_cost->fsqrt;
20865 return false;
20866
20867 case UNSPEC:
20868 if (XINT (x, 1) == UNSPEC_TP)
20869 *total = 0;
20870 return false;
20871
20872 default:
20873 return false;
20874 }
20875 }
20876
20877 #if TARGET_MACHO
20878
20879 static int current_machopic_label_num;
20880
20881 /* Given a symbol name and its associated stub, write out the
20882 definition of the stub. */
20883
20884 void
20885 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20886 {
20887 unsigned int length;
20888 char *binder_name, *symbol_name, lazy_ptr_name[32];
20889 int label = ++current_machopic_label_num;
20890
20891 /* For 64-bit we shouldn't get here. */
20892 gcc_assert (!TARGET_64BIT);
20893
20894 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20895 symb = (*targetm.strip_name_encoding) (symb);
20896
20897 length = strlen (stub);
20898 binder_name = alloca (length + 32);
20899 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20900
20901 length = strlen (symb);
20902 symbol_name = alloca (length + 32);
20903 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20904
20905 sprintf (lazy_ptr_name, "L%d$lz", label);
20906
20907 if (MACHOPIC_PURE)
20908 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20909 else
20910 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20911
20912 fprintf (file, "%s:\n", stub);
20913 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20914
20915 if (MACHOPIC_PURE)
20916 {
20917 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20918 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20919 fprintf (file, "\tjmp\t*%%edx\n");
20920 }
20921 else
20922 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20923
20924 fprintf (file, "%s:\n", binder_name);
20925
20926 if (MACHOPIC_PURE)
20927 {
20928 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20929 fprintf (file, "\tpushl\t%%eax\n");
20930 }
20931 else
20932 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20933
20934 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20935
20936 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20937 fprintf (file, "%s:\n", lazy_ptr_name);
20938 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20939 fprintf (file, "\t.long %s\n", binder_name);
20940 }
20941
20942 void
20943 darwin_x86_file_end (void)
20944 {
20945 darwin_file_end ();
20946 ix86_file_end ();
20947 }
20948 #endif /* TARGET_MACHO */
20949
20950 /* Order the registers for register allocator. */
20951
20952 void
20953 x86_order_regs_for_local_alloc (void)
20954 {
20955 int pos = 0;
20956 int i;
20957
20958 /* First allocate the local general purpose registers. */
20959 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20960 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20961 reg_alloc_order [pos++] = i;
20962
20963 /* Global general purpose registers. */
20964 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20965 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20966 reg_alloc_order [pos++] = i;
20967
20968 /* x87 registers come first in case we are doing FP math
20969 using them. */
20970 if (!TARGET_SSE_MATH)
20971 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20972 reg_alloc_order [pos++] = i;
20973
20974 /* SSE registers. */
20975 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20976 reg_alloc_order [pos++] = i;
20977 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20978 reg_alloc_order [pos++] = i;
20979
20980 /* x87 registers. */
20981 if (TARGET_SSE_MATH)
20982 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20983 reg_alloc_order [pos++] = i;
20984
20985 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20986 reg_alloc_order [pos++] = i;
20987
20988 /* Initialize the rest of the array, as we do not allocate some registers
20989 at all. */
20990 while (pos < FIRST_PSEUDO_REGISTER)
20991 reg_alloc_order [pos++] = 0;
20992 }
20993
20994 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20995 struct attribute_spec.handler. */
20996 static tree
20997 ix86_handle_struct_attribute (tree *node, tree name,
20998 tree args ATTRIBUTE_UNUSED,
20999 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
21000 {
21001 tree *type = NULL;
21002 if (DECL_P (*node))
21003 {
21004 if (TREE_CODE (*node) == TYPE_DECL)
21005 type = &TREE_TYPE (*node);
21006 }
21007 else
21008 type = node;
21009
21010 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21011 || TREE_CODE (*type) == UNION_TYPE)))
21012 {
21013 warning (OPT_Wattributes, "%qs attribute ignored",
21014 IDENTIFIER_POINTER (name));
21015 *no_add_attrs = true;
21016 }
21017
21018 else if ((is_attribute_p ("ms_struct", name)
21019 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21020 || ((is_attribute_p ("gcc_struct", name)
21021 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21022 {
21023 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21024 IDENTIFIER_POINTER (name));
21025 *no_add_attrs = true;
21026 }
21027
21028 return NULL_TREE;
21029 }
21030
21031 static bool
21032 ix86_ms_bitfield_layout_p (tree record_type)
21033 {
21034 return (TARGET_MS_BITFIELD_LAYOUT &&
21035 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21036 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21037 }
21038
21039 /* Returns an expression indicating where the this parameter is
21040 located on entry to the FUNCTION. */
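/* For instance, on ia32 without regparm the pointer is the first stack
argument, 4(%esp) on entry (8(%esp) when the return value is passed by
hidden reference); with regparm it is in %eax, and with fastcall in %ecx.  */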
21041
21042 static rtx
21043 x86_this_parameter (tree function)
21044 {
21045 tree type = TREE_TYPE (function);
21046 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21047
21048 if (TARGET_64BIT)
21049 {
21050 const int *parm_regs;
21051
21052 if (TARGET_64BIT_MS_ABI)
21053 parm_regs = x86_64_ms_abi_int_parameter_registers;
21054 else
21055 parm_regs = x86_64_int_parameter_registers;
21056 return gen_rtx_REG (DImode, parm_regs[aggr]);
21057 }
21058
21059 if (ix86_function_regparm (type, function) > 0
21060 && !type_has_variadic_args_p (type))
21061 {
21062 int regno = 0;
21063 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21064 regno = 2;
21065 return gen_rtx_REG (SImode, regno);
21066 }
21067
21068 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21069 }
21070
21071 /* Determine whether x86_output_mi_thunk can succeed. */
21072
21073 static bool
21074 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21075 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21076 HOST_WIDE_INT vcall_offset, tree function)
21077 {
21078 /* 64-bit can handle anything. */
21079 if (TARGET_64BIT)
21080 return true;
21081
21082 /* For 32-bit, everything's fine if we have one free register. */
21083 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21084 return true;
21085
21086 /* Need a free register for vcall_offset. */
21087 if (vcall_offset)
21088 return false;
21089
21090 /* Need a free register for GOT references. */
21091 if (flag_pic && !(*targetm.binds_local_p) (function))
21092 return false;
21093
21094 /* Otherwise ok. */
21095 return true;
21096 }
21097
21098 /* Output the assembler code for a thunk function. THUNK_DECL is the
21099 declaration for the thunk function itself, FUNCTION is the decl for
21100 the target function. DELTA is an immediate constant offset to be
21101 added to THIS. If VCALL_OFFSET is nonzero, the word at
21102 *(*this + vcall_offset) should be added to THIS. */
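/* As an illustration, on ia32 with THIS on the stack, DELTA == 4 and
VCALL_OFFSET == 0, the non-PIC output is roughly:
	addl	$4, 4(%esp)
	jmp	<function>
*/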
21103
21104 static void
21105 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21106 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21107 HOST_WIDE_INT vcall_offset, tree function)
21108 {
21109 rtx xops[3];
21110 rtx this_param = x86_this_parameter (function);
21111 rtx this_reg, tmp;
21112
21113 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21114 pull it in now and let DELTA benefit. */
21115 if (REG_P (this_param))
21116 this_reg = this_param;
21117 else if (vcall_offset)
21118 {
21119 /* Put the this parameter into %eax. */
21120 xops[0] = this_param;
21121 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21122 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21123 }
21124 else
21125 this_reg = NULL_RTX;
21126
21127 /* Adjust the this parameter by a fixed constant. */
21128 if (delta)
21129 {
21130 xops[0] = GEN_INT (delta);
21131 xops[1] = this_reg ? this_reg : this_param;
21132 if (TARGET_64BIT)
21133 {
21134 if (!x86_64_general_operand (xops[0], DImode))
21135 {
21136 tmp = gen_rtx_REG (DImode, R10_REG);
21137 xops[1] = tmp;
21138 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21139 xops[0] = tmp;
21140 xops[1] = this_param;
21141 }
21142 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21143 }
21144 else
21145 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21146 }
21147
21148 /* Adjust the this parameter by a value stored in the vtable. */
21149 if (vcall_offset)
21150 {
21151 if (TARGET_64BIT)
21152 tmp = gen_rtx_REG (DImode, R10_REG);
21153 else
21154 {
21155 int tmp_regno = 2 /* ECX */;
21156 if (lookup_attribute ("fastcall",
21157 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21158 tmp_regno = 0 /* EAX */;
21159 tmp = gen_rtx_REG (SImode, tmp_regno);
21160 }
21161
21162 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21163 xops[1] = tmp;
21164 if (TARGET_64BIT)
21165 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21166 else
21167 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21168
21169 /* Adjust the this parameter. */
21170 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21171 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21172 {
21173 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21174 xops[0] = GEN_INT (vcall_offset);
21175 xops[1] = tmp2;
21176 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21177 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21178 }
21179 xops[1] = this_reg;
21180 if (TARGET_64BIT)
21181 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21182 else
21183 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21184 }
21185
21186 /* If necessary, drop THIS back to its stack slot. */
21187 if (this_reg && this_reg != this_param)
21188 {
21189 xops[0] = this_reg;
21190 xops[1] = this_param;
21191 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21192 }
21193
21194 xops[0] = XEXP (DECL_RTL (function), 0);
21195 if (TARGET_64BIT)
21196 {
21197 if (!flag_pic || (*targetm.binds_local_p) (function))
21198 output_asm_insn ("jmp\t%P0", xops);
21199 /* All thunks should be in the same object as their target,
21200 and thus binds_local_p should be true. */
21201 else if (TARGET_64BIT_MS_ABI)
21202 gcc_unreachable ();
21203 else
21204 {
21205 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21206 tmp = gen_rtx_CONST (Pmode, tmp);
21207 tmp = gen_rtx_MEM (QImode, tmp);
21208 xops[0] = tmp;
21209 output_asm_insn ("jmp\t%A0", xops);
21210 }
21211 }
21212 else
21213 {
21214 if (!flag_pic || (*targetm.binds_local_p) (function))
21215 output_asm_insn ("jmp\t%P0", xops);
21216 else
21217 #if TARGET_MACHO
21218 if (TARGET_MACHO)
21219 {
21220 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21221 tmp = (gen_rtx_SYMBOL_REF
21222 (Pmode,
21223 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21224 tmp = gen_rtx_MEM (QImode, tmp);
21225 xops[0] = tmp;
21226 output_asm_insn ("jmp\t%0", xops);
21227 }
21228 else
21229 #endif /* TARGET_MACHO */
21230 {
21231 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21232 output_set_got (tmp, NULL_RTX);
21233
21234 xops[1] = tmp;
21235 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21236 output_asm_insn ("jmp\t{*}%1", xops);
21237 }
21238 }
21239 }
21240
21241 static void
21242 x86_file_start (void)
21243 {
21244 default_file_start ();
21245 #if TARGET_MACHO
21246 darwin_file_start ();
21247 #endif
21248 if (X86_FILE_START_VERSION_DIRECTIVE)
21249 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21250 if (X86_FILE_START_FLTUSED)
21251 fputs ("\t.global\t__fltused\n", asm_out_file);
21252 if (ix86_asm_dialect == ASM_INTEL)
21253 fputs ("\t.intel_syntax\n", asm_out_file);
21254 }
21255
21256 int
21257 x86_field_alignment (tree field, int computed)
21258 {
21259 enum machine_mode mode;
21260 tree type = TREE_TYPE (field);
21261
21262 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21263 return computed;
21264 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21265 ? get_inner_array_type (type) : type);
21266 if (mode == DFmode || mode == DCmode
21267 || GET_MODE_CLASS (mode) == MODE_INT
21268 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21269 return MIN (32, computed);
21270 return computed;
21271 }
21272
21273 /* Output assembler code to FILE to increment profiler label # LABELNO
21274 for profiling a function entry. */
21275 void
21276 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21277 {
21278 if (TARGET_64BIT)
21279 {
21280 #ifndef NO_PROFILE_COUNTERS
21281 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
21282 #endif
21283
21284 if (!TARGET_64BIT_MS_ABI && flag_pic)
21285 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21286 else
21287 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21288 }
21289 else if (flag_pic)
21290 {
21291 #ifndef NO_PROFILE_COUNTERS
21292 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21293 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21294 #endif
21295 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21296 }
21297 else
21298 {
21299 #ifndef NO_PROFILE_COUNTERS
21300 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21301 PROFILE_COUNT_REGISTER);
21302 #endif
21303 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21304 }
21305 }
21306
21307 /* We don't have exact information about the insn sizes, but we may assume
21308 quite safely that we are informed about all 1 byte insns and memory
21309 address sizes. This is enough to eliminate unnecessary padding in
21310 99% of cases. */
21311
21312 static int
21313 min_insn_size (rtx insn)
21314 {
21315 int l = 0;
21316
21317 if (!INSN_P (insn) || !active_insn_p (insn))
21318 return 0;
21319
21320 /* Discard alignments we've emitted, as well as jump instructions. */
21321 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21322 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21323 return 0;
21324 if (JUMP_P (insn)
21325 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21326 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21327 return 0;
21328
21329 /* Important case - calls are always 5 bytes.
21330 It is common to have many calls in a row. */
21331 if (CALL_P (insn)
21332 && symbolic_reference_mentioned_p (PATTERN (insn))
21333 && !SIBLING_CALL_P (insn))
21334 return 5;
21335 if (get_attr_length (insn) <= 1)
21336 return 1;
21337
21338 /* For normal instructions we may rely on the sizes of addresses
21339 and the presence of a symbol to require 4 bytes of encoding.
21340 This is not the case for jumps, where references are PC relative. */
21341 if (!JUMP_P (insn))
21342 {
21343 l = get_attr_length_address (insn);
21344 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21345 l = 4;
21346 }
21347 if (l)
21348 return 1+l;
21349 else
21350 return 2;
21351 }
21352
21353 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21354 window. */
21355
21356 static void
21357 ix86_avoid_jump_misspredicts (void)
21358 {
21359 rtx insn, start = get_insns ();
21360 int nbytes = 0, njumps = 0;
21361 int isjump = 0;
21362
21363 /* Look for all minimal intervals of instructions containing 4 jumps.
21364 The intervals are bounded by START and INSN. NBYTES is the total
21365 size of the instructions in the interval, including INSN and not including
21366 START. When NBYTES is smaller than 16 bytes, it is possible
21367 that the ends of START and INSN fall into the same 16-byte window.
21368
21369 The smallest offset in the window at which INSN can start is the case where
21370 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
21371 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
21372 */
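/* For example, if the instructions in the interval total NBYTES == 12
and INSN itself is 2 bytes, the padding emitted below before INSN is
15 - 12 + 2 = 5 bytes, intended to push the fourth jump out of the
16-byte window that could hold the previous three (a worked instance of
the computation below).  */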
21373 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21374 {
21375
21376 nbytes += min_insn_size (insn);
21377 if (dump_file)
21378 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21379 INSN_UID (insn), min_insn_size (insn));
21380 if ((JUMP_P (insn)
21381 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21382 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21383 || CALL_P (insn))
21384 njumps++;
21385 else
21386 continue;
21387
21388 while (njumps > 3)
21389 {
21390 start = NEXT_INSN (start);
21391 if ((JUMP_P (start)
21392 && GET_CODE (PATTERN (start)) != ADDR_VEC
21393 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21394 || CALL_P (start))
21395 njumps--, isjump = 1;
21396 else
21397 isjump = 0;
21398 nbytes -= min_insn_size (start);
21399 }
21400 gcc_assert (njumps >= 0);
21401 if (dump_file)
21402 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21403 INSN_UID (start), INSN_UID (insn), nbytes);
21404
21405 if (njumps == 3 && isjump && nbytes < 16)
21406 {
21407 int padsize = 15 - nbytes + min_insn_size (insn);
21408
21409 if (dump_file)
21410 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21411 INSN_UID (insn), padsize);
21412 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21413 }
21414 }
21415 }
21416
21417 /* The AMD Athlon works faster
21418 when a RET is not the destination of a conditional jump and is not directly
21419 preceded by another jump instruction. We avoid the penalty by inserting a NOP
21420 just before the RET instruction in such cases. */
21421 static void
21422 ix86_pad_returns (void)
21423 {
21424 edge e;
21425 edge_iterator ei;
21426
21427 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21428 {
21429 basic_block bb = e->src;
21430 rtx ret = BB_END (bb);
21431 rtx prev;
21432 bool replace = false;
21433
21434 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21435 || !maybe_hot_bb_p (bb))
21436 continue;
21437 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21438 if (active_insn_p (prev) || LABEL_P (prev))
21439 break;
21440 if (prev && LABEL_P (prev))
21441 {
21442 edge e;
21443 edge_iterator ei;
21444
21445 FOR_EACH_EDGE (e, ei, bb->preds)
21446 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21447 && !(e->flags & EDGE_FALLTHRU))
21448 replace = true;
21449 }
21450 if (!replace)
21451 {
21452 prev = prev_active_insn (ret);
21453 if (prev
21454 && ((JUMP_P (prev) && any_condjump_p (prev))
21455 || CALL_P (prev)))
21456 replace = true;
21457 /* Empty functions get a branch mispredict even when the jump destination
21458 is not visible to us. */
21459 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21460 replace = true;
21461 }
21462 if (replace)
21463 {
21464 emit_insn_before (gen_return_internal_long (), ret);
21465 delete_insn (ret);
21466 }
21467 }
21468 }
21469
21470 /* Implement machine specific optimizations. We implement padding of returns
21471 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21472 static void
21473 ix86_reorg (void)
21474 {
21475 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21476 ix86_pad_returns ();
21477 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21478 ix86_avoid_jump_misspredicts ();
21479 }
21480
21481 /* Return nonzero when a QImode register that must be encoded via a REX prefix
21482 is used. */
21483 bool
21484 x86_extended_QIreg_mentioned_p (rtx insn)
21485 {
21486 int i;
21487 extract_insn_cached (insn);
21488 for (i = 0; i < recog_data.n_operands; i++)
21489 if (REG_P (recog_data.operand[i])
21490 && REGNO (recog_data.operand[i]) >= 4)
21491 return true;
21492 return false;
21493 }
21494
21495 /* Return nonzero when P points to a register encoded via a REX prefix.
21496 Called via for_each_rtx. */
21497 static int
21498 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21499 {
21500 unsigned int regno;
21501 if (!REG_P (*p))
21502 return 0;
21503 regno = REGNO (*p);
21504 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21505 }
21506
21507 /* Return true when INSN mentions register that must be encoded using REX
21508 prefix. */
21509 bool
21510 x86_extended_reg_mentioned_p (rtx insn)
21511 {
21512 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21513 }
21514
21515 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21516 optabs would emit if we didn't have TFmode patterns. */
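/* Strategy used below: non-negative inputs use a plain signed conversion.
Inputs with the sign bit set are shifted right by one (the lost low bit
is folded back in with an IOR so rounding still sees it), converted, and
the result is doubled with a floating-point add.  */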
21517
21518 void
21519 x86_emit_floatuns (rtx operands[2])
21520 {
21521 rtx neglab, donelab, i0, i1, f0, in, out;
21522 enum machine_mode mode, inmode;
21523
21524 inmode = GET_MODE (operands[1]);
21525 gcc_assert (inmode == SImode || inmode == DImode);
21526
21527 out = operands[0];
21528 in = force_reg (inmode, operands[1]);
21529 mode = GET_MODE (out);
21530 neglab = gen_label_rtx ();
21531 donelab = gen_label_rtx ();
21532 f0 = gen_reg_rtx (mode);
21533
21534 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21535
21536 expand_float (out, in, 0);
21537
21538 emit_jump_insn (gen_jump (donelab));
21539 emit_barrier ();
21540
21541 emit_label (neglab);
21542
21543 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21544 1, OPTAB_DIRECT);
21545 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21546 1, OPTAB_DIRECT);
21547 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21548
21549 expand_float (f0, i0, 0);
21550
21551 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21552
21553 emit_label (donelab);
21554 }
21555 \f
21556 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21557 with all elements equal to VAR. Return true if successful. */
21558
21559 static bool
21560 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21561 rtx target, rtx val)
21562 {
21563 enum machine_mode smode, wsmode, wvmode;
21564 rtx x;
21565
21566 switch (mode)
21567 {
21568 case V2SImode:
21569 case V2SFmode:
21570 if (!mmx_ok)
21571 return false;
21572 /* FALLTHRU */
21573
21574 case V2DFmode:
21575 case V2DImode:
21576 case V4SFmode:
21577 case V4SImode:
21578 val = force_reg (GET_MODE_INNER (mode), val);
21579 x = gen_rtx_VEC_DUPLICATE (mode, val);
21580 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21581 return true;
21582
21583 case V4HImode:
21584 if (!mmx_ok)
21585 return false;
21586 if (TARGET_SSE || TARGET_3DNOW_A)
21587 {
21588 val = gen_lowpart (SImode, val);
21589 x = gen_rtx_TRUNCATE (HImode, val);
21590 x = gen_rtx_VEC_DUPLICATE (mode, x);
21591 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21592 return true;
21593 }
21594 else
21595 {
21596 smode = HImode;
21597 wsmode = SImode;
21598 wvmode = V2SImode;
21599 goto widen;
21600 }
21601
21602 case V8QImode:
21603 if (!mmx_ok)
21604 return false;
21605 smode = QImode;
21606 wsmode = HImode;
21607 wvmode = V4HImode;
21608 goto widen;
21609 case V8HImode:
21610 if (TARGET_SSE2)
21611 {
21612 rtx tmp1, tmp2;
21613 /* Extend HImode to SImode using a paradoxical SUBREG. */
21614 tmp1 = gen_reg_rtx (SImode);
21615 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21616 /* Insert the SImode value as low element of V4SImode vector. */
21617 tmp2 = gen_reg_rtx (V4SImode);
21618 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21619 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21620 CONST0_RTX (V4SImode),
21621 const1_rtx);
21622 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21623 /* Cast the V4SImode vector back to a V8HImode vector. */
21624 tmp1 = gen_reg_rtx (V8HImode);
21625 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21626 /* Duplicate the low short through the whole low SImode word. */
21627 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21628 /* Cast the V8HImode vector back to a V4SImode vector. */
21629 tmp2 = gen_reg_rtx (V4SImode);
21630 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21631 /* Replicate the low element of the V4SImode vector. */
21632 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21633 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21634 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21635 return true;
21636 }
21637 smode = HImode;
21638 wsmode = SImode;
21639 wvmode = V4SImode;
21640 goto widen;
21641 case V16QImode:
21642 if (TARGET_SSE2)
21643 {
21644 rtx tmp1, tmp2;
21645 /* Extend QImode to SImode using a paradoxical SUBREG. */
21646 tmp1 = gen_reg_rtx (SImode);
21647 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21648 /* Insert the SImode value as low element of V4SImode vector. */
21649 tmp2 = gen_reg_rtx (V4SImode);
21650 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21651 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21652 CONST0_RTX (V4SImode),
21653 const1_rtx);
21654 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21655 /* Cast the V4SImode vector back to a V16QImode vector. */
21656 tmp1 = gen_reg_rtx (V16QImode);
21657 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21658 /* Duplicate the low byte through the whole low SImode word. */
21659 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21660 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21661 /* Cast the V16QImode vector back to a V4SImode vector. */
21662 tmp2 = gen_reg_rtx (V4SImode);
21663 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21664 /* Replicate the low element of the V4SImode vector. */
21665 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21666 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21667 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21668 return true;
21669 }
21670 smode = QImode;
21671 wsmode = HImode;
21672 wvmode = V8HImode;
21673 goto widen;
21674 widen:
21675 /* Replicate the value once into the next wider mode and recurse. */
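/* E.g. for a V8QImode splat of V this builds the HImode value
(V << 8) | V and then splats that across V4HImode.  */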
21676 val = convert_modes (wsmode, smode, val, true);
21677 x = expand_simple_binop (wsmode, ASHIFT, val,
21678 GEN_INT (GET_MODE_BITSIZE (smode)),
21679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21680 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21681
21682 x = gen_reg_rtx (wvmode);
21683 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21684 gcc_unreachable ();
21685 emit_move_insn (target, gen_lowpart (mode, x));
21686 return true;
21687
21688 default:
21689 return false;
21690 }
21691 }
21692
21693 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21694 whose ONE_VAR element is VAR, and other elements are zero. Return true
21695 if successful. */
21696
21697 static bool
21698 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21699 rtx target, rtx var, int one_var)
21700 {
21701 enum machine_mode vsimode;
21702 rtx new_target;
21703 rtx x, tmp;
21704
21705 switch (mode)
21706 {
21707 case V2SFmode:
21708 case V2SImode:
21709 if (!mmx_ok)
21710 return false;
21711 /* FALLTHRU */
21712
21713 case V2DFmode:
21714 case V2DImode:
21715 if (one_var != 0)
21716 return false;
21717 var = force_reg (GET_MODE_INNER (mode), var);
21718 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21719 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21720 return true;
21721
21722 case V4SFmode:
21723 case V4SImode:
21724 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21725 new_target = gen_reg_rtx (mode);
21726 else
21727 new_target = target;
21728 var = force_reg (GET_MODE_INNER (mode), var);
21729 x = gen_rtx_VEC_DUPLICATE (mode, var);
21730 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21731 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21732 if (one_var != 0)
21733 {
21734 /* We need to shuffle the value to the correct position, so
21735 create a new pseudo to store the intermediate result. */
21736
21737 /* With SSE2, we can use the integer shuffle insns. */
21738 if (mode != V4SFmode && TARGET_SSE2)
21739 {
21740 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21741 GEN_INT (1),
21742 GEN_INT (one_var == 1 ? 0 : 1),
21743 GEN_INT (one_var == 2 ? 0 : 1),
21744 GEN_INT (one_var == 3 ? 0 : 1)));
21745 if (target != new_target)
21746 emit_move_insn (target, new_target);
21747 return true;
21748 }
21749
21750 /* Otherwise convert the intermediate result to V4SFmode and
21751 use the SSE1 shuffle instructions. */
21752 if (mode != V4SFmode)
21753 {
21754 tmp = gen_reg_rtx (V4SFmode);
21755 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21756 }
21757 else
21758 tmp = new_target;
21759
21760 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21761 GEN_INT (1),
21762 GEN_INT (one_var == 1 ? 0 : 1),
21763 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21764 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21765
21766 if (mode != V4SFmode)
21767 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21768 else if (tmp != target)
21769 emit_move_insn (target, tmp);
21770 }
21771 else if (target != new_target)
21772 emit_move_insn (target, new_target);
21773 return true;
21774
21775 case V8HImode:
21776 case V16QImode:
21777 vsimode = V4SImode;
21778 goto widen;
21779 case V4HImode:
21780 case V8QImode:
21781 if (!mmx_ok)
21782 return false;
21783 vsimode = V2SImode;
21784 goto widen;
21785 widen:
21786 if (one_var != 0)
21787 return false;
21788
21789 /* Zero extend the variable element to SImode and recurse. */
21790 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21791
21792 x = gen_reg_rtx (vsimode);
21793 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21794 var, one_var))
21795 gcc_unreachable ();
21796
21797 emit_move_insn (target, gen_lowpart (mode, x));
21798 return true;
21799
21800 default:
21801 return false;
21802 }
21803 }
21804
21805 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21806 consisting of the values in VALS. It is known that all elements
21807 except ONE_VAR are constants. Return true if successful. */
21808
21809 static bool
21810 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21811 rtx target, rtx vals, int one_var)
21812 {
21813 rtx var = XVECEXP (vals, 0, one_var);
21814 enum machine_mode wmode;
21815 rtx const_vec, x;
21816
21817 const_vec = copy_rtx (vals);
21818 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21819 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21820
21821 switch (mode)
21822 {
21823 case V2DFmode:
21824 case V2DImode:
21825 case V2SFmode:
21826 case V2SImode:
21827 /* For the two element vectors, it's just as easy to use
21828 the general case. */
21829 return false;
21830
21831 case V4SFmode:
21832 case V4SImode:
21833 case V8HImode:
21834 case V4HImode:
21835 break;
21836
21837 case V16QImode:
21838 wmode = V8HImode;
21839 goto widen;
21840 case V8QImode:
21841 wmode = V4HImode;
21842 goto widen;
21843 widen:
21844 /* There's no way to set one QImode entry easily. Combine
21845 the variable value with its adjacent constant value, and
21846 promote to an HImode set. */
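/* For example, setting byte 5 of a V16QImode vector really sets HImode
element 2, combining the variable byte (shifted into the high half) with
the constant byte at index 4 in the low half.  */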
21847 x = XVECEXP (vals, 0, one_var ^ 1);
21848 if (one_var & 1)
21849 {
21850 var = convert_modes (HImode, QImode, var, true);
21851 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21852 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21853 x = GEN_INT (INTVAL (x) & 0xff);
21854 }
21855 else
21856 {
21857 var = convert_modes (HImode, QImode, var, true);
21858 x = gen_int_mode (INTVAL (x) << 8, HImode);
21859 }
21860 if (x != const0_rtx)
21861 var = expand_simple_binop (HImode, IOR, var, x, var,
21862 1, OPTAB_LIB_WIDEN);
21863
21864 x = gen_reg_rtx (wmode);
21865 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21866 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21867
21868 emit_move_insn (target, gen_lowpart (mode, x));
21869 return true;
21870
21871 default:
21872 return false;
21873 }
21874
21875 emit_move_insn (target, const_vec);
21876 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21877 return true;
21878 }
21879
21880 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21881 all values variable, and none identical. */
21882
21883 static void
21884 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21885 rtx target, rtx vals)
21886 {
21887 enum machine_mode half_mode = GET_MODE_INNER (mode);
21888 rtx op0 = NULL, op1 = NULL;
21889 bool use_vec_concat = false;
21890
21891 switch (mode)
21892 {
21893 case V2SFmode:
21894 case V2SImode:
21895 if (!mmx_ok && !TARGET_SSE)
21896 break;
21897 /* FALLTHRU */
21898
21899 case V2DFmode:
21900 case V2DImode:
21901 /* For the two element vectors, we always implement VEC_CONCAT. */
21902 op0 = XVECEXP (vals, 0, 0);
21903 op1 = XVECEXP (vals, 0, 1);
21904 use_vec_concat = true;
21905 break;
21906
21907 case V4SFmode:
21908 half_mode = V2SFmode;
21909 goto half;
21910 case V4SImode:
21911 half_mode = V2SImode;
21912 goto half;
21913 half:
21914 {
21915 rtvec v;
21916
21917 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21918 Recurse to load the two halves. */
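/* E.g. a V4SFmode init { a, b, c, d } becomes a VEC_CONCAT of two
V2SFmode vectors { a, b } and { c, d }, each built by a recursive
call.  */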
21919
21920 op0 = gen_reg_rtx (half_mode);
21921 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21922 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21923
21924 op1 = gen_reg_rtx (half_mode);
21925 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21926 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21927
21928 use_vec_concat = true;
21929 }
21930 break;
21931
21932 case V8HImode:
21933 case V16QImode:
21934 case V4HImode:
21935 case V8QImode:
21936 break;
21937
21938 default:
21939 gcc_unreachable ();
21940 }
21941
21942 if (use_vec_concat)
21943 {
21944 if (!register_operand (op0, half_mode))
21945 op0 = force_reg (half_mode, op0);
21946 if (!register_operand (op1, half_mode))
21947 op1 = force_reg (half_mode, op1);
21948
21949 emit_insn (gen_rtx_SET (VOIDmode, target,
21950 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21951 }
21952 else
21953 {
21954 int i, j, n_elts, n_words, n_elt_per_word;
21955 enum machine_mode inner_mode;
21956 rtx words[4], shift;
21957
21958 inner_mode = GET_MODE_INNER (mode);
21959 n_elts = GET_MODE_NUNITS (mode);
21960 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21961 n_elt_per_word = n_elts / n_words;
21962 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21963
21964 for (i = 0; i < n_words; ++i)
21965 {
21966 rtx word = NULL_RTX;
21967
21968 for (j = 0; j < n_elt_per_word; ++j)
21969 {
21970 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21971 elt = convert_modes (word_mode, inner_mode, elt, true);
21972
21973 if (j == 0)
21974 word = elt;
21975 else
21976 {
21977 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21978 word, 1, OPTAB_LIB_WIDEN);
21979 word = expand_simple_binop (word_mode, IOR, word, elt,
21980 word, 1, OPTAB_LIB_WIDEN);
21981 }
21982 }
21983
21984 words[i] = word;
21985 }
21986
21987 if (n_words == 1)
21988 emit_move_insn (target, gen_lowpart (mode, words[0]));
21989 else if (n_words == 2)
21990 {
21991 rtx tmp = gen_reg_rtx (mode);
21992 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21993 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21994 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21995 emit_move_insn (target, tmp);
21996 }
21997 else if (n_words == 4)
21998 {
21999 rtx tmp = gen_reg_rtx (V4SImode);
22000 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22001 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22002 emit_move_insn (target, gen_lowpart (mode, tmp));
22003 }
22004 else
22005 gcc_unreachable ();
22006 }
22007 }
22008
22009 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22010 instructions unless MMX_OK is true. */
22011
22012 void
22013 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22014 {
22015 enum machine_mode mode = GET_MODE (target);
22016 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22017 int n_elts = GET_MODE_NUNITS (mode);
22018 int n_var = 0, one_var = -1;
22019 bool all_same = true, all_const_zero = true;
22020 int i;
22021 rtx x;
22022
22023 for (i = 0; i < n_elts; ++i)
22024 {
22025 x = XVECEXP (vals, 0, i);
22026 if (!CONSTANT_P (x))
22027 n_var++, one_var = i;
22028 else if (x != CONST0_RTX (inner_mode))
22029 all_const_zero = false;
22030 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22031 all_same = false;
22032 }
22033
22034 /* Constants are best loaded from the constant pool. */
22035 if (n_var == 0)
22036 {
22037 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22038 return;
22039 }
22040
22041 /* If all values are identical, broadcast the value. */
22042 if (all_same
22043 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22044 XVECEXP (vals, 0, 0)))
22045 return;
22046
22047 /* Values where only one field is non-constant are best loaded from
22048 the pool and overwritten via move later. */
22049 if (n_var == 1)
22050 {
22051 if (all_const_zero
22052 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22053 XVECEXP (vals, 0, one_var),
22054 one_var))
22055 return;
22056
22057 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22058 return;
22059 }
22060
22061 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
22062 }
22063
22064 void
22065 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22066 {
22067 enum machine_mode mode = GET_MODE (target);
22068 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22069 bool use_vec_merge = false;
22070 rtx tmp;
22071
22072 switch (mode)
22073 {
22074 case V2SFmode:
22075 case V2SImode:
22076 if (mmx_ok)
22077 {
22078 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22079 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22080 if (elt == 0)
22081 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22082 else
22083 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22084 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22085 return;
22086 }
22087 break;
22088
22089 case V2DImode:
22090 use_vec_merge = TARGET_SSE4_1;
22091 if (use_vec_merge)
22092 break;
22093
22094 case V2DFmode:
22095 {
22096 rtx op0, op1;
22097
22098 /* For the two element vectors, we implement a VEC_CONCAT with
22099 the extraction of the other element. */
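/* E.g. setting element 1 of { a, b } to X builds
(vec_concat (vec_select target [0]) X), i.e. { a, X }.  */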
22100
22101 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22102 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22103
22104 if (elt == 0)
22105 op0 = val, op1 = tmp;
22106 else
22107 op0 = tmp, op1 = val;
22108
22109 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22110 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22111 }
22112 return;
22113
22114 case V4SFmode:
22115 use_vec_merge = TARGET_SSE4_1;
22116 if (use_vec_merge)
22117 break;
22118
22119 switch (elt)
22120 {
22121 case 0:
22122 use_vec_merge = true;
22123 break;
22124
22125 case 1:
22126 /* tmp = target = A B C D */
22127 tmp = copy_to_reg (target);
22128 /* target = A A B B */
22129 emit_insn (gen_sse_unpcklps (target, target, target));
22130 /* target = X A B B */
22131 ix86_expand_vector_set (false, target, val, 0);
22132 /* target = A X C D */
22133 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22134 GEN_INT (1), GEN_INT (0),
22135 GEN_INT (2+4), GEN_INT (3+4)));
22136 return;
22137
22138 case 2:
22139 /* tmp = target = A B C D */
22140 tmp = copy_to_reg (target);
22141 /* tmp = X B C D */
22142 ix86_expand_vector_set (false, tmp, val, 0);
22143 /* target = A B X D */
22144 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22145 GEN_INT (0), GEN_INT (1),
22146 GEN_INT (0+4), GEN_INT (3+4)));
22147 return;
22148
22149 case 3:
22150 /* tmp = target = A B C D */
22151 tmp = copy_to_reg (target);
22152 /* tmp = X B C D */
22153 ix86_expand_vector_set (false, tmp, val, 0);
22154 /* target = A B C X */
22155 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22156 GEN_INT (0), GEN_INT (1),
22157 GEN_INT (2+4), GEN_INT (0+4)));
22158 return;
22159
22160 default:
22161 gcc_unreachable ();
22162 }
22163 break;
22164
22165 case V4SImode:
22166 use_vec_merge = TARGET_SSE4_1;
22167 if (use_vec_merge)
22168 break;
22169
22170 /* Element 0 handled by vec_merge below. */
22171 if (elt == 0)
22172 {
22173 use_vec_merge = true;
22174 break;
22175 }
22176
22177 if (TARGET_SSE2)
22178 {
22179 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22180 store into element 0, then shuffle them back. */
22181
22182 rtx order[4];
22183
22184 order[0] = GEN_INT (elt);
22185 order[1] = const1_rtx;
22186 order[2] = const2_rtx;
22187 order[3] = GEN_INT (3);
22188 order[elt] = const0_rtx;
22189
22190 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22191 order[1], order[2], order[3]));
22192
22193 ix86_expand_vector_set (false, target, val, 0);
22194
22195 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22196 order[1], order[2], order[3]));
22197 }
22198 else
22199 {
22200 /* For SSE1, we have to reuse the V4SF code. */
22201 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22202 gen_lowpart (SFmode, val), elt);
22203 }
22204 return;
22205
22206 case V8HImode:
22207 use_vec_merge = TARGET_SSE2;
22208 break;
22209 case V4HImode:
22210 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22211 break;
22212
22213 case V16QImode:
22214 use_vec_merge = TARGET_SSE4_1;
22215 break;
22216
22217 case V8QImode:
22218 default:
22219 break;
22220 }
22221
22222 if (use_vec_merge)
22223 {
22224 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22225 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22226 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22227 }
22228 else
22229 {
22230 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22231
22232 emit_move_insn (mem, target);
22233
22234 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22235 emit_move_insn (tmp, val);
22236
22237 emit_move_insn (target, mem);
22238 }
22239 }
22240
22241 void
22242 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22243 {
22244 enum machine_mode mode = GET_MODE (vec);
22245 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22246 bool use_vec_extr = false;
22247 rtx tmp;
22248
22249 switch (mode)
22250 {
22251 case V2SImode:
22252 case V2SFmode:
22253 if (!mmx_ok)
22254 break;
22255 /* FALLTHRU */
22256
22257 case V2DFmode:
22258 case V2DImode:
22259 use_vec_extr = true;
22260 break;
22261
22262 case V4SFmode:
22263 use_vec_extr = TARGET_SSE4_1;
22264 if (use_vec_extr)
22265 break;
22266
22267 switch (elt)
22268 {
22269 case 0:
22270 tmp = vec;
22271 break;
22272
22273 case 1:
22274 case 3:
22275 tmp = gen_reg_rtx (mode);
22276 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22277 GEN_INT (elt), GEN_INT (elt),
22278 GEN_INT (elt+4), GEN_INT (elt+4)));
22279 break;
22280
22281 case 2:
22282 tmp = gen_reg_rtx (mode);
22283 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22284 break;
22285
22286 default:
22287 gcc_unreachable ();
22288 }
22289 vec = tmp;
22290 use_vec_extr = true;
22291 elt = 0;
22292 break;
22293
22294 case V4SImode:
22295 use_vec_extr = TARGET_SSE4_1;
22296 if (use_vec_extr)
22297 break;
22298
22299 if (TARGET_SSE2)
22300 {
22301 switch (elt)
22302 {
22303 case 0:
22304 tmp = vec;
22305 break;
22306
22307 case 1:
22308 case 3:
22309 tmp = gen_reg_rtx (mode);
22310 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22311 GEN_INT (elt), GEN_INT (elt),
22312 GEN_INT (elt), GEN_INT (elt)));
22313 break;
22314
22315 case 2:
22316 tmp = gen_reg_rtx (mode);
22317 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22318 break;
22319
22320 default:
22321 gcc_unreachable ();
22322 }
22323 vec = tmp;
22324 use_vec_extr = true;
22325 elt = 0;
22326 }
22327 else
22328 {
22329 /* For SSE1, we have to reuse the V4SF code. */
22330 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22331 gen_lowpart (V4SFmode, vec), elt);
22332 return;
22333 }
22334 break;
22335
22336 case V8HImode:
22337 use_vec_extr = TARGET_SSE2;
22338 break;
22339 case V4HImode:
22340 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22341 break;
22342
22343 case V16QImode:
22344 use_vec_extr = TARGET_SSE4_1;
22345 break;
22346
22347 case V8QImode:
22348 /* ??? Could extract the appropriate HImode element and shift. */
22349 default:
22350 break;
22351 }
22352
22353 if (use_vec_extr)
22354 {
22355 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22356 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22357
22358 /* Let the rtl optimizers know about the zero extension performed. */
22359 if (inner_mode == QImode || inner_mode == HImode)
22360 {
22361 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22362 target = gen_lowpart (SImode, target);
22363 }
22364
22365 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22366 }
22367 else
22368 {
22369 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22370
22371 emit_move_insn (mem, vec);
22372
22373 tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
22374 emit_move_insn (target, tmp);
22375 }
22376 }
22377
22378 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22379 pattern to reduce; DEST is the destination; IN is the input vector. */
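/* Roughly, the data flow below is (illustrative sketch; element numbers
   follow the SSE shuffle selectors):
     tmp1 = { in[2], in[3], in[2], in[3] };           movhlps
     tmp2 = fn (tmp1, in);                            combines lanes (2,0), (3,1)
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] };   shufps broadcast of lane 1
     dest = fn (tmp2, tmp3);                          full reduction in dest[0]  */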
22380
22381 void
22382 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22383 {
22384 rtx tmp1, tmp2, tmp3;
22385
22386 tmp1 = gen_reg_rtx (V4SFmode);
22387 tmp2 = gen_reg_rtx (V4SFmode);
22388 tmp3 = gen_reg_rtx (V4SFmode);
22389
22390 emit_insn (gen_sse_movhlps (tmp1, in, in));
22391 emit_insn (fn (tmp2, tmp1, in));
22392
22393 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22394 GEN_INT (1), GEN_INT (1),
22395 GEN_INT (1+4), GEN_INT (1+4)));
22396 emit_insn (fn (dest, tmp2, tmp3));
22397 }
22398 \f
22399 /* Target hook for scalar_mode_supported_p. */
22400 static bool
22401 ix86_scalar_mode_supported_p (enum machine_mode mode)
22402 {
22403 if (DECIMAL_FLOAT_MODE_P (mode))
22404 return true;
22405 else if (mode == TFmode)
22406 return TARGET_64BIT;
22407 else
22408 return default_scalar_mode_supported_p (mode);
22409 }
22410
22411 /* Implements target hook vector_mode_supported_p. */
22412 static bool
22413 ix86_vector_mode_supported_p (enum machine_mode mode)
22414 {
22415 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22416 return true;
22417 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22418 return true;
22419 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22420 return true;
22421 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22422 return true;
22423 return false;
22424 }
22425
22426 /* Target hook for c_mode_for_suffix. */
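/* The 'q' suffix selects __float128 constants (e.g. 1.0q) and the 'w'
   suffix selects __float80 constants (e.g. 1.0w); any other suffix is
   left to the default handling.  */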
22427 static enum machine_mode
22428 ix86_c_mode_for_suffix (char suffix)
22429 {
22430 if (TARGET_64BIT && suffix == 'q')
22431 return TFmode;
22432 if (TARGET_MMX && suffix == 'w')
22433 return XFmode;
22434
22435 return VOIDmode;
22436 }
22437
22438 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22439
22440 We do this in the new i386 backend to maintain source compatibility
22441 with the old cc0-based compiler. */
22442
22443 static tree
22444 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22445 tree inputs ATTRIBUTE_UNUSED,
22446 tree clobbers)
22447 {
22448 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22449 clobbers);
22450 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22451 clobbers);
22452 return clobbers;
22453 }
22454
22455 /* Implements the targetm.asm.encode_section_info target hook. This
22456 hook is not used by NetWare. */
22457
22458 static void ATTRIBUTE_UNUSED
22459 ix86_encode_section_info (tree decl, rtx rtl, int first)
22460 {
22461 default_encode_section_info (decl, rtl, first);
22462
22463 if (TREE_CODE (decl) == VAR_DECL
22464 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22465 && ix86_in_large_data_p (decl))
22466 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22467 }
22468
22469 /* Worker function for REVERSE_CONDITION. */
22470
22471 enum rtx_code
22472 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22473 {
22474 return (mode != CCFPmode && mode != CCFPUmode
22475 ? reverse_condition (code)
22476 : reverse_condition_maybe_unordered (code));
22477 }
22478
22479 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22480 to OPERANDS[0]. */
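/* Roughly, the templates chosen below are:
     reg <- dying reg      "fstp %y0" (or ffreep when storing to %st(0))
     %st(0) <- any         "fld%z1 %y1"
     reg <- reg            "fst %y0"
     mem <- dying reg      "fstp%z0 %y0"
     mem <- reg, XFmode    "fstp%z0 %y0" followed by "fld%z0 %y0"
     mem <- reg            "fst%z0 %y0"  */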
22481
22482 const char *
22483 output_387_reg_move (rtx insn, rtx *operands)
22484 {
22485 if (REG_P (operands[0]))
22486 {
22487 if (REG_P (operands[1])
22488 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22489 {
22490 if (REGNO (operands[0]) == FIRST_STACK_REG)
22491 return output_387_ffreep (operands, 0);
22492 return "fstp\t%y0";
22493 }
22494 if (STACK_TOP_P (operands[0]))
22495 return "fld%z1\t%y1";
22496 return "fst\t%y0";
22497 }
22498 else if (MEM_P (operands[0]))
22499 {
22500 gcc_assert (REG_P (operands[1]));
22501 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22502 return "fstp%z0\t%y0";
22503 else
22504 {
22505 /* There is no non-popping store to memory for XFmode.
22506 So if we need one, follow the store with a load. */
22507 if (GET_MODE (operands[0]) == XFmode)
22508 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22509 else
22510 return "fst%z0\t%y0";
22511 }
22512 }
22513 else
22514 gcc_unreachable ();
22515 }
22516
22517 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22518 FP status register is set. */
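/* The FP status word is fetched with fnstsw; C2 is bit 10 of that word,
   i.e. bit 2 (mask 0x04) of its high byte.  sahf copies that byte into the
   flags, where C2 shows up as the parity flag, hence either an UNORDERED
   test after sahf or an explicit test of 0x04 against the high byte.  */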
22519
22520 void
22521 ix86_emit_fp_unordered_jump (rtx label)
22522 {
22523 rtx reg = gen_reg_rtx (HImode);
22524 rtx temp;
22525
22526 emit_insn (gen_x86_fnstsw_1 (reg));
22527
22528 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22529 {
22530 emit_insn (gen_x86_sahf_1 (reg));
22531
22532 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22533 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22534 }
22535 else
22536 {
22537 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22538
22539 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22540 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22541 }
22542
22543 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22544 gen_rtx_LABEL_REF (VOIDmode, label),
22545 pc_rtx);
22546 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22547
22548 emit_jump_insn (temp);
22549 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22550 }
22551
22552 /* Output code to perform a log1p XFmode calculation. */
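/* Roughly, in the style of the other expanders here (illustrative only):
     if (fabs (op1) < 0.29289321881345...)      1 - sqrt (2) / 2
       op0 = fyl2xp1 (op1, ln (2));             ln (2) * log2 (1 + op1)
     else
       op0 = fyl2x (1.0 + op1, ln (2));
   fyl2xp1 is only accurate for small arguments, hence the branch.  */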
22553
22554 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22555 {
22556 rtx label1 = gen_label_rtx ();
22557 rtx label2 = gen_label_rtx ();
22558
22559 rtx tmp = gen_reg_rtx (XFmode);
22560 rtx tmp2 = gen_reg_rtx (XFmode);
22561
22562 emit_insn (gen_absxf2 (tmp, op1));
22563 emit_insn (gen_cmpxf (tmp,
22564 CONST_DOUBLE_FROM_REAL_VALUE (
22565 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22566 XFmode)));
22567 emit_jump_insn (gen_bge (label1));
22568
22569 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22570 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22571 emit_jump (label2);
22572
22573 emit_label (label1);
22574 emit_move_insn (tmp, CONST1_RTX (XFmode));
22575 emit_insn (gen_addxf3 (tmp, op1, tmp));
22576 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22577 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22578
22579 emit_label (label2);
22580 }
22581
22582 /* Output code to perform a Newton-Raphson approximation of a single precision
22583 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
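/* One Newton-Raphson step for f (x) = 1/x - b refines an estimate x0 of
   1/b as x1 = x0 * (2 - b * x0), roughly doubling the number of correct
   bits of the rcp estimate.  Illustrative scalar sketch:
     x0  = rcp (b);
     x1  = x0 * (2.0f - b * x0);
     res = a * x1;  */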
22584
22585 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22586 {
22587 rtx x0, x1, e0, e1, two;
22588
22589 x0 = gen_reg_rtx (mode);
22590 e0 = gen_reg_rtx (mode);
22591 e1 = gen_reg_rtx (mode);
22592 x1 = gen_reg_rtx (mode);
22593
22594 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22595
22596 if (VECTOR_MODE_P (mode))
22597 two = ix86_build_const_vector (SFmode, true, two);
22598
22599 two = force_reg (mode, two);
22600
22601 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22602
22603 /* x0 = 1./b estimate */
22604 emit_insn (gen_rtx_SET (VOIDmode, x0,
22605 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
22606 UNSPEC_RCP)));
22607 /* e0 = x0 * b */
22608 emit_insn (gen_rtx_SET (VOIDmode, e0,
22609 gen_rtx_MULT (mode, x0, b)));
22610 /* e1 = 2. - e0 */
22611 emit_insn (gen_rtx_SET (VOIDmode, e1,
22612 gen_rtx_MINUS (mode, two, e0)));
22613 /* x1 = x0 * e1 */
22614 emit_insn (gen_rtx_SET (VOIDmode, x1,
22615 gen_rtx_MULT (mode, x0, e1)));
22616 /* res = a * x1 */
22617 emit_insn (gen_rtx_SET (VOIDmode, res,
22618 gen_rtx_MULT (mode, a, x1)));
22619 }
22620
22621 /* Output code to perform a Newton-Raphson approximation of a
22622 single precision floating point [reciprocal] square root. */
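/* One Newton-Raphson step for f (x) = 1/(x*x) - a refines an estimate x0
   of 1/sqrt (a) as x1 = 0.5 * x0 * (3 - a * x0 * x0).  Illustrative scalar
   sketch (the mask used below additionally zeroes the rsqrt estimate when
   a == 0, so no infinity is propagated):
     x0  = rsqrt (a);
     e   = a * x0 * x0;
     res = recip ? 0.5f * x0 * (3.0f - e)
                 : 0.5f * (a * x0) * (3.0f - e);  */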
22623
22624 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22625 bool recip)
22626 {
22627 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22628
22629 x0 = gen_reg_rtx (mode);
22630 e0 = gen_reg_rtx (mode);
22631 e1 = gen_reg_rtx (mode);
22632 e2 = gen_reg_rtx (mode);
22633 e3 = gen_reg_rtx (mode);
22634
22635 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22636 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22637
22638 mask = gen_reg_rtx (mode);
22639
22640 if (VECTOR_MODE_P (mode))
22641 {
22642 three = ix86_build_const_vector (SFmode, true, three);
22643 half = ix86_build_const_vector (SFmode, true, half);
22644 }
22645
22646 three = force_reg (mode, three);
22647 half = force_reg (mode, half);
22648
22649 zero = force_reg (mode, CONST0_RTX (mode));
22650
22651 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22652 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22653
22654 /* Compare a to zero. */
22655 emit_insn (gen_rtx_SET (VOIDmode, mask,
22656 gen_rtx_NE (mode, a, zero)));
22657
22658 /* x0 = 1./sqrt(a) estimate */
22659 emit_insn (gen_rtx_SET (VOIDmode, x0,
22660 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22661 UNSPEC_RSQRT)));
22662 /* Filter out infinity. */
22663 if (VECTOR_MODE_P (mode))
22664 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22665 gen_rtx_AND (mode,
22666 gen_lowpart (V4SFmode, x0),
22667 gen_lowpart (V4SFmode, mask))));
22668 else
22669 emit_insn (gen_rtx_SET (VOIDmode, x0,
22670 gen_rtx_AND (mode, x0, mask)));
22671
22672 /* e0 = x0 * a */
22673 emit_insn (gen_rtx_SET (VOIDmode, e0,
22674 gen_rtx_MULT (mode, x0, a)));
22675 /* e1 = e0 * x0 */
22676 emit_insn (gen_rtx_SET (VOIDmode, e1,
22677 gen_rtx_MULT (mode, e0, x0)));
22678 /* e2 = 3. - e1 */
22679 emit_insn (gen_rtx_SET (VOIDmode, e2,
22680 gen_rtx_MINUS (mode, three, e1)));
22681 if (recip)
22682 /* e3 = .5 * x0 */
22683 emit_insn (gen_rtx_SET (VOIDmode, e3,
22684 gen_rtx_MULT (mode, half, x0)));
22685 else
22686 /* e3 = .5 * e0 */
22687 emit_insn (gen_rtx_SET (VOIDmode, e3,
22688 gen_rtx_MULT (mode, half, e0)));
22689 /* ret = e2 * e3 */
22690 emit_insn (gen_rtx_SET (VOIDmode, res,
22691 gen_rtx_MULT (mode, e2, e3)));
22692 }
22693
22694 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22695
22696 static void ATTRIBUTE_UNUSED
22697 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22698 tree decl)
22699 {
22700 /* With Binutils 2.15, the "@unwind" marker must be specified on
22701 every occurrence of the ".eh_frame" section, not just the first
22702 one. */
22703 if (TARGET_64BIT
22704 && strcmp (name, ".eh_frame") == 0)
22705 {
22706 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22707 flags & SECTION_WRITE ? "aw" : "a");
22708 return;
22709 }
22710 default_elf_asm_named_section (name, flags, decl);
22711 }
22712
22713 /* Return the mangling of TYPE if it is an extended fundamental type. */
22714
22715 static const char *
22716 ix86_mangle_fundamental_type (tree type)
22717 {
22718 switch (TYPE_MODE (type))
22719 {
22720 case TFmode:
22721 /* __float128 is "g". */
22722 return "g";
22723 case XFmode:
22724 /* "long double" or __float80 is "e". */
22725 return "e";
22726 default:
22727 return NULL;
22728 }
22729 }
22730
22731 /* For 32-bit code we can save PIC register setup by using
22732 __stack_chk_fail_local hidden function instead of calling
22733 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22734 register, so it is better to call __stack_chk_fail directly. */
22735
22736 static tree
22737 ix86_stack_protect_fail (void)
22738 {
22739 return TARGET_64BIT
22740 ? default_external_stack_protect_fail ()
22741 : default_hidden_stack_protect_fail ();
22742 }
22743
22744 /* Select a format to encode pointers in exception handling data. CODE
22745 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22746 true if the symbol may be affected by dynamic relocations.
22747
22748 ??? All x86 object file formats are capable of representing this.
22749 After all, the relocation needed is the same as for the call insn.
22750 Whether or not a particular assembler allows us to enter such, I
22751 guess we'll have to see. */
22752 int
22753 asm_preferred_eh_data_format (int code, int global)
22754 {
22755 if (flag_pic)
22756 {
22757 int type = DW_EH_PE_sdata8;
22758 if (!TARGET_64BIT
22759 || ix86_cmodel == CM_SMALL_PIC
22760 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22761 type = DW_EH_PE_sdata4;
22762 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22763 }
22764 if (ix86_cmodel == CM_SMALL
22765 || (ix86_cmodel == CM_MEDIUM && code))
22766 return DW_EH_PE_udata4;
22767 return DW_EH_PE_absptr;
22768 }
22769 \f
22770 /* Expand copysign from SIGN to the positive value ABS_VALUE
22771 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
22772 the sign-bit. */
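/* Roughly:
     sgn    = SIGN & signbit;
     RESULT = ABS_VALUE | sgn;
   where ABS_VALUE is assumed to already have a clear sign bit.  When MASK
   is given it is the fabs mask (~signbit), so it is inverted first.  */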
22773 static void
22774 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22775 {
22776 enum machine_mode mode = GET_MODE (sign);
22777 rtx sgn = gen_reg_rtx (mode);
22778 if (mask == NULL_RTX)
22779 {
22780 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22781 if (!VECTOR_MODE_P (mode))
22782 {
22783 /* We need to generate a scalar mode mask in this case. */
22784 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22785 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22786 mask = gen_reg_rtx (mode);
22787 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22788 }
22789 }
22790 else
22791 mask = gen_rtx_NOT (mode, mask);
22792 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22793 gen_rtx_AND (mode, mask, sign)));
22794 emit_insn (gen_rtx_SET (VOIDmode, result,
22795 gen_rtx_IOR (mode, abs_value, sgn)));
22796 }
22797
22798 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22799 mask for masking out the sign-bit is stored in *SMASK, if that is
22800 non-null. */
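/* Roughly:  result = OP0 & ~signbit,  with the ~signbit mask also handed
   back through *SMASK for reuse by the callers below.  */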
22801 static rtx
22802 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22803 {
22804 enum machine_mode mode = GET_MODE (op0);
22805 rtx xa, mask;
22806
22807 xa = gen_reg_rtx (mode);
22808 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22809 if (!VECTOR_MODE_P (mode))
22810 {
22811 /* We need to generate a scalar mode mask in this case. */
22812 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22813 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22814 mask = gen_reg_rtx (mode);
22815 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22816 }
22817 emit_insn (gen_rtx_SET (VOIDmode, xa,
22818 gen_rtx_AND (mode, op0, mask)));
22819
22820 if (smask)
22821 *smask = mask;
22822
22823 return xa;
22824 }
22825
22826 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22827 swapping the operands if SWAP_OPERANDS is true. The expanded
22828 code is a forward jump to a newly created label in case the
22829 comparison is true. The generated label rtx is returned. */
22830 static rtx
22831 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22832 bool swap_operands)
22833 {
22834 rtx label, tmp;
22835
22836 if (swap_operands)
22837 {
22838 tmp = op0;
22839 op0 = op1;
22840 op1 = tmp;
22841 }
22842
22843 label = gen_label_rtx ();
22844 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22845 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22846 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22847 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22848 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22849 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22850 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22851 JUMP_LABEL (tmp) = label;
22852
22853 return label;
22854 }
22855
22856 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22857 using comparison code CODE. Operands are swapped for the comparison if
22858 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
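/* The returned mask is all-ones in elements where the comparison holds and
   all-zeros elsewhere, so callers below can AND it with constants such as
   1.0 to build a conditional adjustment without branching.  */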
22859 static rtx
22860 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22861 bool swap_operands)
22862 {
22863 enum machine_mode mode = GET_MODE (op0);
22864 rtx mask = gen_reg_rtx (mode);
22865
22866 if (swap_operands)
22867 {
22868 rtx tmp = op0;
22869 op0 = op1;
22870 op1 = tmp;
22871 }
22872
22873 if (mode == DFmode)
22874 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22875 gen_rtx_fmt_ee (code, mode, op0, op1)));
22876 else
22877 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22878 gen_rtx_fmt_ee (code, mode, op0, op1)));
22879
22880 return mask;
22881 }
22882
22883 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22884 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
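/* For DFmode this is 2**52 (4503599627370496.0).  Adding and then
   subtracting it forces any |x| < 2**52 to an integer value, because the
   mantissa has no room left for fraction bits; the rounding expanders
   below rely on this trick.  */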
22885 static rtx
22886 ix86_gen_TWO52 (enum machine_mode mode)
22887 {
22888 REAL_VALUE_TYPE TWO52r;
22889 rtx TWO52;
22890
22891 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22892 TWO52 = const_double_from_real_value (TWO52r, mode);
22893 TWO52 = force_reg (mode, TWO52);
22894
22895 return TWO52;
22896 }
22897
22898 /* Expand SSE sequence for computing lround from OP1 storing
22899 into OP0. */
22900 void
22901 ix86_expand_lround (rtx op0, rtx op1)
22902 {
22903 /* C code for the stuff we're doing below:
22904 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22905 return (long)tmp;
22906 */
22907 enum machine_mode mode = GET_MODE (op1);
22908 const struct real_format *fmt;
22909 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22910 rtx adj;
22911
22912 /* load nextafter (0.5, 0.0) */
22913 fmt = REAL_MODE_FORMAT (mode);
22914 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22915 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22916
22917 /* adj = copysign (0.5, op1) */
22918 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22919 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22920
22921 /* adj = op1 + adj */
22922 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22923
22924 /* op0 = (imode)adj */
22925 expand_fix (op0, adj, 0);
22926 }
22927
22928 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
22929 into OPERAND0. */
22930 void
22931 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22932 {
22933 /* C code for the stuff we're doing below (for do_floor):
22934 xi = (long)op1;
22935 xi -= (double)xi > op1 ? 1 : 0;
22936 return xi;
22937 */
22938 enum machine_mode fmode = GET_MODE (op1);
22939 enum machine_mode imode = GET_MODE (op0);
22940 rtx ireg, freg, label, tmp;
22941
22942 /* reg = (long)op1 */
22943 ireg = gen_reg_rtx (imode);
22944 expand_fix (ireg, op1, 0);
22945
22946 /* freg = (double)reg */
22947 freg = gen_reg_rtx (fmode);
22948 expand_float (freg, ireg, 0);
22949
22950 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22951 label = ix86_expand_sse_compare_and_jump (UNLE,
22952 freg, op1, !do_floor);
22953 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22954 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22955 emit_move_insn (ireg, tmp);
22956
22957 emit_label (label);
22958 LABEL_NUSES (label) = 1;
22959
22960 emit_move_insn (op0, ireg);
22961 }
22962
22963 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22964 result in OPERAND0. */
22965 void
22966 ix86_expand_rint (rtx operand0, rtx operand1)
22967 {
22968 /* C code for the stuff we're doing below:
22969 xa = fabs (operand1);
22970 if (!isless (xa, 2**52))
22971 return operand1;
22972 xa = xa + 2**52 - 2**52;
22973 return copysign (xa, operand1);
22974 */
22975 enum machine_mode mode = GET_MODE (operand0);
22976 rtx res, xa, label, TWO52, mask;
22977
22978 res = gen_reg_rtx (mode);
22979 emit_move_insn (res, operand1);
22980
22981 /* xa = abs (operand1) */
22982 xa = ix86_expand_sse_fabs (res, &mask);
22983
22984 /* if (!isless (xa, TWO52)) goto label; */
22985 TWO52 = ix86_gen_TWO52 (mode);
22986 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22987
22988 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22989 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22990
22991 ix86_sse_copysign_to_positive (res, xa, res, mask);
22992
22993 emit_label (label);
22994 LABEL_NUSES (label) = 1;
22995
22996 emit_move_insn (operand0, res);
22997 }
22998
22999 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23000 into OPERAND0. */
23001 void
23002 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23003 {
23004 /* C code for the stuff we expand below.
23005 double xa = fabs (x), x2;
23006 if (!isless (xa, TWO52))
23007 return x;
23008 xa = xa + TWO52 - TWO52;
23009 x2 = copysign (xa, x);
23010 Compensate. Floor:
23011 if (x2 > x)
23012 x2 -= 1;
23013 Compensate. Ceil:
23014 if (x2 < x)
23015 x2 -= -1;
23016 return x2;
23017 */
23018 enum machine_mode mode = GET_MODE (operand0);
23019 rtx xa, TWO52, tmp, label, one, res, mask;
23020
23021 TWO52 = ix86_gen_TWO52 (mode);
23022
23023 /* Temporary for holding the result, initialized to the input
23024 operand to ease control flow. */
23025 res = gen_reg_rtx (mode);
23026 emit_move_insn (res, operand1);
23027
23028 /* xa = abs (operand1) */
23029 xa = ix86_expand_sse_fabs (res, &mask);
23030
23031 /* if (!isless (xa, TWO52)) goto label; */
23032 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23033
23034 /* xa = xa + TWO52 - TWO52; */
23035 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23036 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23037
23038 /* xa = copysign (xa, operand1) */
23039 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23040
23041 /* generate 1.0 or -1.0 */
23042 one = force_reg (mode,
23043 const_double_from_real_value (do_floor
23044 ? dconst1 : dconstm1, mode));
23045
23046 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23047 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23048 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23049 gen_rtx_AND (mode, one, tmp)));
23050 /* We always need to subtract here to preserve signed zero. */
23051 tmp = expand_simple_binop (mode, MINUS,
23052 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23053 emit_move_insn (res, tmp);
23054
23055 emit_label (label);
23056 LABEL_NUSES (label) = 1;
23057
23058 emit_move_insn (operand0, res);
23059 }
23060
23061 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23062 into OPERAND0. */
23063 void
23064 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23065 {
23066 /* C code for the stuff we expand below.
23067 double xa = fabs (x), x2;
23068 if (!isless (xa, TWO52))
23069 return x;
23070 x2 = (double)(long)x;
23071 Compensate. Floor:
23072 if (x2 > x)
23073 x2 -= 1;
23074 Compensate. Ceil:
23075 if (x2 < x)
23076 x2 += 1;
23077 if (HONOR_SIGNED_ZEROS (mode))
23078 return copysign (x2, x);
23079 return x2;
23080 */
23081 enum machine_mode mode = GET_MODE (operand0);
23082 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23083
23084 TWO52 = ix86_gen_TWO52 (mode);
23085
23086 /* Temporary for holding the result, initialized to the input
23087 operand to ease control flow. */
23088 res = gen_reg_rtx (mode);
23089 emit_move_insn (res, operand1);
23090
23091 /* xa = abs (operand1) */
23092 xa = ix86_expand_sse_fabs (res, &mask);
23093
23094 /* if (!isless (xa, TWO52)) goto label; */
23095 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23096
23097 /* xa = (double)(long)x */
23098 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23099 expand_fix (xi, res, 0);
23100 expand_float (xa, xi, 0);
23101
23102 /* generate 1.0 */
23103 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23104
23105 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23106 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23107 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23108 gen_rtx_AND (mode, one, tmp)));
23109 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23110 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23111 emit_move_insn (res, tmp);
23112
23113 if (HONOR_SIGNED_ZEROS (mode))
23114 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23115
23116 emit_label (label);
23117 LABEL_NUSES (label) = 1;
23118
23119 emit_move_insn (operand0, res);
23120 }
23121
23122 /* Expand SSE sequence for computing round from OPERAND1 storing
23123 into OPERAND0. The sequence works without relying on DImode truncation
23124 via cvttsd2siq, which is only available on 64-bit targets. */
23125 void
23126 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23127 {
23128 /* C code for the stuff we expand below.
23129 double xa = fabs (x), xa2, x2;
23130 if (!isless (xa, TWO52))
23131 return x;
23132 Using the absolute value and copying back sign makes
23133 -0.0 -> -0.0 correct.
23134 xa2 = xa + TWO52 - TWO52;
23135 Compensate.
23136 dxa = xa2 - xa;
23137 if (dxa <= -0.5)
23138 xa2 += 1;
23139 else if (dxa > 0.5)
23140 xa2 -= 1;
23141 x2 = copysign (xa2, x);
23142 return x2;
23143 */
23144 enum machine_mode mode = GET_MODE (operand0);
23145 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23146
23147 TWO52 = ix86_gen_TWO52 (mode);
23148
23149 /* Temporary for holding the result, initialized to the input
23150 operand to ease control flow. */
23151 res = gen_reg_rtx (mode);
23152 emit_move_insn (res, operand1);
23153
23154 /* xa = abs (operand1) */
23155 xa = ix86_expand_sse_fabs (res, &mask);
23156
23157 /* if (!isless (xa, TWO52)) goto label; */
23158 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23159
23160 /* xa2 = xa + TWO52 - TWO52; */
23161 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23162 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23163
23164 /* dxa = xa2 - xa; */
23165 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23166
23167 /* generate 0.5, 1.0 and -0.5 */
23168 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23169 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23170 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23171 0, OPTAB_DIRECT);
23172
23173 /* Compensate. */
23174 tmp = gen_reg_rtx (mode);
23175 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23176 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23177 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23178 gen_rtx_AND (mode, one, tmp)));
23179 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23180 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23181 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23182 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23183 gen_rtx_AND (mode, one, tmp)));
23184 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23185
23186 /* res = copysign (xa2, operand1) */
23187 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23188
23189 emit_label (label);
23190 LABEL_NUSES (label) = 1;
23191
23192 emit_move_insn (operand0, res);
23193 }
23194
23195 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23196 into OPERAND0. */
23197 void
23198 ix86_expand_trunc (rtx operand0, rtx operand1)
23199 {
23200 /* C code for SSE variant we expand below.
23201 double xa = fabs (x), x2;
23202 if (!isless (xa, TWO52))
23203 return x;
23204 x2 = (double)(long)x;
23205 if (HONOR_SIGNED_ZEROS (mode))
23206 return copysign (x2, x);
23207 return x2;
23208 */
23209 enum machine_mode mode = GET_MODE (operand0);
23210 rtx xa, xi, TWO52, label, res, mask;
23211
23212 TWO52 = ix86_gen_TWO52 (mode);
23213
23214 /* Temporary for holding the result, initialized to the input
23215 operand to ease control flow. */
23216 res = gen_reg_rtx (mode);
23217 emit_move_insn (res, operand1);
23218
23219 /* xa = abs (operand1) */
23220 xa = ix86_expand_sse_fabs (res, &mask);
23221
23222 /* if (!isless (xa, TWO52)) goto label; */
23223 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23224
23225 /* x = (double)(long)x */
23226 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23227 expand_fix (xi, res, 0);
23228 expand_float (res, xi, 0);
23229
23230 if (HONOR_SIGNED_ZEROS (mode))
23231 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23232
23233 emit_label (label);
23234 LABEL_NUSES (label) = 1;
23235
23236 emit_move_insn (operand0, res);
23237 }
23238
23239 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23240 into OPERAND0. */
23241 void
23242 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23243 {
23244 enum machine_mode mode = GET_MODE (operand0);
23245 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23246
23247 /* C code for SSE variant we expand below.
23248 double xa = fabs (x), x2;
23249 if (!isless (xa, TWO52))
23250 return x;
23251 xa2 = xa + TWO52 - TWO52;
23252 Compensate:
23253 if (xa2 > xa)
23254 xa2 -= 1.0;
23255 x2 = copysign (xa2, x);
23256 return x2;
23257 */
23258
23259 TWO52 = ix86_gen_TWO52 (mode);
23260
23261 /* Temporary for holding the result, initialized to the input
23262 operand to ease control flow. */
23263 res = gen_reg_rtx (mode);
23264 emit_move_insn (res, operand1);
23265
23266 /* xa = abs (operand1) */
23267 xa = ix86_expand_sse_fabs (res, &smask);
23268
23269 /* if (!isless (xa, TWO52)) goto label; */
23270 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23271
23272 /* res = xa + TWO52 - TWO52; */
23273 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23274 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23275 emit_move_insn (res, tmp);
23276
23277 /* generate 1.0 */
23278 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23279
23280 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23281 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23282 emit_insn (gen_rtx_SET (VOIDmode, mask,
23283 gen_rtx_AND (mode, mask, one)));
23284 tmp = expand_simple_binop (mode, MINUS,
23285 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23286 emit_move_insn (res, tmp);
23287
23288 /* res = copysign (res, operand1) */
23289 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23290
23291 emit_label (label);
23292 LABEL_NUSES (label) = 1;
23293
23294 emit_move_insn (operand0, res);
23295 }
23296
23297 /* Expand SSE sequence for computing round from OPERAND1 storing
23298 into OPERAND0. */
23299 void
23300 ix86_expand_round (rtx operand0, rtx operand1)
23301 {
23302 /* C code for the stuff we're doing below:
23303 double xa = fabs (x);
23304 if (!isless (xa, TWO52))
23305 return x;
23306 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23307 return copysign (xa, x);
23308 */
23309 enum machine_mode mode = GET_MODE (operand0);
23310 rtx res, TWO52, xa, label, xi, half, mask;
23311 const struct real_format *fmt;
23312 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23313
23314 /* Temporary for holding the result, initialized to the input
23315 operand to ease control flow. */
23316 res = gen_reg_rtx (mode);
23317 emit_move_insn (res, operand1);
23318
23319 TWO52 = ix86_gen_TWO52 (mode);
23320 xa = ix86_expand_sse_fabs (res, &mask);
23321 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23322
23323 /* load nextafter (0.5, 0.0) */
23324 fmt = REAL_MODE_FORMAT (mode);
23325 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23326 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23327
23328 /* xa = xa + 0.5 */
23329 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23330 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23331
23332 /* xa = (double)(int64_t)xa */
23333 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23334 expand_fix (xi, xa, 0);
23335 expand_float (xa, xi, 0);
23336
23337 /* res = copysign (xa, operand1) */
23338 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23339
23340 emit_label (label);
23341 LABEL_NUSES (label) = 1;
23342
23343 emit_move_insn (operand0, res);
23344 }
23345
23346 \f
23347 /* Table of valid machine attributes. */
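/* In user code these are written, e.g., as
     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);  */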
23348 static const struct attribute_spec ix86_attribute_table[] =
23349 {
23350 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23351 /* Stdcall attribute says callee is responsible for popping arguments
23352 if they are not variable. */
23353 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23354 /* Fastcall attribute says callee is responsible for popping arguments
23355 if they are not variable. */
23356 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23357 /* Cdecl attribute says the callee is a normal C declaration */
23358 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23359 /* Regparm attribute specifies how many integer arguments are to be
23360 passed in registers. */
23361 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23362 /* Sseregparm attribute says we are using x86_64 calling conventions
23363 for FP arguments. */
23364 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23365 /* force_align_arg_pointer says this function realigns the stack at entry. */
23366 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23367 false, true, true, ix86_handle_cconv_attribute },
23368 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23369 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23370 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23371 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23372 #endif
23373 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23374 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23375 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23376 SUBTARGET_ATTRIBUTE_TABLE,
23377 #endif
23378 { NULL, 0, 0, false, false, false, NULL }
23379 };
23380
23381 /* Initialize the GCC target structure. */
23382 #undef TARGET_ATTRIBUTE_TABLE
23383 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23384 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23385 # undef TARGET_MERGE_DECL_ATTRIBUTES
23386 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23387 #endif
23388
23389 #undef TARGET_COMP_TYPE_ATTRIBUTES
23390 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23391
23392 #undef TARGET_INIT_BUILTINS
23393 #define TARGET_INIT_BUILTINS ix86_init_builtins
23394 #undef TARGET_EXPAND_BUILTIN
23395 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23396
23397 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23398 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23399 ix86_builtin_vectorized_function
23400
23401 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23402 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23403
23404 #undef TARGET_BUILTIN_RECIPROCAL
23405 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23406
23407 #undef TARGET_ASM_FUNCTION_EPILOGUE
23408 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23409
23410 #undef TARGET_ENCODE_SECTION_INFO
23411 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23412 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23413 #else
23414 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23415 #endif
23416
23417 #undef TARGET_ASM_OPEN_PAREN
23418 #define TARGET_ASM_OPEN_PAREN ""
23419 #undef TARGET_ASM_CLOSE_PAREN
23420 #define TARGET_ASM_CLOSE_PAREN ""
23421
23422 #undef TARGET_ASM_ALIGNED_HI_OP
23423 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23424 #undef TARGET_ASM_ALIGNED_SI_OP
23425 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23426 #ifdef ASM_QUAD
23427 #undef TARGET_ASM_ALIGNED_DI_OP
23428 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23429 #endif
23430
23431 #undef TARGET_ASM_UNALIGNED_HI_OP
23432 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23433 #undef TARGET_ASM_UNALIGNED_SI_OP
23434 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23435 #undef TARGET_ASM_UNALIGNED_DI_OP
23436 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23437
23438 #undef TARGET_SCHED_ADJUST_COST
23439 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23440 #undef TARGET_SCHED_ISSUE_RATE
23441 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23442 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23443 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23444 ia32_multipass_dfa_lookahead
23445
23446 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23447 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23448
23449 #ifdef HAVE_AS_TLS
23450 #undef TARGET_HAVE_TLS
23451 #define TARGET_HAVE_TLS true
23452 #endif
23453 #undef TARGET_CANNOT_FORCE_CONST_MEM
23454 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23455 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23456 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23457
23458 #undef TARGET_DELEGITIMIZE_ADDRESS
23459 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23460
23461 #undef TARGET_MS_BITFIELD_LAYOUT_P
23462 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23463
23464 #if TARGET_MACHO
23465 #undef TARGET_BINDS_LOCAL_P
23466 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23467 #endif
23468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23469 #undef TARGET_BINDS_LOCAL_P
23470 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23471 #endif
23472
23473 #undef TARGET_ASM_OUTPUT_MI_THUNK
23474 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23475 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23476 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23477
23478 #undef TARGET_ASM_FILE_START
23479 #define TARGET_ASM_FILE_START x86_file_start
23480
23481 #undef TARGET_DEFAULT_TARGET_FLAGS
23482 #define TARGET_DEFAULT_TARGET_FLAGS \
23483 (TARGET_DEFAULT \
23484 | TARGET_SUBTARGET_DEFAULT \
23485 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23486
23487 #undef TARGET_HANDLE_OPTION
23488 #define TARGET_HANDLE_OPTION ix86_handle_option
23489
23490 #undef TARGET_RTX_COSTS
23491 #define TARGET_RTX_COSTS ix86_rtx_costs
23492 #undef TARGET_ADDRESS_COST
23493 #define TARGET_ADDRESS_COST ix86_address_cost
23494
23495 #undef TARGET_FIXED_CONDITION_CODE_REGS
23496 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23497 #undef TARGET_CC_MODES_COMPATIBLE
23498 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23499
23500 #undef TARGET_MACHINE_DEPENDENT_REORG
23501 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23502
23503 #undef TARGET_BUILD_BUILTIN_VA_LIST
23504 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23505
23506 #undef TARGET_MD_ASM_CLOBBERS
23507 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23508
23509 #undef TARGET_PROMOTE_PROTOTYPES
23510 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23511 #undef TARGET_STRUCT_VALUE_RTX
23512 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23513 #undef TARGET_SETUP_INCOMING_VARARGS
23514 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23515 #undef TARGET_MUST_PASS_IN_STACK
23516 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23517 #undef TARGET_PASS_BY_REFERENCE
23518 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23519 #undef TARGET_INTERNAL_ARG_POINTER
23520 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23521 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23522 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23523 #undef TARGET_STRICT_ARGUMENT_NAMING
23524 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23525
23526 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23527 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23528
23529 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23530 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23531
23532 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23533 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23534
23535 #undef TARGET_C_MODE_FOR_SUFFIX
23536 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23537
23538 #ifdef HAVE_AS_TLS
23539 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23540 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23541 #endif
23542
23543 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23544 #undef TARGET_INSERT_ATTRIBUTES
23545 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23546 #endif
23547
23548 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23549 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23550
23551 #undef TARGET_STACK_PROTECT_FAIL
23552 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23553
23554 #undef TARGET_FUNCTION_VALUE
23555 #define TARGET_FUNCTION_VALUE ix86_function_value
23556
23557 struct gcc_target targetm = TARGET_INITIALIZER;
23558 \f
23559 #include "gt-i386.h"